1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/MemoryLocation.h"
33 #include "llvm/Analysis/VectorUtils.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/SelectionDAG.h"
41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42 #include "llvm/CodeGen/SelectionDAGNodes.h"
43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
78 #define DEBUG_TYPE "dagcombine"
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94 cl::desc("Enable DAG combiner's use of TBAA"));
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99 cl::desc("Only use DAG-combiner alias analysis in this"
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107 cl::desc("Bypass the profitability model of load slicing"),
111 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112 cl::desc("DAG combiner may split indexing from loads"));
115 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116 cl::desc("DAG combiner enable merging multiple stores "
117 "into a wider store"));
119 static cl::opt<unsigned> TokenFactorInlineLimit(
120 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121 cl::desc("Limit the number of operands to inline for Token Factors"));
123 static cl::opt<unsigned> StoreMergeDependenceLimit(
124 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125 cl::desc("Limit the number of times for the same StoreNode and RootNode "
126 "to bail out in store merging dependence check"));
128 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
129 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
130 cl::desc("DAG combiner enable reducing the width of load/op/store "
133 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
134 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
135 cl::desc("DAG combiner enable load/<replace bytes>/store with "
136 "a narrower store"));
142 const TargetLowering &TLI;
143 const SelectionDAGTargetInfo *STI;
145 CodeGenOpt::Level OptLevel;
146 bool LegalDAG = false;
147 bool LegalOperations = false;
148 bool LegalTypes = false;
150 bool DisableGenericCombines;
152 /// Worklist of all of the nodes that need to be simplified.
154 /// This must behave as a stack -- new nodes to process are pushed onto the
155 /// back and when processing we pop off of the back.
157 /// The worklist will not contain duplicates but may contain null entries
158 /// due to nodes being deleted from the underlying DAG.
159 SmallVector<SDNode *, 64> Worklist;
161 /// Mapping from an SDNode to its position on the worklist.
163 /// This is used to find and remove nodes from the worklist (by nulling
164 /// them) when they are deleted from the underlying DAG. It relies on
165 /// stable indices of nodes within the worklist.
166 DenseMap<SDNode *, unsigned> WorklistMap;
167 /// This records all nodes attempted to add to the worklist since we
168 /// considered a new worklist entry. Since we do not add duplicate nodes
169 /// to the worklist, this is different from the tail of the worklist.
170 SmallSetVector<SDNode *, 32> PruningList;
172 /// Set of nodes which have been combined (at least once).
174 /// This is used to allow us to reliably add any operands of a DAG node
175 /// which have not yet been combined to the worklist.
176 SmallPtrSet<SDNode *, 32> CombinedNodes;
178 /// Map from candidate StoreNode to the pair of RootNode and count.
179 /// The count is used to track how many times we have seen the StoreNode
180 /// with the same RootNode bail out in dependence check. If we have seen
181 /// the bail out for the same pair many times over a limit, we won't
182 /// consider the StoreNode with the same RootNode as store merging
184 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
186 // AA - Used for DAG load/store alias analysis.
189 /// When an instruction is simplified, add all users of the instruction to
190 /// the work lists because they might get more simplified now.
191 void AddUsersToWorklist(SDNode *N) {
192 for (SDNode *Node : N->uses())
// NOTE(review): the loop body (presumably AddToWorklist(Node)) appears to
// be elided in this chunk — confirm against the full file.
196 /// Convenient shorthand to add a node and all of its user to the worklist.
197 void AddToWorklistWithUsers(SDNode *N) {
// Users first, then (in the elided remainder of this body) N itself.
198 AddUsersToWorklist(N);
// NOTE(review): the AddToWorklist(N) call appears to be elided in this
// chunk — confirm against the full file.
202 // Prune potentially dangling nodes. This is called after
203 // any visit to a node, but should also be called during a visit after any
204 // failed combine which may have created a DAG node.
205 void clearAddedDanglingWorklistEntries() {
206 // Check any nodes added to the worklist to see if they are prunable.
207 while (!PruningList.empty()) {
208 auto *N = PruningList.pop_back_val();
// NOTE(review): the guard skipping nodes that still have uses appears to
// be elided in this chunk; recursivelyDeleteUnusedNodes re-checks use
// counts itself, so deletion remains safe — confirm against full file.
210 recursivelyDeleteUnusedNodes(N);
214 SDNode *getNextWorklistEntry() {
215 // Before we do any work, remove nodes that are not in use.
216 clearAddedDanglingWorklistEntries();
218 // The Worklist holds the SDNodes in order, but it may contain null
// slots left behind by removeFromWorklist(); pop until a live node is
// found or the worklist is exhausted.
// NOTE(review): the declaration of N (e.g. `SDNode *N = nullptr;`)
// appears to be elided in this chunk — confirm against the full file.
220 while (!N && !Worklist.empty()) {
221 N = Worklist.pop_back_val();
// Every live worklist entry must have a matching index in WorklistMap;
// erase it now that the node is being handed out for processing.
225 bool GoodWorklistEntry = WorklistMap.erase(N);
226 (void)GoodWorklistEntry;
227 assert(GoodWorklistEntry &&
228 "Found a worklist entry without a corresponding map entry!");
233 /// Call the node-specific routine that folds each particular type of node.
234 SDValue visit(SDNode *N);
237 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
238 : DAG(D), TLI(D.getTargetLoweringInfo()),
239 STI(D.getSubtarget().getSelectionDAGInfo()),
240 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
241 ForCodeSize = DAG.shouldOptForSize();
// STI may be null, so check it before dereferencing; targets can disable
// the generic combines wholesale at a given optimization level.
242 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
// Compute the widest legal store size over all simple value types; used
// as an upper bound when widening/merging stores.
244 MaximumLegalStoreInBits = 0;
245 // We use the minimum store size here, since that's all we can guarantee
246 // for the scalable vector types.
247 for (MVT VT : MVT::all_valuetypes())
248 if (EVT(VT).isSimple() && VT != MVT::Other &&
249 TLI.isTypeLegal(EVT(VT)) &&
250 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
251 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254 void ConsiderForPruning(SDNode *N) {
255 // Mark this for potential pruning.
// Actual pruning happens lazily in clearAddedDanglingWorklistEntries().
256 PruningList.insert(N);
259 /// Add to the worklist making sure its instance is at the back (next to be
261 void AddToWorklist(SDNode *N) {
262 assert(N->getOpcode() != ISD::DELETED_NODE &&
263 "Deleted Node added to Worklist");
265 // Skip handle nodes as they can't usefully be combined and confuse the
266 // zero-use deletion strategy.
267 if (N->getOpcode() == ISD::HANDLENODE)
// NOTE(review): the early `return;` for the HANDLENODE case appears to be
// elided in this chunk — confirm against the full file.
270 ConsiderForPruning(N);
// Push only if not already present; WorklistMap remembers the node's slot
// index so removeFromWorklist() can null it out in O(1).
272 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
273 Worklist.push_back(N);
276 /// Remove all instances of N from the worklist.
277 void removeFromWorklist(SDNode *N) {
// Drop every piece of combiner bookkeeping that may still reference N.
278 CombinedNodes.erase(N);
279 PruningList.remove(N);
280 StoreRootCountMap.erase(N);
282 auto It = WorklistMap.find(N);
283 if (It == WorklistMap.end())
284 return; // Not in the worklist.
286 // Null out the entry rather than erasing it to avoid a linear operation.
287 Worklist[It->second] = nullptr;
288 WorklistMap.erase(It);
291 void deleteAndRecombine(SDNode *N);
292 bool recursivelyDeleteUnusedNodes(SDNode *N);
294 /// Replaces all uses of the results of one DAG node with new values.
295 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 /// Replaces all uses of the results of one DAG node with new values.
299 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
300 return CombineTo(N, &Res, 1, AddTo);
303 /// Replaces all uses of the results of one DAG node with new values.
304 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
306 SDValue To[] = { Res0, Res1 };
307 return CombineTo(N, To, 2, AddTo);
310 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313 unsigned MaximumLegalStoreInBits;
315 /// Check the specified integer node value to see if it can be simplified or
316 /// if things it uses can be simplified by bit propagation.
317 /// If so, return true.
318 bool SimplifyDemandedBits(SDValue Op) {
319 unsigned BitWidth = Op.getScalarValueSizeInBits();
// Demand every bit; the two-argument overload does the real work.
320 APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
321 return SimplifyDemandedBits(Op, DemandedBits);
324 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
325 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
// NOTE(review): the declaration of `Known` (a KnownBits) and the `return
// false;` on failure appear to be elided in this chunk — confirm against
// the full file.
327 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
// Revisit the node since the target may have simplified it further.
331 AddToWorklist(Op.getNode());
333 CommitTargetLoweringOpt(TLO);
337 /// Check the specified vector node value to see if it can be simplified or
338 /// if things it uses can be simplified as it only uses some of the
339 /// elements. If so, return true.
340 bool SimplifyDemandedVectorElts(SDValue Op) {
341 // TODO: For now just pretend it cannot be simplified.
342 if (Op.getValueType().isScalableVector())
// NOTE(review): the `return false;` for the scalable-vector bail-out
// appears to be elided in this chunk — confirm against the full file.
// Demand every element; the two-argument overload does the real work.
345 unsigned NumElts = Op.getValueType().getVectorNumElements();
346 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
347 return SimplifyDemandedVectorElts(Op, DemandedElts);
350 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
351 const APInt &DemandedElts,
352 bool AssumeSingleUse = false);
353 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
356 bool CombineToPreIndexedLoadStore(SDNode *N);
357 bool CombineToPostIndexedLoadStore(SDNode *N);
358 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
359 bool SliceUpLoad(SDNode *N);
361 // Scalars have size 0 to distinguish from singleton vectors.
362 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
363 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
364 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
366 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
370 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
371 /// \param EltNo index of the vector element to load.
372 /// \param OriginalLoad load that EVE came from to be replaced.
373 /// \returns EVE on success SDValue() on failure.
374 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
376 LoadSDNode *OriginalLoad);
377 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
378 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
379 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
380 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
381 SDValue PromoteIntBinOp(SDValue Op);
382 SDValue PromoteIntShiftOp(SDValue Op);
383 SDValue PromoteExtend(SDValue Op);
384 bool PromoteLoad(SDValue Op);
386 /// Call the node-specific routine that knows how to fold each
387 /// particular type of node. If that doesn't do anything, try the
388 /// target-specific DAG combines.
389 SDValue combine(SDNode *N);
391 // Visitation implementation - Implement dag node combining for different
392 // node types. The semantics are as follows:
394 // SDValue.getNode() == 0 - No change was made
395 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
396 // otherwise - N should be replaced by the returned Operand.
398 SDValue visitTokenFactor(SDNode *N);
399 SDValue visitMERGE_VALUES(SDNode *N);
400 SDValue visitADD(SDNode *N);
401 SDValue visitADDLike(SDNode *N);
402 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
403 SDValue visitSUB(SDNode *N);
404 SDValue visitADDSAT(SDNode *N);
405 SDValue visitSUBSAT(SDNode *N);
406 SDValue visitADDC(SDNode *N);
407 SDValue visitADDO(SDNode *N);
408 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
409 SDValue visitSUBC(SDNode *N);
410 SDValue visitSUBO(SDNode *N);
411 SDValue visitADDE(SDNode *N);
412 SDValue visitADDCARRY(SDNode *N);
413 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
414 SDValue visitSUBE(SDNode *N);
415 SDValue visitSUBCARRY(SDNode *N);
416 SDValue visitMUL(SDNode *N);
417 SDValue visitMULFIX(SDNode *N);
418 SDValue useDivRem(SDNode *N);
419 SDValue visitSDIV(SDNode *N);
420 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
421 SDValue visitUDIV(SDNode *N);
422 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
423 SDValue visitREM(SDNode *N);
424 SDValue visitMULHU(SDNode *N);
425 SDValue visitMULHS(SDNode *N);
426 SDValue visitSMUL_LOHI(SDNode *N);
427 SDValue visitUMUL_LOHI(SDNode *N);
428 SDValue visitMULO(SDNode *N);
429 SDValue visitIMINMAX(SDNode *N);
430 SDValue visitAND(SDNode *N);
431 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
432 SDValue visitOR(SDNode *N);
433 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
434 SDValue visitXOR(SDNode *N);
435 SDValue SimplifyVBinOp(SDNode *N);
436 SDValue visitSHL(SDNode *N);
437 SDValue visitSRA(SDNode *N);
438 SDValue visitSRL(SDNode *N);
439 SDValue visitFunnelShift(SDNode *N);
440 SDValue visitRotate(SDNode *N);
441 SDValue visitABS(SDNode *N);
442 SDValue visitBSWAP(SDNode *N);
443 SDValue visitBITREVERSE(SDNode *N);
444 SDValue visitCTLZ(SDNode *N);
445 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
446 SDValue visitCTTZ(SDNode *N);
447 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
448 SDValue visitCTPOP(SDNode *N);
449 SDValue visitSELECT(SDNode *N);
450 SDValue visitVSELECT(SDNode *N);
451 SDValue visitSELECT_CC(SDNode *N);
452 SDValue visitSETCC(SDNode *N);
453 SDValue visitSETCCCARRY(SDNode *N);
454 SDValue visitSIGN_EXTEND(SDNode *N);
455 SDValue visitZERO_EXTEND(SDNode *N);
456 SDValue visitANY_EXTEND(SDNode *N);
457 SDValue visitAssertExt(SDNode *N);
458 SDValue visitAssertAlign(SDNode *N);
459 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
460 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
461 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
462 SDValue visitTRUNCATE(SDNode *N);
463 SDValue visitBITCAST(SDNode *N);
464 SDValue visitFREEZE(SDNode *N);
465 SDValue visitBUILD_PAIR(SDNode *N);
466 SDValue visitFADD(SDNode *N);
467 SDValue visitFSUB(SDNode *N);
468 SDValue visitFMUL(SDNode *N);
469 SDValue visitFMA(SDNode *N);
470 SDValue visitFDIV(SDNode *N);
471 SDValue visitFREM(SDNode *N);
472 SDValue visitFSQRT(SDNode *N);
473 SDValue visitFCOPYSIGN(SDNode *N);
474 SDValue visitFPOW(SDNode *N);
475 SDValue visitSINT_TO_FP(SDNode *N);
476 SDValue visitUINT_TO_FP(SDNode *N);
477 SDValue visitFP_TO_SINT(SDNode *N);
478 SDValue visitFP_TO_UINT(SDNode *N);
479 SDValue visitFP_ROUND(SDNode *N);
480 SDValue visitFP_EXTEND(SDNode *N);
481 SDValue visitFNEG(SDNode *N);
482 SDValue visitFABS(SDNode *N);
483 SDValue visitFCEIL(SDNode *N);
484 SDValue visitFTRUNC(SDNode *N);
485 SDValue visitFFLOOR(SDNode *N);
486 SDValue visitFMINNUM(SDNode *N);
487 SDValue visitFMAXNUM(SDNode *N);
488 SDValue visitFMINIMUM(SDNode *N);
489 SDValue visitFMAXIMUM(SDNode *N);
490 SDValue visitBRCOND(SDNode *N);
491 SDValue visitBR_CC(SDNode *N);
492 SDValue visitLOAD(SDNode *N);
494 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
495 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
497 SDValue visitSTORE(SDNode *N);
498 SDValue visitLIFETIME_END(SDNode *N);
499 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
500 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
501 SDValue visitBUILD_VECTOR(SDNode *N);
502 SDValue visitCONCAT_VECTORS(SDNode *N);
503 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
504 SDValue visitVECTOR_SHUFFLE(SDNode *N);
505 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
506 SDValue visitINSERT_SUBVECTOR(SDNode *N);
507 SDValue visitMLOAD(SDNode *N);
508 SDValue visitMSTORE(SDNode *N);
509 SDValue visitMGATHER(SDNode *N);
510 SDValue visitMSCATTER(SDNode *N);
511 SDValue visitFP_TO_FP16(SDNode *N);
512 SDValue visitFP16_TO_FP(SDNode *N);
513 SDValue visitVECREDUCE(SDNode *N);
515 SDValue visitFADDForFMACombine(SDNode *N);
516 SDValue visitFSUBForFMACombine(SDNode *N);
517 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
519 SDValue XformToShuffleWithZero(SDNode *N);
520 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
521 const SDLoc &DL, SDValue N0,
523 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
525 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
526 SDValue N1, SDNodeFlags Flags);
528 SDValue visitShiftByConstant(SDNode *N);
530 SDValue foldSelectOfConstants(SDNode *N);
531 SDValue foldVSelectOfConstants(SDNode *N);
532 SDValue foldBinOpIntoSelect(SDNode *BO);
533 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
534 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
535 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
536 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
537 SDValue N2, SDValue N3, ISD::CondCode CC,
538 bool NotExtCompare = false);
539 SDValue convertSelectOfFPConstantsToLoadOffset(
540 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
542 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
543 SDValue N2, SDValue N3, ISD::CondCode CC);
544 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
546 SDValue unfoldMaskedMerge(SDNode *N);
547 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
548 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
549 const SDLoc &DL, bool foldBooleans);
550 SDValue rebuildSetCC(SDValue N);
552 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
553 SDValue &CC, bool MatchStrict = false) const;
554 bool isOneUseSetCC(SDValue N) const;
556 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
558 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
559 SDValue CombineExtLoad(SDNode *N);
560 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
561 SDValue combineRepeatedFPDivisors(SDNode *N);
562 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
563 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
564 SDValue BuildSDIV(SDNode *N);
565 SDValue BuildSDIVPow2(SDNode *N);
566 SDValue BuildUDIV(SDNode *N);
567 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
568 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
569 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
570 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
571 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
572 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
573 SDNodeFlags Flags, bool Reciprocal);
574 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
575 SDNodeFlags Flags, bool Reciprocal);
576 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
577 bool DemandHighBits = true);
578 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
579 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
580 SDValue InnerPos, SDValue InnerNeg,
581 unsigned PosOpcode, unsigned NegOpcode,
583 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
584 SDValue InnerPos, SDValue InnerNeg,
585 unsigned PosOpcode, unsigned NegOpcode,
587 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
588 SDValue MatchLoadCombine(SDNode *N);
589 SDValue MatchStoreCombine(StoreSDNode *N);
590 SDValue ReduceLoadWidth(SDNode *N);
591 SDValue ReduceLoadOpStoreWidth(SDNode *N);
592 SDValue splitMergedValStore(StoreSDNode *ST);
593 SDValue TransformFPLoadStorePair(SDNode *N);
594 SDValue convertBuildVecZextToZext(SDNode *N);
595 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
596 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
597 SDValue reduceBuildVecToShuffle(SDNode *N);
598 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
599 ArrayRef<int> VectorMask, SDValue VecIn1,
600 SDValue VecIn2, unsigned LeftIdx,
602 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
604 /// Walk up chain skipping non-aliasing memory nodes,
605 /// looking for aliasing nodes and adding them to the Aliases vector.
606 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
607 SmallVectorImpl<SDValue> &Aliases);
609 /// Return true if there is any possibility that the two addresses overlap.
610 bool isAlias(SDNode *Op0, SDNode *Op1) const;
612 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
613 /// chain (aliasing node.)
614 SDValue FindBetterChain(SDNode *N, SDValue Chain);
616 /// Try to replace a store and any possibly adjacent stores on
617 /// consecutive chains with better chains. Return true only if St is
620 /// Notice that other chains may still be replaced even if the function
622 bool findBetterNeighborChains(StoreSDNode *St);
624 // Helper for findBetterNeighborChains. Walk up the store chain and add
625 // chained stores that do not overlap and can be parallelized.
626 bool parallelizeChainedStores(StoreSDNode *St);
628 /// Holds a pointer to an LSBaseSDNode as well as information on where it
629 /// is located in a sequence of memory operations connected by a chain.
631 // Ptr to the mem node.
632 LSBaseSDNode *MemNode;
634 // Offset from the base ptr.
635 int64_t OffsetFromBase;
637 MemOpLink(LSBaseSDNode *N, int64_t Offset)
638 : MemNode(N), OffsetFromBase(Offset) {}
641 // Classify the origin of a stored value.
642 enum class StoreSource { Unknown, Constant, Extract, Load };
643 StoreSource getStoreSource(SDValue StoreVal) {
// Integer and FP constants — candidates for tryStoreMergeOfConstants.
644 if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
645 return StoreSource::Constant;
// Elements/subvectors extracted from a vector — candidates for
// tryStoreMergeOfExtracts.
646 if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
647 StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
648 return StoreSource::Extract;
// Directly loaded values — candidates for tryStoreMergeOfLoads.
649 if (isa<LoadSDNode>(StoreVal))
650 return StoreSource::Load;
651 return StoreSource::Unknown;
654 /// This is a helper function for visitMUL to check the profitability
655 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
656 /// MulNode is the original multiply, AddNode is (add x, c1),
657 /// and ConstNode is c2.
658 bool isMulAddWithConstProfitable(SDNode *MulNode,
662 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
663 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
664 /// the type of the loaded value to be extended.
665 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
666 EVT LoadResultTy, EVT &ExtVT);
668 /// Helper function to calculate whether the given Load/Store can have its
669 /// width reduced to ExtVT.
670 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
671 EVT &MemVT, unsigned ShAmt = 0);
673 /// Used by BackwardsPropagateMask to find suitable loads.
674 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
675 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
676 ConstantSDNode *Mask, SDNode *&NodeToMask);
677 /// Attempt to propagate a given AND node back to load leaves so that they
678 /// can be combined into narrow loads.
679 bool BackwardsPropagateMask(SDNode *N);
681 /// Helper function for mergeConsecutiveStores which merges the component
683 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
686 /// This is a helper function for mergeConsecutiveStores. When the source
687 /// elements of the consecutive stores are all constants or all extracted
688 /// vector elements, try to merge them into one larger store introducing
689 /// bitcasts if necessary. \return True if a merged store was created.
690 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
691 EVT MemVT, unsigned NumStores,
692 bool IsConstantSrc, bool UseVector,
695 /// This is a helper function for mergeConsecutiveStores. Stores that
696 /// potentially may be merged with St are placed in StoreNodes. RootNode is
697 /// a chain predecessor to all store candidates.
698 void getStoreMergeCandidates(StoreSDNode *St,
699 SmallVectorImpl<MemOpLink> &StoreNodes,
702 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
703 /// have indirect dependency through their operands. RootNode is the
704 /// predecessor to all stores calculated by getStoreMergeCandidates and is
705 /// used to prune the dependency check. \return True if safe to merge.
706 bool checkMergeStoreCandidatesForDependencies(
707 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
710 /// This is a helper function for mergeConsecutiveStores. Given a list of
711 /// store candidates, find the first N that are consecutive in memory.
712 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
713 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
714 int64_t ElementSizeBytes) const;
716 /// This is a helper function for mergeConsecutiveStores. It is used for
717 /// store chains that are composed entirely of constant values.
718 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
719 unsigned NumConsecutiveStores,
720 EVT MemVT, SDNode *Root, bool AllowVectors);
722 /// This is a helper function for mergeConsecutiveStores. It is used for
723 /// store chains that are composed entirely of extracted vector elements.
724 /// When extracting multiple vector elements, try to store them in one
725 /// vector store rather than a sequence of scalar stores.
726 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
727 unsigned NumConsecutiveStores, EVT MemVT,
730 /// This is a helper function for mergeConsecutiveStores. It is used for
731 /// store chains that are composed entirely of loaded values.
732 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
733 unsigned NumConsecutiveStores, EVT MemVT,
734 SDNode *Root, bool AllowVectors,
735 bool IsNonTemporalStore, bool IsNonTemporalLoad);
737 /// Merge consecutive store operations into a wide store.
738 /// This optimization uses wide integers or vectors when possible.
739 /// \return true if stores were merged.
740 bool mergeConsecutiveStores(StoreSDNode *St);
742 /// Try to transform a truncation where C is a constant:
743 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
745 /// \p N needs to be a truncation and its first operand an AND. Other
746 /// requirements are checked by the function (e.g. that trunc is
747 /// single-use) and if missed an empty SDValue is returned.
748 SDValue distributeTruncateThroughAnd(SDNode *N);
750 /// Helper function to determine whether the target supports operation
751 /// given by \p Opcode for type \p VT, that is, whether the operation
752 /// is legal or custom before legalizing operations, and whether is
753 /// legal (but not custom) after legalization.
754 bool hasOperation(unsigned Opcode, EVT VT) {
// NOTE(review): the `if (LegalOperations)` guard line appears to be elided
// in this chunk — as written the second return is unreachable; per the
// comment above, the first return should apply only after legalization.
// Confirm against the full file.
756 return TLI.isOperationLegal(Opcode, VT);
757 return TLI.isOperationLegalOrCustom(Opcode, VT);
761 /// Runs the dag combiner on all nodes in the work list
762 void Run(CombineLevel AtLevel);
764 SelectionDAG &getDAG() const { return DAG; }
766 /// Returns a type large enough to hold any valid shift amount - before type
767 /// legalization these can be huge.
768 EVT getShiftAmountTy(EVT LHSTy) {
769 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
770 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
773 /// This method returns true if we are running before type legalization or
774 /// if the specified VT is legal.
775 bool isTypeLegal(const EVT &VT) {
776 if (!LegalTypes) return true;
777 return TLI.isTypeLegal(VT);
780 /// Convenience wrapper around TargetLowering::getSetCCResultType
781 EVT getSetCCResultType(EVT VT) const {
782 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
785 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
786 SDValue OrigLoad, SDValue ExtLoad,
787 ISD::NodeType ExtType);
790 /// This class is a DAGUpdateListener that removes any deleted
791 /// nodes from the worklist.
792 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
796 explicit WorklistRemover(DAGCombiner &dc)
797 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
799 void NodeDeleted(SDNode *N, SDNode *E) override {
800 DC.removeFromWorklist(N);
804 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
808 explicit WorklistInserter(DAGCombiner &dc)
809 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
811 // FIXME: Ideally we could add N to the worklist, but this causes exponential
812 // compile time costs in large DAGs, e.g. Halide.
813 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
816 } // end anonymous namespace
818 //===----------------------------------------------------------------------===//
819 // TargetLowering::DAGCombinerInfo implementation
820 //===----------------------------------------------------------------------===//
822 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
823 ((DAGCombiner*)DC)->AddToWorklist(N);
826 SDValue TargetLowering::DAGCombinerInfo::
827 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
828 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
// Thunk: single-result replacement.
831 SDValue TargetLowering::DAGCombinerInfo::
832 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
833 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
// Thunk: two-result replacement (e.g. value + chain).
836 SDValue TargetLowering::DAGCombinerInfo::
837 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
838 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
// Thunk: delete N and any operands it solely owns; see the DAGCombiner
// member of the same name for semantics.
841 bool TargetLowering::DAGCombinerInfo::
842 recursivelyDeleteUnusedNodes(SDNode *N) {
843 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
// Thunk: apply a TargetLoweringOpt replacement through the combiner so its
// worklist stays consistent.
846 void TargetLowering::DAGCombinerInfo::
847 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
848 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
851 //===----------------------------------------------------------------------===//
853 //===----------------------------------------------------------------------===//
// Remove N from the worklist and queue operands that are about to become
// dead (or partially dead) so they are revisited after N is deleted.
855 void DAGCombiner::deleteAndRecombine(SDNode *N) {
856 removeFromWorklist(N);
858 // If the operands of this node are only used by the node, they will now be
859 // dead. Make sure to re-visit them and recursively delete dead nodes.
860 for (const SDValue &Op : N->ops())
861 // For an operand generating multiple values, one of the values may
862 // become dead allowing further simplification (e.g. split index
863 // arithmetic from an indexed load).
864 if (Op->hasOneUse() || Op->getNumValues() > 1)
865 AddToWorklist(Op.getNode());
870 // APInts must be the same size for most operations, this helper
871 // function zero extends the shorter of the pair so that they match.
872 // We provide an Offset so that we can create bitwidths that won't overflow.
// Both operands are widened (never truncated) to Offset + max(width, width).
873 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
874 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
875 LHS = LHS.zextOrSelf(Bits);
876 RHS = RHS.zextOrSelf(Bits);
879 // Return true if this node is a setcc, or is a select_cc
880 // that selects between the target values used for true and false, making it
881 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
882 // the appropriate nodes based on the type of node we are checking. This
883 // simplifies life a bit for the callers.
884 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
885 SDValue &CC, bool MatchStrict) const {
// Plain SETCC: operands are (LHS, RHS, condcode).
886 if (N.getOpcode() == ISD::SETCC) {
887 LHS = N.getOperand(0);
888 RHS = N.getOperand(1);
889 CC = N.getOperand(2);
// Strict FP compares carry a chain in operand 0, so the comparison operands
// are shifted by one.  (NOTE: listing is elided here — the MatchStrict guard
// preceding this condition is not visible in this excerpt.)
894 (N.getOpcode() == ISD::STRICT_FSETCC ||
895 N.getOpcode() == ISD::STRICT_FSETCCS)) {
896 LHS = N.getOperand(1);
897 RHS = N.getOperand(2);
898 CC = N.getOperand(3);
// SELECT_CC only counts as a setcc if it selects between the canonical
// true/false constants for this type's boolean contents.
902 if (N.getOpcode() != ISD::SELECT_CC ||
903 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
904 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
// With undefined boolean contents we cannot rely on the select values.
907 if (TLI.getBooleanContents(N.getValueType()) ==
908 TargetLowering::UndefinedBooleanContent)
911 LHS = N.getOperand(0);
912 RHS = N.getOperand(1);
913 CC = N.getOperand(4);
917 /// Return true if this is a SetCC-equivalent operation with only one use.
918 /// If this is true, it allows the users to invert the operation for free when
919 /// it is profitable to do so.
920 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
// (The N0/N1/N2 scratch declarations are elided from this listing.)
922 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
927 // Returns the SDNode if it is a constant float BuildVector
928 // or constant float.
// Returns null otherwise (the return statements are elided in this listing).
929 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
930 if (isa<ConstantFPSDNode>(N))
932 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
937 // Determines if it is a constant integer or a build vector of constant
938 // integers (and undefs).
939 // Do not permit build vector implicit truncation.
940 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
// Scalar case: a ConstantSDNode qualifies unless it is opaque and opaques
// are disallowed.
941 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
942 return !(Const->isOpaque() && NoOpaques)
943 if (N.getOpcode() != ISD::BUILD_VECTOR)
// Every element must be a constant of exactly the vector's scalar width —
// this rejects implicitly-truncating build vectors.
945 unsigned BitWidth = N.getScalarValueSizeInBits();
946 for (const SDValue &Op : N->op_values()) {
949 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
950 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
951 (Const->isOpaque() && NoOpaques))
957 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undefs (the tail of this comment is elided in this listing).
959 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
960 if (V.getOpcode() != ISD::BUILD_VECTOR)
// Accept either all-integer or all-FP constant build vectors.
962 return isConstantOrConstantVector(V, NoOpaques) ||
963 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
966 // Determine if this an indexed load with an opaque target constant index.
// Splitting is allowed only when globally enabled (MaySplitLoadIndex) and the
// index (operand 2) is not an opaque target constant.
967 static bool canSplitIdx(LoadSDNode *LD) {
968 return MaySplitLoadIndex &&
969 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
970 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
// Returns true when folding (add (add x, c1), c2) -> (add x, c1+c2) would
// turn a legal addressing mode used by some load/store of the inner add into
// an illegal one, i.e. when reassociation would undo a CodeGenPrepare split.
973 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
977 // Currently this only tries to ensure we don't undo the GEP splits done by
978 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
979 // we check if the following transformation would be problematic:
980 // (load/store (add, (add, x, offset1), offset2)) ->
981 // (load/store (add, x, offset1+offset2)).
983 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
989 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
990 auto *C2 = dyn_cast<ConstantSDNode>(N1);
994 const APInt &C1APIntVal = C1->getAPIntValue();
995 const APInt &C2APIntVal = C2->getAPIntValue();
// Offsets wider than 64 bits can't be expressed in AddrMode::BaseOffs.
996 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
999 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1000 if (CombinedValueIntVal.getBitWidth() > 64)
1002 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
// Check every memory user of the inner add.
1004 for (SDNode *Node : N0->uses()) {
1005 auto LoadStore = dyn_cast<MemSDNode>(Node);
1007 // Is x[offset2] already not a legal addressing mode? If so then
1008 // reassociating the constants breaks nothing (we test offset2 because
1009 // that's the one we hope to fold into the load or store).
1010 TargetLoweringBase::AddrMode AM;
1011 AM.HasBaseReg = true;
1012 AM.BaseOffs = C2APIntVal.getSExtValue();
1013 EVT VT = LoadStore->getMemoryVT();
1014 unsigned AS = LoadStore->getAddressSpace();
1015 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1016 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1019 // Would x[offset1+offset2] still be a legal addressing mode?
1020 AM.BaseOffs = CombinedValue;
1021 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1029 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1030 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
// Returns the reassociated value, or a null SDValue (elided returns) when no
// reassociation applies.
1031 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1032 SDValue N0, SDValue N1) {
1033 EVT VT = N0.getValueType();
1035 if (N0.getOpcode() != Opc)
1038 if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1039 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1040 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1041 if (SDValue OpNode =
1042 DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1043 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1046 if (N0.hasOneUse()) {
1047 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1048 // iff (op x, c1) has one use
1049 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1050 if (!OpNode.getNode())
1052 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1058 // Try to reassociate commutative binops.
// Tries both operand orders via reassociateOpsCommutative.
1059 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1060 SDValue N1, SDNodeFlags Flags) {
1061 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1063 // Floating-point reassociation is not allowed without loose FP math.
1064 if (N0.getValueType().isFloatingPoint() ||
1065 N1.getValueType().isFloatingPoint())
// FP reassociation additionally requires reassoc + nsz fast-math flags.
1066 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1069 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1071 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
// Replace all NumTo values produced by N with the values in To, push the new
// nodes and their users onto the worklist, and delete N if it became dead.
// Returns SDValue(N, 0) as the conventional "something changed" result.
1076 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1078 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1080 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1081 To[0].getNode()->dump(&DAG);
1082 dbgs() << " and " << NumTo - 1 << " other values\n");
1083 for (unsigned i = 0, e = NumTo; i != e; ++i)
1084 assert((!To[i].getNode() ||
1085 N->getValueType(i) == To[i].getValueType()) &&
1086 "Cannot combine value to value of different type!");
// DeadNodes keeps the worklist consistent while RAUW deletes nodes.
1088 WorklistRemover DeadNodes(*this);
1089 DAG.ReplaceAllUsesWith(N, To);
1091 // Push the new nodes and any users onto the worklist
1092 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1093 if (To[i].getNode()) {
1094 AddToWorklist(To[i].getNode());
1095 AddUsersToWorklist(To[i].getNode());
1100 // Finally, if the node is now dead, remove it from the graph. The node
1101 // may not be dead if the replacement process recursively simplified to
1102 // something else needing this node.
1104 deleteAndRecombine(N);
1105 return SDValue(N, 0);
// Apply a TargetLoweringOpt's Old->New replacement to the DAG and keep the
// combiner worklist in sync.  (The "void DAGCombiner::" line preceding this
// signature continuation is elided in this listing.)
1109 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1110 // Replace the old value with the new one.
1112 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1113 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1116 // Replace all uses. If any nodes become isomorphic to other nodes and
1117 // are deleted, make sure to remove them from our worklist.
1118 WorklistRemover DeadNodes(*this);
1119 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1121 // Push the new node and any (possibly new) users onto the worklist.
1122 AddToWorklistWithUsers(TLO.New.getNode());
1124 // Finally, if the node is now dead, remove it from the graph. The node
1125 // may not be dead if the replacement process recursively simplified to
1126 // something else needing this node.
1127 if (TLO.Old.getNode()->use_empty())
1128 deleteAndRecombine(TLO.Old.getNode());
1131 /// Check the specified integer node value to see if it can be simplified or if
1132 /// things it uses can be simplified by bit propagation. If so, return true.
1133 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1134 const APInt &DemandedElts,
1135 bool AssumeSingleUse) {
1136 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
// Delegate the actual analysis to TLI; bail out if nothing was simplified.
1138 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1142 // Revisit the node.
1143 AddToWorklist(Op.getNode());
// Commit the Old->New replacement recorded in TLO.
1145 CommitTargetLoweringOpt(TLO);
1149 /// Check the specified vector node value to see if it can be simplified or
1150 /// if things it uses can be simplified as it only uses some of the elements.
1151 /// If so, return true.
1152 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1153 const APInt &DemandedElts,
1154 bool AssumeSingleUse) {
1155 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1156 APInt KnownUndef, KnownZero;
// Delegate element-level demanded analysis to TLI.
1157 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1158 TLO, 0, AssumeSingleUse))
1161 // Revisit the node.
1162 AddToWorklist(Op.getNode());
// Commit the Old->New replacement recorded in TLO.
1164 CommitTargetLoweringOpt(TLO);
// Replace Load's value with a truncate of the wider ExtLoad's value, rewire
// the chain result, and delete the original load.
1168 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1170 EVT VT = Load->getValueType(0);
1171 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1173 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1174 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1175 WorklistRemover DeadNodes(*this);
// Value users see the truncated wide load; chain users see ExtLoad's chain.
1176 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1177 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1178 deleteAndRecombine(Load);
1179 AddToWorklist(Trunc.getNode());
// Produce a PVT-typed version of Op: loads become extending loads,
// AssertSext/AssertZext recurse on their input, constants get sign- or
// zero-extended, and anything else is ANY_EXTENDed if legal.
1182 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1185 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1186 LoadSDNode *LD = cast<LoadSDNode>(Op);
1187 EVT MemVT = LD->getMemoryVT();
// Non-extending loads may use any extension; extending loads must keep
// their existing extension kind.
1188 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1189 : LD->getExtensionType();
1191 return DAG.getExtLoad(ExtType, DL, PVT,
1192 LD->getChain(), LD->getBasePtr(),
1193 MemVT, LD->getMemOperand());
1196 unsigned Opc = Op.getOpcode();
1199 case ISD::AssertSext:
1200 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1201 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1203 case ISD::AssertZext:
1204 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1205 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1207 case ISD::Constant: {
1209 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1210 return DAG.getNode(ExtOpc, DL, PVT, Op);
1214 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1216 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
// Promote Op to PVT and re-establish its signed value with
// SIGN_EXTEND_INREG of the old type.  Requires SIGN_EXTEND_INREG legality.
1219 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1220 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1222 EVT OldVT = Op.getValueType();
1224 bool Replace = false;
1225 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1226 if (!NewOp.getNode())
1228 AddToWorklist(NewOp.getNode());
// When PromoteOperand created a promoted load, replace the old load's uses.
1231 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1232 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1233 DAG.getValueType(OldVT));
// Promote Op to PVT and clear the bits above the old width via
// getZeroExtendInReg, preserving the unsigned value.
1236 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1237 EVT OldVT = Op.getValueType();
1239 bool Replace = false;
1240 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1241 if (!NewOp.getNode())
1243 AddToWorklist(NewOp.getNode());
// When PromoteOperand created a promoted load, replace the old load's uses.
1246 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1247 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1250 /// Promote the specified integer binary operation if the target indicates it is
1251 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1252 /// i32 since i16 instructions are longer.
1253 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1254 if (!LegalOperations)
// Only scalar integer ops are candidates.
1257 EVT VT = Op.getValueType();
1258 if (VT.isVector() || !VT.isInteger())
1261 // If operation type is 'undesirable', e.g. i16 on x86, consider
1263 unsigned Opc = Op.getOpcode();
1264 if (TLI.isTypeDesirableForOp(Opc, VT))
1268 // Consult target whether it is a good idea to promote this operation and
1269 // what's the right type to promote it to.
1270 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1271 assert(PVT != VT && "Don't know what type to promote to!");
1273 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1275 bool Replace0 = false;
1276 SDValue N0 = Op.getOperand(0);
1277 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1279 bool Replace1 = false;
1280 SDValue N1 = Op.getOperand(1);
1281 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
// Compute in the wide type, then truncate back to the original VT.
1285 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1287 // We are always replacing N0/N1's use in N and only need additional
1288 // replacements if there are additional uses.
1289 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1290 // (SDValue) here because the node may reference multiple values
1291 // (for example, the chain value of a load node).
1292 Replace0 &= !N0->hasOneUse();
1293 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1295 // Combine Op here so it is preserved past replacements.
1296 CombineTo(Op.getNode(), RV);
1298 // If operands have a use ordering, make sure we deal with
1299 // predecessor first.
1300 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1302 std::swap(NN0, NN1);
1306 AddToWorklist(NN0.getNode());
1307 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1310 AddToWorklist(NN1.getNode());
1311 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1318 /// Promote the specified integer shift operation if the target indicates it is
1319 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1320 /// i32 since i16 instructions are longer.
1321 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1322 if (!LegalOperations)
1325 EVT VT = Op.getValueType();
1326 if (VT.isVector() || !VT.isInteger())
1329 // If operation type is 'undesirable', e.g. i16 on x86, consider
1331 unsigned Opc = Op.getOpcode();
1332 if (TLI.isTypeDesirableForOp(Opc, VT))
1336 // Consult target whether it is a good idea to promote this operation and
1337 // what's the right type to promote it to.
1338 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1339 assert(PVT != VT && "Don't know what type to promote to!");
1341 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1343 bool Replace = false;
1344 SDValue N0 = Op.getOperand(0);
1345 SDValue N1 = Op.getOperand(1);
// The shifted value must be extended to match the shift's semantics:
// sign-extend for SRA, zero-extend for SRL, any-extend otherwise (SHL).
1346 if (Opc == ISD::SRA)
1347 N0 = SExtPromoteOperand(N0, PVT);
1348 else if (Opc == ISD::SRL)
1349 N0 = ZExtPromoteOperand(N0, PVT);
1351 N0 = PromoteOperand(N0, PVT, Replace);
// Shift in the wide type, then truncate back to the original VT.
1358 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1361 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1363 // Deal with Op being deleted.
1364 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
// Promote an extension node (aext/zext/sext) by re-emitting the same
// extension directly at the wider target-preferred type.
1370 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1371 if (!LegalOperations)
1374 EVT VT = Op.getValueType();
1375 if (VT.isVector() || !VT.isInteger())
1378 // If operation type is 'undesirable', e.g. i16 on x86, consider
1380 unsigned Opc = Op.getOpcode();
1381 if (TLI.isTypeDesirableForOp(Opc, VT))
1385 // Consult target whether it is a good idea to promote this operation and
1386 // what's the right type to promote it to.
1387 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1388 assert(PVT != VT && "Don't know what type to promote to!");
1389 // fold (aext (aext x)) -> (aext x)
1390 // fold (aext (zext x)) -> (zext x)
1391 // fold (aext (sext x)) -> (sext x)
1392 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1393 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
// Promote an unindexed integer load to the wider target-preferred type by
// emitting an extending load plus a truncate, then replacing all uses.
// Returns true if the promotion happened.
1398 bool DAGCombiner::PromoteLoad(SDValue Op) {
1399 if (!LegalOperations)
1402 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1405 EVT VT = Op.getValueType();
1406 if (VT.isVector() || !VT.isInteger())
1409 // If operation type is 'undesirable', e.g. i16 on x86, consider
1411 unsigned Opc = Op.getOpcode();
1412 if (TLI.isTypeDesirableForOp(Opc, VT))
1416 // Consult target whether it is a good idea to promote this operation and
1417 // what's the right type to promote it to.
1418 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1419 assert(PVT != VT && "Don't know what type to promote to!");
1422 SDNode *N = Op.getNode();
1423 LoadSDNode *LD = cast<LoadSDNode>(N);
1424 EVT MemVT = LD->getMemoryVT();
// Non-extending loads may use any extension; extending loads keep theirs.
1425 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1426 : LD->getExtensionType();
1427 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1428 LD->getChain(), LD->getBasePtr(),
1429 MemVT, LD->getMemOperand());
1430 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1432 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1433 Result.getNode()->dump(&DAG); dbgs() << '\n');
1434 WorklistRemover DeadNodes(*this);
1435 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1436 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1437 deleteAndRecombine(N);
1438 AddToWorklist(Result.getNode());
1444 /// Recursively delete a node which has no uses and any operands for
1445 /// which it is the only use.
1447 /// Note that this both deletes the nodes and removes them from the worklist.
1448 /// It also adds any nodes who have had a user deleted to the worklist as they
1449 /// may now have only one use and subject to other combines.
1450 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1451 if (!N->use_empty())
// Worklist of candidates; SmallSetVector gives dedup + pop ordering.
1454 SmallSetVector<SDNode *, 16> Nodes;
1457 N = Nodes.pop_back_val();
1461 if (N->use_empty()) {
// Queue operands before N is deleted — they may become dead too.
1462 for (const SDValue &ChildN : N->op_values())
1463 Nodes.insert(ChildN.getNode());
1465 removeFromWorklist(N);
1470 } while (!Nodes.empty());
1474 //===----------------------------------------------------------------------===//
1475 // Main DAG Combiner implementation
1476 //===----------------------------------------------------------------------===//
// Main driver: seed the worklist with every node in the DAG, then repeatedly
// pull nodes, (re-)legalize them if needed, combine them, and propagate
// replacements until the worklist is exhausted.
1478 void DAGCombiner::Run(CombineLevel AtLevel) {
1479 // set the instance variables, so that the various visit routines may use it.
1481 LegalDAG = Level >= AfterLegalizeDAG;
1482 LegalOperations = Level >= AfterLegalizeVectorOps;
1483 LegalTypes = Level >= AfterLegalizeTypes;
// Listener: newly created nodes are considered for pruning while we run.
1485 WorklistInserter AddNodes(*this);
1487 // Add all the dag nodes to the worklist.
1488 for (SDNode &Node : DAG.allnodes())
1489 AddToWorklist(&Node);
1491 // Create a dummy node (which is not added to allnodes), that adds a reference
1492 // to the root node, preventing it from being deleted, and tracking any
1493 // changes of the root.
1494 HandleSDNode Dummy(DAG.getRoot());
1496 // While we have a valid worklist entry node, try to combine it.
1497 while (SDNode *N = getNextWorklistEntry()) {
1498 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1499 // N is deleted from the DAG, since they too may now be dead or may have a
1500 // reduced number of uses, allowing other xforms.
1501 if (recursivelyDeleteUnusedNodes(N))
1504 WorklistRemover DeadNodes(*this);
1506 // If this combine is running after legalizing the DAG, re-legalize any
1507 // nodes pulled off the worklist.
1509 SmallSetVector<SDNode *, 16> UpdatedNodes;
1510 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1512 for (SDNode *LN : UpdatedNodes)
1513 AddToWorklistWithUsers(LN);
1519 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1521 // Add any operands of the new node which have not yet been combined to the
1522 // worklist as well. Because the worklist uniques things already, this
1523 // won't repeatedly process the same operand.
1524 CombinedNodes.insert(N);
1525 for (const SDValue &ChildN : N->op_values())
1526 if (!CombinedNodes.count(ChildN.getNode()))
1527 AddToWorklist(ChildN.getNode());
1529 SDValue RV = combine(N);
1536 // If we get back the same node we passed in, rather than a new node or
1537 // zero, we know that the node must have defined multiple values and
1538 // CombineTo was used. Since CombineTo takes care of the worklist
1539 // mechanics for us, we have no work to do in this case.
1540 if (RV.getNode() == N)
1543 assert(N->getOpcode() != ISD::DELETED_NODE &&
1544 RV.getOpcode() != ISD::DELETED_NODE &&
1545 "Node was deleted but visit returned new node!");
1547 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
// All-values RAUW when the value counts match; otherwise the single result
// value is replaced (the else branch is implied by the elided lines here).
1549 if (N->getNumValues() == RV.getNode()->getNumValues())
1550 DAG.ReplaceAllUsesWith(N, RV.getNode());
1552 assert(N->getValueType(0) == RV.getValueType() &&
1553 N->getNumValues() == 1 && "Type mismatch");
1554 DAG.ReplaceAllUsesWith(N, &RV);
1557 // Push the new node and any users onto the worklist
1558 AddToWorklist(RV.getNode());
1559 AddUsersToWorklist(RV.getNode());
1561 // Finally, if the node is now dead, remove it from the graph. The node
1562 // may not be dead if the replacement process recursively simplified to
1563 // something else needing this node. This will also take care of adding any
1564 // operands which have lost a user to the worklist.
1565 recursivelyDeleteUnusedNodes(N);
1568 // If the root changed (e.g. it was a dead load, update the root).
1569 DAG.setRoot(Dummy.getValue());
1570 DAG.RemoveDeadNodes();
// Opcode dispatch table: route N to the per-opcode visit function.  Falls
// through to an SDValue() default (elided in this listing) for unhandled
// opcodes.  Several related opcodes share one visitor (e.g. ROTL/ROTR,
// the saturating/overflow arithmetic pairs, and all VECREDUCE_* kinds).
1573 SDValue DAGCombiner::visit(SDNode *N) {
1574 switch (N->getOpcode()) {
1576 case ISD::TokenFactor: return visitTokenFactor(N);
1577 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1578 case ISD::ADD: return visitADD(N);
1579 case ISD::SUB: return visitSUB(N);
1581 case ISD::UADDSAT: return visitADDSAT(N);
1583 case ISD::USUBSAT: return visitSUBSAT(N);
1584 case ISD::ADDC: return visitADDC(N);
1586 case ISD::UADDO: return visitADDO(N);
1587 case ISD::SUBC: return visitSUBC(N);
1589 case ISD::USUBO: return visitSUBO(N);
1590 case ISD::ADDE: return visitADDE(N);
1591 case ISD::ADDCARRY: return visitADDCARRY(N);
1592 case ISD::SUBE: return visitSUBE(N);
1593 case ISD::SUBCARRY: return visitSUBCARRY(N);
1595 case ISD::SMULFIXSAT:
1597 case ISD::UMULFIXSAT: return visitMULFIX(N);
1598 case ISD::MUL: return visitMUL(N);
1599 case ISD::SDIV: return visitSDIV(N);
1600 case ISD::UDIV: return visitUDIV(N);
1602 case ISD::UREM: return visitREM(N);
1603 case ISD::MULHU: return visitMULHU(N);
1604 case ISD::MULHS: return visitMULHS(N);
1605 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1606 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1608 case ISD::UMULO: return visitMULO(N);
1612 case ISD::UMAX: return visitIMINMAX(N);
1613 case ISD::AND: return visitAND(N);
1614 case ISD::OR: return visitOR(N);
1615 case ISD::XOR: return visitXOR(N);
1616 case ISD::SHL: return visitSHL(N);
1617 case ISD::SRA: return visitSRA(N);
1618 case ISD::SRL: return visitSRL(N);
1620 case ISD::ROTL: return visitRotate(N);
1622 case ISD::FSHR: return visitFunnelShift(N);
1623 case ISD::ABS: return visitABS(N);
1624 case ISD::BSWAP: return visitBSWAP(N);
1625 case ISD::BITREVERSE: return visitBITREVERSE(N);
1626 case ISD::CTLZ: return visitCTLZ(N);
1627 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1628 case ISD::CTTZ: return visitCTTZ(N);
1629 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1630 case ISD::CTPOP: return visitCTPOP(N);
1631 case ISD::SELECT: return visitSELECT(N);
1632 case ISD::VSELECT: return visitVSELECT(N);
1633 case ISD::SELECT_CC: return visitSELECT_CC(N);
1634 case ISD::SETCC: return visitSETCC(N);
1635 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1636 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1637 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1638 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1639 case ISD::AssertSext:
1640 case ISD::AssertZext: return visitAssertExt(N);
1641 case ISD::AssertAlign: return visitAssertAlign(N);
1642 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1643 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1644 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1645 case ISD::TRUNCATE: return visitTRUNCATE(N);
1646 case ISD::BITCAST: return visitBITCAST(N);
1647 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1648 case ISD::FADD: return visitFADD(N);
1649 case ISD::FSUB: return visitFSUB(N);
1650 case ISD::FMUL: return visitFMUL(N);
1651 case ISD::FMA: return visitFMA(N);
1652 case ISD::FDIV: return visitFDIV(N);
1653 case ISD::FREM: return visitFREM(N);
1654 case ISD::FSQRT: return visitFSQRT(N);
1655 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1656 case ISD::FPOW: return visitFPOW(N);
1657 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1658 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1659 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1660 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1661 case ISD::FP_ROUND: return visitFP_ROUND(N);
1662 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1663 case ISD::FNEG: return visitFNEG(N);
1664 case ISD::FABS: return visitFABS(N);
1665 case ISD::FFLOOR: return visitFFLOOR(N);
1666 case ISD::FMINNUM: return visitFMINNUM(N);
1667 case ISD::FMAXNUM: return visitFMAXNUM(N);
1668 case ISD::FMINIMUM: return visitFMINIMUM(N);
1669 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1670 case ISD::FCEIL: return visitFCEIL(N);
1671 case ISD::FTRUNC: return visitFTRUNC(N);
1672 case ISD::BRCOND: return visitBRCOND(N);
1673 case ISD::BR_CC: return visitBR_CC(N);
1674 case ISD::LOAD: return visitLOAD(N);
1675 case ISD::STORE: return visitSTORE(N);
1676 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1677 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1678 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1679 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1680 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1681 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1682 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1683 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1684 case ISD::MGATHER: return visitMGATHER(N);
1685 case ISD::MLOAD: return visitMLOAD(N);
1686 case ISD::MSCATTER: return visitMSCATTER(N);
1687 case ISD::MSTORE: return visitMSTORE(N);
1688 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1689 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1690 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1691 case ISD::FREEZE: return visitFREEZE(N);
1692 case ISD::VECREDUCE_FADD:
1693 case ISD::VECREDUCE_FMUL:
1694 case ISD::VECREDUCE_ADD:
1695 case ISD::VECREDUCE_MUL:
1696 case ISD::VECREDUCE_AND:
1697 case ISD::VECREDUCE_OR:
1698 case ISD::VECREDUCE_XOR:
1699 case ISD::VECREDUCE_SMAX:
1700 case ISD::VECREDUCE_SMIN:
1701 case ISD::VECREDUCE_UMAX:
1702 case ISD::VECREDUCE_UMIN:
1703 case ISD::VECREDUCE_FMAX:
1704 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
// Top-level combine for one node: try generic visit(), then the target's
// PerformDAGCombine hook, then type promotion, and finally CSE against an
// existing commuted copy of a commutative binop.
1709 SDValue DAGCombiner::combine(SDNode *N) {
1711 if (!DisableGenericCombines)
1714 // If nothing happened, try a target-specific DAG combine.
1715 if (!RV.getNode()) {
1716 assert(N->getOpcode() != ISD::DELETED_NODE &&
1717 "Node was deleted but visit returned NULL!");
// Target hook runs for target-specific opcodes or when the target opted in
// for this generic opcode.
1719 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1720 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1722 // Expose the DAG combiner to the target combiner impls.
1723 TargetLowering::DAGCombinerInfo
1724 DagCombineInfo(DAG, Level, false, this);
1726 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1730 // If nothing happened still, try promoting the operation.
1731 if (!RV.getNode()) {
1732 switch (N->getOpcode()) {
// (The case labels selecting which opcodes reach each promotion helper are
// elided in this listing.)
1740 RV = PromoteIntBinOp(SDValue(N, 0));
1745 RV = PromoteIntShiftOp(SDValue(N, 0));
1747 case ISD::SIGN_EXTEND:
1748 case ISD::ZERO_EXTEND:
1749 case ISD::ANY_EXTEND:
1750 RV = PromoteExtend(SDValue(N, 0));
1753 if (PromoteLoad(SDValue(N, 0)))
1759 // If N is a commutative binary node, try to eliminate it if the commuted
1760 // version is already present in the DAG.
1761 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1762 N->getNumValues() == 1) {
1763 SDValue N0 = N->getOperand(0);
1764 SDValue N1 = N->getOperand(1);
1766 // Constant operands are canonicalized to RHS.
1767 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1768 SDValue Ops[] = {N1, N0};
1769 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1772 return SDValue(CSENode, 0);
1779 /// Given a node, return its input chain if it has one, otherwise return a null
// value (the second comment line and null-return are elided in this listing).
// Checks the common positions first (operand 0, then the last operand) before
// scanning the interior operands for an MVT::Other-typed chain.
1781 static SDValue getInputChainForNode(SDNode *N) {
1782 if (unsigned NumOps = N->getNumOperands()) {
1783 if (N->getOperand(0).getValueType() == MVT::Other)
1784 return N->getOperand(0);
1785 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1786 return N->getOperand(NumOps-1);
1787 for (unsigned i = 1; i < NumOps-1; ++i)
1788 if (N->getOperand(i).getValueType() == MVT::Other)
1789 return N->getOperand(i);
// Simplify a TokenFactor node: drop redundant chain operands, inline
// single-use TokenFactor operands (up to TokenFactorInlineLimit), and prune
// operands that are transitively reachable through another operand's chain.
1794 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1795 // If N has two operands, where one has an input chain equal to the other,
1796 // the 'other' chain is redundant.
1797 if (N->getNumOperands() == 2) {
1798 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1799 return N->getOperand(0);
1800 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1801 return N->getOperand(1);
1804 // Don't simplify token factors if optnone.
1805 if (OptLevel == CodeGenOpt::None)
1808 // If the sole user is a token factor, we should make sure we have a
1809 // chance to merge them together. This prevents TF chains from inhibiting
1811 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1812 AddToWorklist(*(N->use_begin()));
1814 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1815 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1816 SmallPtrSet<SDNode*, 16> SeenOps;
1817 bool Changed = false; // If we should replace this token factor.
1819 // Start out with this token factor.
// Phase 1: flatten the tree of single-use TokenFactors into one operand
// list. TFs acts as a worklist; new TokenFactors found among operands are
// appended and processed on later iterations.
1822 // Iterate through token factors. The TFs grows when new token factors are
1824 for (unsigned i = 0; i < TFs.size(); ++i) {
1825 // Limit number of nodes to inline, to avoid quadratic compile times.
1826 // We have to add the outstanding Token Factors to Ops, otherwise we might
1827 // drop Ops from the resulting Token Factors.
1828 if (Ops.size() > TokenFactorInlineLimit) {
1829 for (unsigned j = i; j < TFs.size(); j++)
1830 Ops.emplace_back(TFs[j], 0);
1831 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1832 // combiner worklist later.
1837 SDNode *TF = TFs[i];
1838 // Check each of the operands.
1839 for (const SDValue &Op : TF->op_values()) {
1840 switch (Op.getOpcode()) {
1841 case ISD::EntryToken:
1842 // Entry tokens don't need to be added to the list. They are
1847 case ISD::TokenFactor:
// Only inline a nested TokenFactor when this is its sole user; otherwise
// it must be kept as an ordinary operand.
1848 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1849 // Queue up for processing.
1850 TFs.push_back(Op.getNode());
1857 // Only add if it isn't already in the list.
1858 if (SeenOps.insert(Op.getNode()).second)
1867 // Re-visit inlined Token Factors, to clean them up in case they have been
1868 // removed. Skip the first Token Factor, as this is the current node.
1869 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1870 AddToWorklist(TFs[i]);
1872 // Remove Nodes that are chained to another node in the list. Do so
1873 // by walking up chains breath-first stopping when we've seen
1874 // another operand. In general we must climb to the EntryNode, but we can exit
1875 // early if we find all remaining work is associated with just one operand as
1876 // no further pruning is possible.
1878 // List of nodes to search through and original Ops from which they originate.
1879 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1880 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1881 SmallPtrSet<SDNode *, 16> SeenChains;
1882 bool DidPruneOps = false;
1884 unsigned NumLeftToConsider = 0;
// Phase 2: seed the breadth-first search with each operand, tagging each
// search frontier entry with the index of the operand it originated from.
1885 for (const SDValue &Op : Ops) {
1886 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1887 OpWorkCount.push_back(1);
// Helper (shadows the member AddToWorklist): enqueue Op into the BFS,
// merging searches when two operands' frontiers meet.
1890 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1891 // If this is an Op, we can remove the op from the list. Remark any
1892 // search associated with it as from the current OpNumber.
1893 if (SeenOps.count(Op) != 0) {
1896 unsigned OrigOpNumber = 0;
1897 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1899 assert((OrigOpNumber != Ops.size()) &&
1900 "expected to find TokenFactor Operand");
1901 // Re-mark worklist from OrigOpNumber to OpNumber
1902 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1903 if (Worklist[i].second == OrigOpNumber) {
1904 Worklist[i].second = OpNumber;
1907 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1908 OpWorkCount[OrigOpNumber] = 0;
1909 NumLeftToConsider--;
1911 // Add if it's a new chain
1912 if (SeenChains.insert(Op).second) {
1913 OpWorkCount[OpNumber]++;
1914 Worklist.push_back(std::make_pair(Op, OpNumber));
// The 1024 cap bounds the BFS so compile time stays linear even for very
// large chain graphs; stopping early is safe (we just prune less).
1918 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1919 // We need at least be consider at least 2 Ops to prune.
1920 if (NumLeftToConsider <= 1)
1922 auto CurNode = Worklist[i].first;
1923 auto CurOpNumber = Worklist[i].second;
1924 assert((OpWorkCount[CurOpNumber] > 0) &&
1925 "Node should not appear in worklist");
1926 switch (CurNode->getOpcode()) {
1927 case ISD::EntryToken:
1928 // Hitting EntryToken is the only way for the search to terminate without
1930 // another operand's search. Prevent us from marking this operand
1932 NumLeftToConsider++;
1934 case ISD::TokenFactor:
1935 for (const SDValue &Op : CurNode->op_values())
1936 AddToWorklist(i, Op.getNode(), CurOpNumber);
1938 case ISD::LIFETIME_START:
1939 case ISD::LIFETIME_END:
1940 case ISD::CopyFromReg:
1941 case ISD::CopyToReg:
// These opcodes carry their chain in operand 0; follow it.
1942 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1945 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1946 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1949 OpWorkCount[CurOpNumber]--;
1950 if (OpWorkCount[CurOpNumber] == 0)
1951 NumLeftToConsider--;
1954 // If we've changed things around then replace token factor.
1958 // The entry token is the only possible outcome.
1959 Result = DAG.getEntryNode();
1962 SmallVector<SDValue, 8> PrunedOps;
// Keep only operands whose chains were NOT reached from another operand's
// search; reachable ones are redundant.
1964 for (const SDValue &Op : Ops) {
1965 if (SeenChains.count(Op.getNode()) == 0)
1966 PrunedOps.push_back(Op);
1968 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1970 Result = DAG.getTokenFactor(SDLoc(N), Ops);
1978 /// MERGE_VALUES can always be eliminated.
// Each result of a MERGE_VALUES node is just its corresponding operand, so
// replace all result values with the operands and delete the node.
1979 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1980 WorklistRemover DeadNodes(*this);
1981 // Replacing results may cause a different MERGE_VALUES to suddenly
1982 // be CSE'd with N, and carry its uses with it. Iterate until no
1983 // uses remain, to ensure that the node can be safely deleted.
1984 // First add the users of this node to the work list so that they
1985 // can be tried again once they have new operands.
1986 AddUsersToWorklist(N);
1988 // Do as a single replacement to avoid rewalking use lists.
1989 SmallVector<SDValue, 8> Ops;
1990 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1991 Ops.push_back(N->getOperand(i));
// ReplaceAllUsesWith maps result value i of N to Ops[i].
1992 DAG.ReplaceAllUsesWith(N, Ops.data());
1993 } while (!N->use_empty());
1994 deleteAndRecombine(N);
1995 return SDValue(N, 0); // Return N so it doesn't get rechecked!
1998 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1999 /// ConstantSDNode pointer else nullptr.
// Opaque constants are deliberately excluded: they must not be folded.
2000 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2001 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2002 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
// Fold a binop of a one-use select-of-constants with a constant operand into
// a select of the folded constants:
//   binop (select Cond, CT, CF), CBO --> select Cond, (binop CT, CBO), (binop CF, CBO)
// Returns an empty SDValue when the pattern does not match.
2005 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2006 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2007 "Unexpected binary operator");
2009 // Don't do this unless the old select is going away. We want to eliminate the
2010 // binary operator, not replace a binop with a select.
2011 // TODO: Handle ISD::SELECT_CC.
// SelOpNo records which binop operand holds the select (0 or 1) so the
// folded operand order can be preserved for non-commutative ops.
2012 unsigned SelOpNo = 0;
2013 SDValue Sel = BO->getOperand(0);
2014 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2016 Sel = BO->getOperand(1);
2019 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
// Both select arms must be (possibly FP) constants.
2022 SDValue CT = Sel.getOperand(1);
2023 if (!isConstantOrConstantVector(CT, true) &&
2024 !isConstantFPBuildVectorOrConstantFP(CT))
2027 SDValue CF = Sel.getOperand(2);
2028 if (!isConstantOrConstantVector(CF, true) &&
2029 !isConstantFPBuildVectorOrConstantFP(CF))
2032 // Bail out if any constants are opaque because we can't constant fold those.
2033 // The exception is "and" and "or" with either 0 or -1 in which case we can
2034 // propagate non constant operands into select. I.e.:
2035 // and (select Cond, 0, -1), X --> select Cond, 0, X
2036 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2037 auto BinOpcode = BO->getOpcode();
2038 bool CanFoldNonConst =
2039 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2040 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2041 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
// CBO is the binop operand that is not the select.
2043 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2044 if (!CanFoldNonConst &&
2045 !isConstantOrConstantVector(CBO, true) &&
2046 !isConstantFPBuildVectorOrConstantFP(CBO))
2049 EVT VT = Sel.getValueType();
2051 // In case of shift value and shift amount may have different VT. For instance
2052 // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
2053 // swapped operands and value types do not match. NB: x86 is fine if operands
2054 // are not swapped with shift amount VT being not bigger than shifted value.
2055 // TODO: that is possible to check for a shift operation, correct VTs and
2056 // still perform optimization on x86 if needed.
2057 if (SelOpNo && VT != CBO.getValueType())
2060 // We have a select-of-constants followed by a binary operator with a
2061 // constant. Eliminate the binop by pulling the constant math into the select.
2062 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2064 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2065 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
// If folding did not produce a constant (or undef), creating the select
// would not be profitable — except in the CanFoldNonConst special case.
2066 if (!CanFoldNonConst && !NewCT.isUndef() &&
2067 !isConstantOrConstantVector(NewCT, true) &&
2068 !isConstantFPBuildVectorOrConstantFP(NewCT))
2071 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2072 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2073 if (!CanFoldNonConst && !NewCF.isUndef() &&
2074 !isConstantOrConstantVector(NewCF, true) &&
2075 !isConstantFPBuildVectorOrConstantFP(NewCF))
2078 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
// Propagate the binop's flags (e.g. nsw/nuw/fast-math) onto the new select.
2079 SelectOp->setFlags(BO->getFlags());
// Fold add/sub of a zext'd "low bit is clear" test into sub/add of the low
// bit itself with an adjusted constant:
//   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
//   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2083 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2084 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2085 "Expecting add or sub");
2087 // Match a constant operand and a zext operand for the math instruction:
// For add the constant is canonicalized to the RHS; for sub it must be the
// LHS (sub C, zext).
2090 bool IsAdd = N->getOpcode() == ISD::ADD;
2091 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2092 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2093 auto *CN = dyn_cast<ConstantSDNode>(C);
2094 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2097 // Match the zext operand as a setcc of a boolean.
2098 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2099 Z.getOperand(0).getValueType() != MVT::i1)
2102 // Match the compare as: setcc (X & 1), 0, eq.
2103 SDValue SetCC = Z.getOperand(0);
2104 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2105 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2106 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2107 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2110 // We are adding/subtracting a constant and an inverted low bit. Turn that
2111 // into a subtract/add of the low bit with incremented/decremented constant:
2112 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2113 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2114 EVT VT = C.getValueType();
// LowBit is (X & 1) widened/narrowed to the result type.
2116 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2117 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2118 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2119 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2122 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2123 /// a shift and add with a different constant.
2124 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2125 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2126 "Expecting add or sub");
2128 // We need a constant operand for the add/sub, and the other operand is a
2129 // logical shift right: add (srl), C or sub C, (srl).
2130 bool IsAdd = N->getOpcode() == ISD::ADD;
2131 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2132 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2133 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2134 ShiftOp.getOpcode() != ISD::SRL)
2137 // The shift must be of a 'not' value.
// hasOneUse guards profitability: the 'not' disappears only if this is its
// sole user.
2138 SDValue Not = ShiftOp.getOperand(0);
2139 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2142 // The shift must be moving the sign bit to the least-significant-bit.
2143 EVT VT = ShiftOp.getValueType();
2144 SDValue ShAmt = ShiftOp.getOperand(1);
2145 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2146 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2149 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2150 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2151 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
// For add we switch to an arithmetic shift (sra); for sub the logical
// shift is kept and the constant is decremented instead.
2153 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2154 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2156 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2157 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2158 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2162 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2163 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2164 /// are no common bits set in the operands).
2165 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2166 SDValue N0 = N->getOperand(0);
2167 SDValue N1 = N->getOperand(1);
2168 EVT VT = N0.getValueType();
2172 if (VT.isVector()) {
2173 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2176 // fold (add x, 0) -> x, vector edition
2177 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2179 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2183 // fold (add x, undef) -> undef
2190 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2191 // canonicalize constant to RHS
2192 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2193 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2194 // fold (add c1, c2) -> c1+c2
2195 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2198 // fold (add x, 0) -> x
2199 if (isNullConstant(N1))
2202 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2203 // fold ((A-c1)+c2) -> (A+(c2-c1))
2204 if (N0.getOpcode() == ISD::SUB &&
2205 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2207 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
// Both operands are non-opaque constants, so folding cannot fail.
2208 assert(Sub && "Constant folding failed");
2209 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2212 // fold ((c1-A)+c2) -> (c1+c2)-A
2213 if (N0.getOpcode() == ISD::SUB &&
2214 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2216 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2217 assert(Add && "Constant folding failed");
2218 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2221 // add (sext i1 X), 1 -> zext (not i1 X)
2222 // We don't transform this pattern:
2223 // add (zext i1 X), -1 -> sext (not i1 X)
2224 // because most (?) targets generate better code for the zext form.
2225 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2226 isOneOrOneSplat(N1)) {
2227 SDValue X = N0.getOperand(0);
2228 if ((!LegalOperations ||
2229 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2230 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2231 X.getScalarValueSizeInBits() == 1) {
2232 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2233 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2237 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2238 // equivalent to (add x, c0).
// The 'or' acts as an add exactly when x and c0 share no set bits.
2239 if (N0.getOpcode() == ISD::OR &&
2240 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2241 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2242 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2243 {N1, N0.getOperand(1)}))
2244 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2248 if (SDValue NewSel = foldBinOpIntoSelect(N))
// Reassociation is skipped when it would break an (add base, offset)
// pattern that address-mode matching relies on.
2252 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2253 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2256 // fold ((0-A) + B) -> B-A
2257 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2258 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2260 // fold (A + (0-B)) -> A-B
2261 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2262 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2264 // fold (A+(B-A)) -> B
2265 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2266 return N1.getOperand(0);
2268 // fold ((B-A)+A) -> B
2269 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2270 return N0.getOperand(0);
2272 // fold ((A-B)+(C-A)) -> (C-B)
2273 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2274 N0.getOperand(0) == N1.getOperand(1))
2275 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2278 // fold ((A-B)+(B-C)) -> (A-C)
2279 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2280 N0.getOperand(1) == N1.getOperand(0))
2281 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2284 // fold (A+(B-(A+C))) to (B-C)
2285 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2286 N0 == N1.getOperand(1).getOperand(0))
2287 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2288 N1.getOperand(1).getOperand(1));
2290 // fold (A+(B-(C+A))) to (B-C)
2291 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2292 N0 == N1.getOperand(1).getOperand(1))
2293 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2294 N1.getOperand(1).getOperand(0));
2296 // fold (A+((B-A)+or-C)) to (B+or-C)
2297 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2298 N1.getOperand(0).getOpcode() == ISD::SUB &&
2299 N0 == N1.getOperand(0).getOperand(1))
2300 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2303 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2304 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2305 SDValue N00 = N0.getOperand(0);
2306 SDValue N01 = N0.getOperand(1);
2307 SDValue N10 = N1.getOperand(0);
2308 SDValue N11 = N1.getOperand(1);
2310 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2311 return DAG.getNode(ISD::SUB, DL, VT,
2312 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2313 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2316 // fold (add (umax X, C), -C) --> (usubsat X, C)
2317 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
// Predicate applied per (splat) element: the umax constant must be the
// negation of the add constant; undef lanes are allowed.
2318 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2319 return (!Max && !Op) ||
2320 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2322 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2323 /*AllowUndefs*/ true))
2324 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2328 if (SimplifyDemandedBits(SDValue(N, 0)))
2329 return SDValue(N, 0);
2331 if (isOneOrOneSplat(N1)) {
2332 // fold (add (xor a, -1), 1) -> (sub 0, a)
2333 if (isBitwiseNot(N0))
2334 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2337 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2338 if (N0.getOpcode() == ISD::ADD ||
2339 N0.getOpcode() == ISD::UADDO ||
2340 N0.getOpcode() == ISD::SADDO) {
// Find which inner operand is the bitwise-not; the other is 'b'.
2343 if (isBitwiseNot(N0.getOperand(0))) {
2344 A = N0.getOperand(1);
2345 Xor = N0.getOperand(0);
2346 } else if (isBitwiseNot(N0.getOperand(1))) {
2347 A = N0.getOperand(0);
2348 Xor = N0.getOperand(1);
2352 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2356 // add (add x, y), 1
2357 // And if the target does not like this form then turn into:
2358 // sub y, (xor x, -1)
2359 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2360 N0.getOpcode() == ISD::ADD) {
2361 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2362 DAG.getAllOnesConstant(DL, VT));
2363 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2367 // (x - y) + -1 -> add (xor y, -1), x
2368 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2369 isAllOnesOrAllOnesSplat(N1)) {
2370 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2371 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
// Try the commutative helper with operands in both orders.
2374 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2377 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
// Combine an ISD::ADD node: delegates the generic "add-like" folds to
// visitADDLike, then applies ADD-specific folds (masked-bool, sign-bit,
// add->or, and vscale folds).
2383 SDValue DAGCombiner::visitADD(SDNode *N) {
2384 SDValue N0 = N->getOperand(0);
2385 SDValue N1 = N->getOperand(1);
2386 EVT VT = N0.getValueType();
2389 if (SDValue Combined = visitADDLike(N))
2392 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2395 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2398 // fold (a+b) -> (a|b) iff a and b share no bits.
2399 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2400 DAG.haveNoCommonBitsSet(N0, N1))
2401 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2403 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2404 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2405 APInt C0 = N0->getConstantOperandAPInt(0);
2406 APInt C1 = N1->getConstantOperandAPInt(0);
2407 return DAG.getVScale(DL, VT, C0 + C1);
2410 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2411 if ((N0.getOpcode() == ISD::ADD) &&
2412 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2413 (N1.getOpcode() == ISD::VSCALE)) {
2414 auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2415 auto VS1 = N1->getConstantOperandAPInt(0);
2416 auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
2417 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
// Combine saturating adds (ISD::SADDSAT / ISD::UADDSAT): identity folds,
// constant canonicalization/folding, and strength-reduction to a plain ADD
// when overflow is provably impossible.
2423 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2424 unsigned Opcode = N->getOpcode();
2425 SDValue N0 = N->getOperand(0);
2426 SDValue N1 = N->getOperand(1);
2427 EVT VT = N0.getValueType();
2431 if (VT.isVector()) {
2432 // TODO SimplifyVBinOp
2434 // fold (add_sat x, 0) -> x, vector edition
2435 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2437 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2441 // fold (add_sat x, undef) -> -1
2442 if (N0.isUndef() || N1.isUndef())
2443 return DAG.getAllOnesConstant(DL, VT)
2445 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2446 // canonicalize constant to RHS
2447 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2448 return DAG.getNode(Opcode, DL, VT, N1, N0);
2449 // fold (add_sat c1, c2) -> c3
2450 return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2453 // fold (add_sat x, 0) -> x
2454 if (isNullConstant(N1))
2457 // If it cannot overflow, transform into an add.
// Only done for the unsigned variant here; computeOverflowKind reasons
// about unsigned overflow.
2458 if (Opcode == ISD::UADDSAT)
2459 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2460 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
// If V is (possibly after peeling TRUNCATE/ZERO_EXTEND/AND-with-1 wrappers)
// the carry result (result #1) of a carry-producing node with a usable
// boolean representation, return that carry value; otherwise return null.
2465 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2466 bool Masked = false;
2468 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2470 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2471 V = V.getOperand(0);
2475 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
// Remember that the value was explicitly masked to bit 0.
2477 V = V.getOperand(0);
2484 // If this is not a carry, return.
2485 if (V.getResNo() != 1)
2488 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2489 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2492 EVT VT = V.getNode()->getValueType(0);
2493 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2496 // If the result is masked, then no matter what kind of bool it is we can
2497 // return. If it isn't, then we need to make sure the bool type is either 0 or
2498 // 1 and not other values.
2500 TLI.getBooleanContents(V.getValueType()) ==
2501 TargetLoweringBase::ZeroOrOneBooleanContent)
2507 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2508 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2509 /// the opcode and bypass the mask operation.
2510 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2511 SelectionDAG &DAG, const SDLoc &DL) {
// Require N1 == (and X, 1) where X is all-sign-bits (i.e. X is 0 or -1),
// so (and X, 1) == -X and the add/sub can be flipped to bypass the mask.
2512 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2515 EVT VT = N0.getValueType();
2516 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2519 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2520 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2521 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2524 /// Helper for doing combines based on N0 and N1 being added to each other.
// Called twice from visitADDLike with (N0,N1) and (N1,N0) so each fold only
// needs to match one operand order. LocReference supplies the debug location.
2525 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2526 SDNode *LocReference) {
2527 EVT VT = N0.getValueType();
2528 SDLoc DL(LocReference);
2530 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2531 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2532 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2533 return DAG.getNode(ISD::SUB, DL, VT, N0,
2534 DAG.getNode(ISD::SHL, DL, VT,
2535 N1.getOperand(0).getOperand(1),
2538 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2542 // add (add x, 1), y
2543 // And if the target does not like this form then turn into:
2544 // sub y, (xor x, -1)
2545 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2546 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2547 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2548 DAG.getAllOnesConstant(DL, VT));
2549 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2552 // Hoist one-use subtraction by non-opaque constant:
2553 // (x - C) + y -> (x + y) - C
2554 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2555 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2556 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2557 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2558 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2560 // Hoist one-use subtraction from non-opaque constant:
2561 // (C - x) + y -> (y - x) + C
2562 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2563 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2564 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2565 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2568 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2569 // rather than 'add 0/-1' (the zext should get folded).
2570 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2571 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2572 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2573 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2574 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2575 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2578 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2579 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2580 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2581 if (TN->getVT() == MVT::i1) {
// (sext_inreg Y, i1) is 0 or -1; (and Y, 1) is its negation, so flip the
// add into a sub.
2582 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2583 DAG.getConstant(1, DL, VT));
2584 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2588 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2589 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2591 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2592 N0, N1.getOperand(0), N1.getOperand(2));
2594 // (add X, Carry) -> (addcarry X, 0, Carry)
2595 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2596 if (SDValue Carry = getAsCarry(TLI, N1))
2597 return DAG.getNode(ISD::ADDCARRY, DL,
2598 DAG.getVTList(VT, Carry.getValueType()), N0,
2599 DAG.getConstant(0, DL, VT), Carry);
// Combine the legacy ISD::ADDC node (add producing a glue carry-out):
// drop the carry when unused, canonicalize constants to RHS, and fold
// away provably-carry-free adds.
2604 SDValue DAGCombiner::visitADDC(SDNode *N) {
2605 SDValue N0 = N->getOperand(0);
2606 SDValue N1 = N->getOperand(1);
2607 EVT VT = N0.getValueType();
2610 // If the flag result is dead, turn this into an ADD.
2611 if (!N->hasAnyUseOfValue(1))
// CARRY_FALSE stands in for the (dead) carry result.
2612 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2613 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2615 // canonicalize constant to RHS.
2616 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2617 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2619 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2621 // fold (addc x, 0) -> x + no carry out
2622 if (isNullConstant(N1))
2623 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2626 // If it cannot overflow, transform into an add.
2627 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2628 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2629 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
// Unconditionally invert boolean V by XOR-ing with the target's "true"
// constant, as dictated by the target's boolean-contents convention.
2634 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2635 SelectionDAG &DAG, const TargetLowering &TLI) {
2636 EVT VT = V.getValueType();
2639 switch (TLI.getBooleanContents(VT)) {
2640 case TargetLowering::ZeroOrOneBooleanContent:
2641 case TargetLowering::UndefinedBooleanContent:
// XOR with 1 flips the low bit, which is all these conventions require.
2642 Cst = DAG.getConstant(1, DL, VT);
2644 case TargetLowering::ZeroOrNegativeOneBooleanContent:
// Here "true" is all-ones, so flip every bit.
2645 Cst = DAG.getAllOnesConstant(DL, VT);
2649 return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2653 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2654 * then the flip also occurs if computing the inverse is the same cost.
2655 * This function returns an empty SDValue in case it cannot flip the boolean
2656 * without increasing the cost of the computation. If you want to flip a boolean
2657 * no matter what, use flipBoolean.
2659 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2660 const TargetLowering &TLI,
// A constant flip is free, so always do it when forced.
2662 if (Force && isa<ConstantSDNode>(V))
2663 return flipBoolean(V, SDLoc(V), DAG, TLI);
2665 if (V.getOpcode() != ISD::XOR)
2668 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2672 EVT VT = V.getValueType();
2674 bool IsFlip = false;
// The XOR constant only represents a boolean flip if it matches the
// target's "true" value for this type's boolean convention.
2675 switch(TLI.getBooleanContents(VT)) {
2676 case TargetLowering::ZeroOrOneBooleanContent:
2677 IsFlip = Const->isOne();
2679 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2680 IsFlip = Const->isAllOnesValue();
2682 case TargetLowering::UndefinedBooleanContent:
// Only bit 0 matters under the undefined convention.
2683 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2688 return V.getOperand(0);
2690 return flipBoolean(V, SDLoc(V), DAG, TLI);
// Combine overflow-producing adds (ISD::UADDO / ISD::SADDO): drop the dead
// overflow flag, canonicalize constants, strength-reduce when overflow is
// impossible, and fold the not+1 pattern into a USUBO.
2694 SDValue DAGCombiner::visitADDO(SDNode *N) {
2695 SDValue N0 = N->getOperand(0);
2696 SDValue N1 = N->getOperand(1);
2697 EVT VT = N0.getValueType();
2698 bool IsSigned = (ISD::SADDO == N->getOpcode());
2700 EVT CarryVT = N->getValueType(1);
2703 // If the flag result is dead, turn this into an ADD.
2704 if (!N->hasAnyUseOfValue(1))
2705 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2706 DAG.getUNDEF(CarryVT));
2708 // canonicalize constant to RHS.
2709 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2710 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2711 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2713 // fold (addo x, 0) -> x + no carry out
2714 if (isNullOrNullSplat(N1))
2715 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT))
2718 // If it cannot overflow, transform into an add.
2719 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2720 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2721 DAG.getConstant(0, DL, CarryVT));
2723 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
// (~a + 1) == -a == (0 - a); the USUBO borrow is the inverse of the UADDO
// carry, hence the flipBoolean on the second result.
2724 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2725 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2726 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2727 return CombineTo(N, Sub,
2728 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
// Try the commutative UADDO-specific folds in both operand orders.
2731 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2734 if (SDValue Combined = visitUADDOLike(N1, N0, N))
// UADDO-specific folds applied with operands in one order; the caller
// (visitADDO) invokes this twice with operands swapped.
2741 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2742 EVT VT = N0.getValueType();
2746 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2747 // If Y + 1 cannot overflow.
// Adding 1 stands in for the worst-case incoming carry; if even Y + 1
// cannot overflow, merging the carry chain is safe.
2748 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2749 SDValue Y = N1.getOperand(0);
2750 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2751 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2752 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2756 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2757 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2758 if (SDValue Carry = getAsCarry(TLI, N1))
2759 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2760 DAG.getConstant(0, SDLoc(N), VT), Carry);
// Combine the legacy ISD::ADDE node (add with glue carry-in/out):
// canonicalize constants to RHS and degrade to ADDC when the carry-in is
// known false.
2765 SDValue DAGCombiner::visitADDE(SDNode *N) {
2766 SDValue N0 = N->getOperand(0);
2767 SDValue N1 = N->getOperand(1);
2768 SDValue CarryIn = N->getOperand(2);
2770 // canonicalize constant to RHS
2771 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2772 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2774 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2777 // fold (adde x, y, false) -> (addc x, y)
2778 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2779 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2784 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2785 SDValue N0 = N->getOperand(0);
2786 SDValue N1 = N->getOperand(1);
2787 SDValue CarryIn = N->getOperand(2);
2790 // canonicalize constant to RHS
2791 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2792 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2794 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2796 // fold (addcarry x, y, false) -> (uaddo x, y)
2797 if (isNullConstant(CarryIn)) {
2798 if (!LegalOperations ||
2799 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2800 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2803 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2804 if (isNullConstant(N0) && isNullConstant(N1)) {
2805 EVT VT = N0.getValueType();
2806 EVT CarryVT = CarryIn.getValueType();
2807 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2808 AddToWorklist(CarryExt.getNode());
2809 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2810 DAG.getConstant(1, DL, VT)),
2811 DAG.getConstant(0, DL, CarryVT));
2814 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2817 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2824 * If we are facing some sort of diamond carry propagation pattern try to
2825 * break it up to generate something like:
2826 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2828 * The end result is usually an increase in operation required, but because the
2829 * carry is now linearized, other transforms can kick in and optimize the DAG.
2831 * Patterns typically look something like
2836 * | (addcarry *, 0, Z)
2840 * (addcarry X, *, *)
2842 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2843 * produce a combine with a single path for carry propagation.
2845 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2846 SDValue X, SDValue Carry0, SDValue Carry1,
2848 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2850 if (Carry1.getOpcode() != ISD::UADDO)
2856 * First look for a suitable Z. It will present itself in the form of
2857 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2859 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2860 isNullConstant(Carry0.getOperand(1))) {
2861 Z = Carry0.getOperand(2);
2862 } else if (Carry0.getOpcode() == ISD::UADDO &&
2863 isOneConstant(Carry0.getOperand(1))) {
2864 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2865 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2867 // We couldn't find a suitable Z.
2872 auto cancelDiamond = [&](SDValue A,SDValue B) {
2874 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2875 Combiner.AddToWorklist(NewY.getNode());
2876 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2877 DAG.getConstant(0, DL, X.getValueType()),
2886 * (addcarry *, 0, Z)
2888 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2889 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2893 * (addcarry A, 0, Z)
2899 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2900 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2903 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2904 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2910 // If we are facing some sort of diamond carry/borrow in/out pattern try to
2911 // match patterns like:
2913 // (uaddo A, B) CarryIn
2916 // PartialSum PartialCarryOutX /
2918 // | ____|____________/
2920 // (uaddo *, *) \________
2923 // | PartialCarryOutY |
2926 // AddCarrySum | ______/
2928 // CarryOut = (or *, *)
2930 // And generate ADDCARRY (or SUBCARRY) with two result values:
2932 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
2934 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
2935 // a single path for carry/borrow out propagation:
2936 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2937 const TargetLowering &TLI, SDValue Carry0,
2938 SDValue Carry1, SDNode *N) {
2939 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
2941 unsigned Opcode = Carry0.getOpcode();
2942 if (Opcode != Carry1.getOpcode())
2944 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
2947 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
2948 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
2949 // the above ASCII art.)
2950 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
2951 Carry1.getOperand(1) != Carry0.getValue(0))
2952 std::swap(Carry0, Carry1);
2953 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
2954 Carry1.getOperand(1) != Carry0.getValue(0))
2957 // The carry in value must be on the righthand side for subtraction.
2958 unsigned CarryInOperandNum =
2959 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
2960 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
2962 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
2964 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
2965 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
2968 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
2969 // TODO: make getAsCarry() aware of how partial carries are merged.
2970 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
2972 CarryIn = CarryIn.getOperand(0);
2973 if (CarryIn.getValueType() != MVT::i1)
2978 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
2979 Carry0.getOperand(1), CarryIn);
2981 // Please note that because we have proven that the result of the UADDO/USUBO
2982 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
2983 // therefore prove that if the first UADDO/USUBO overflows, the second
2984 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
2987 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
2988 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
2990 // This is important because it means that OR and XOR can be used to merge
2991 // carry flags; and that AND can return a constant zero.
2993 // TODO: match other operations that can merge flags (ADD, etc)
2994 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
2995 if (N->getOpcode() == ISD::AND)
2996 return DAG.getConstant(0, DL, MVT::i1);
2997 return Merged.getValue(1);
3000 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3002 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3003 if (isBitwiseNot(N0))
3004 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3006 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3007 N0.getOperand(0), NotC);
3008 return CombineTo(N, Sub,
3009 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
3012 // Iff the flag result is dead:
3013 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3014 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3015 // or the dependency between the instructions.
3016 if ((N0.getOpcode() == ISD::ADD ||
3017 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3018 N0.getValue(1) != CarryIn)) &&
3019 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3020 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3021 N0.getOperand(0), N0.getOperand(1), CarryIn);
3024 * When one of the addcarry argument is itself a carry, we may be facing
3025 * a diamond carry propagation. In which case we try to transform the DAG
3026 * to ensure linear carry propagation if that is possible.
3028 if (auto Y = getAsCarry(TLI, N1)) {
3029 // Because both are carries, Y and Z can be swapped.
3030 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3032 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3039 // Since it may not be valid to emit a fold to zero for vector initializers
3040 // check if we can before folding.
3041 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3042 SelectionDAG &DAG, bool LegalOperations) {
3044 return DAG.getConstant(0, DL, VT);
3045 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3046 return DAG.getConstant(0, DL, VT);
3050 SDValue DAGCombiner::visitSUB(SDNode *N) {
3051 SDValue N0 = N->getOperand(0);
3052 SDValue N1 = N->getOperand(1);
3053 EVT VT = N0.getValueType();
3057 if (VT.isVector()) {
3058 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3061 // fold (sub x, 0) -> x, vector edition
3062 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3066 // fold (sub x, x) -> 0
3067 // FIXME: Refactor this and xor and other similar operations together.
3069 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3071 // fold (sub c1, c2) -> c3
3072 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3075 if (SDValue NewSel = foldBinOpIntoSelect(N))
3078 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3080 // fold (sub x, c) -> (add x, -c)
3082 return DAG.getNode(ISD::ADD, DL, VT, N0,
3083 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3086 if (isNullOrNullSplat(N0)) {
3087 unsigned BitWidth = VT.getScalarSizeInBits();
3088 // Right-shifting everything out but the sign bit followed by negation is
3089 // the same as flipping arithmetic/logical shift type without the negation:
3090 // -(X >>u 31) -> (X >>s 31)
3091 // -(X >>s 31) -> (X >>u 31)
3092 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3093 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3094 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3095 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3096 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3097 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3101 // 0 - X --> 0 if the sub is NUW.
3102 if (N->getFlags().hasNoUnsignedWrap())
3105 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3106 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3107 // N1 must be 0 because negating the minimum signed value is undefined.
3108 if (N->getFlags().hasNoSignedWrap())
3111 // 0 - X --> X if X is 0 or the minimum signed value.
3116 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3117 if (isAllOnesOrAllOnesSplat(N0))
3118 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3120 // fold (A - (0-B)) -> A+B
3121 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3122 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3124 // fold A-(A-B) -> B
3125 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3126 return N1.getOperand(1);
3128 // fold (A+B)-A -> B
3129 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3130 return N0.getOperand(1);
3132 // fold (A+B)-B -> A
3133 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3134 return N0.getOperand(0);
3136 // fold (A+C1)-C2 -> A+(C1-C2)
3137 if (N0.getOpcode() == ISD::ADD &&
3138 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3139 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3141 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3142 assert(NewC && "Constant folding failed");
3143 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3146 // fold C2-(A+C1) -> (C2-C1)-A
3147 if (N1.getOpcode() == ISD::ADD) {
3148 SDValue N11 = N1.getOperand(1);
3149 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3150 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3151 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3152 assert(NewC && "Constant folding failed");
3153 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3157 // fold (A-C1)-C2 -> A-(C1+C2)
3158 if (N0.getOpcode() == ISD::SUB &&
3159 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3160 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3162 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3163 assert(NewC && "Constant folding failed");
3164 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3167 // fold (c1-A)-c2 -> (c1-c2)-A
3168 if (N0.getOpcode() == ISD::SUB &&
3169 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3170 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3172 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3173 assert(NewC && "Constant folding failed");
3174 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3177 // fold ((A+(B+or-C))-B) -> A+or-C
3178 if (N0.getOpcode() == ISD::ADD &&
3179 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3180 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3181 N0.getOperand(1).getOperand(0) == N1)
3182 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3183 N0.getOperand(1).getOperand(1));
3185 // fold ((A+(C+B))-B) -> A+C
3186 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3187 N0.getOperand(1).getOperand(1) == N1)
3188 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3189 N0.getOperand(1).getOperand(0));
3191 // fold ((A-(B-C))-C) -> A-B
3192 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3193 N0.getOperand(1).getOperand(1) == N1)
3194 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3195 N0.getOperand(1).getOperand(0));
3197 // fold (A-(B-C)) -> A+(C-B)
3198 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3199 return DAG.getNode(ISD::ADD, DL, VT, N0,
3200 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3203 // A - (A & B) -> A & (~B)
3204 if (N1.getOpcode() == ISD::AND) {
3205 SDValue A = N1.getOperand(0);
3206 SDValue B = N1.getOperand(1);
3210 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3212 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3213 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3217 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3218 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3219 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3220 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3221 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3222 N1.getOperand(0).getOperand(1),
3224 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3226 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3227 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3228 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3230 N1.getOperand(1).getOperand(1));
3231 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3235 // If either operand of a sub is undef, the result is undef
3241 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3244 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3247 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3250 // (x - y) - 1 -> add (xor y, -1), x
3251 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3252 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3253 DAG.getAllOnesConstant(DL, VT));
3254 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3258 // sub y, (xor x, -1)
3259 // And if the target does not like this form then turn into:
3260 // add (add x, y), 1
3261 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3262 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3263 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3266 // Hoist one-use addition by non-opaque constant:
3267 // (x + C) - y -> (x - y) + C
3268 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3269 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3270 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3271 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3273 // y - (x + C) -> (y - x) - C
3274 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3275 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3276 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3277 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3279 // (x - C) - y -> (x - y) - C
3280 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3281 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3282 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3283 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3284 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3286 // (C - x) - y -> C - (x + y)
3287 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3288 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3289 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3290 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3293 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3294 // rather than 'sub 0/1' (the sext should get folded).
3295 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3296 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3297 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3298 TLI.getBooleanContents(VT) ==
3299 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3300 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3301 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3304 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3305 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3306 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3307 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3308 SDValue S0 = N1.getOperand(0);
3309 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3310 unsigned OpSizeInBits = VT.getScalarSizeInBits();
3311 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3312 if (C->getAPIntValue() == (OpSizeInBits - 1))
3313 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3318 // If the relocation model supports it, consider symbol offsets.
3319 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3320 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3321 // fold (sub Sym, c) -> Sym-c
3322 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3323 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3325 (uint64_t)N1C->getSExtValue());
3326 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3327 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3328 if (GA->getGlobal() == GB->getGlobal())
3329 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3333 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3334 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3335 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3336 if (TN->getVT() == MVT::i1) {
3337 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3338 DAG.getConstant(1, DL, VT));
3339 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3343 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3344 if (N1.getOpcode() == ISD::VSCALE) {
3345 APInt IntVal = N1.getConstantOperandAPInt(0);
3346 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3349 // Prefer an add for more folding potential and possibly better codegen:
3350 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3351 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3352 SDValue ShAmt = N1.getOperand(1);
3353 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3355 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3356 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3357 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3361 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3362 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3363 if (SDValue Carry = getAsCarry(TLI, N0)) {
3365 SDValue Zero = DAG.getConstant(0, DL, VT);
3366 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3367 return DAG.getNode(ISD::ADDCARRY, DL,
3368 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3376 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3377 SDValue N0 = N->getOperand(0);
3378 SDValue N1 = N->getOperand(1);
3379 EVT VT = N0.getValueType();
3383 if (VT.isVector()) {
3384 // TODO SimplifyVBinOp
3386 // fold (sub_sat x, 0) -> x, vector edition
3387 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3391 // fold (sub_sat x, undef) -> 0
3392 if (N0.isUndef() || N1.isUndef())
3393 return DAG.getConstant(0, DL, VT);
3395 // fold (sub_sat x, x) -> 0
3397 return DAG.getConstant(0, DL, VT);
3399 // fold (sub_sat c1, c2) -> c3
3400 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3403 // fold (sub_sat x, 0) -> x
3404 if (isNullConstant(N1))
3410 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3411 SDValue N0 = N->getOperand(0);
3412 SDValue N1 = N->getOperand(1);
3413 EVT VT = N0.getValueType();
3416 // If the flag result is dead, turn this into an SUB.
3417 if (!N->hasAnyUseOfValue(1))
3418 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3419 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3421 // fold (subc x, x) -> 0 + no borrow
3423 return CombineTo(N, DAG.getConstant(0, DL, VT),
3424 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3426 // fold (subc x, 0) -> x + no borrow
3427 if (isNullConstant(N1))
3428 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3430 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3431 if (isAllOnesConstant(N0))
3432 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3433 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3438 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3439 SDValue N0 = N->getOperand(0);
3440 SDValue N1 = N->getOperand(1);
3441 EVT VT = N0.getValueType();
3442 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3444 EVT CarryVT = N->getValueType(1);
3447 // If the flag result is dead, turn this into an SUB.
3448 if (!N->hasAnyUseOfValue(1))
3449 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3450 DAG.getUNDEF(CarryVT));
3452 // fold (subo x, x) -> 0 + no borrow
3454 return CombineTo(N, DAG.getConstant(0, DL, VT),
3455 DAG.getConstant(0, DL, CarryVT));
3457 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3459 // fold (subox, c) -> (addo x, -c)
3460 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3461 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3462 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3465 // fold (subo x, 0) -> x + no borrow
3466 if (isNullOrNullSplat(N1))
3467 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3469 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3470 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3471 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3472 DAG.getConstant(0, DL, CarryVT));
3477 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3478 SDValue N0 = N->getOperand(0);
3479 SDValue N1 = N->getOperand(1);
3480 SDValue CarryIn = N->getOperand(2);
3482 // fold (sube x, y, false) -> (subc x, y)
3483 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3484 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3489 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3490 SDValue N0 = N->getOperand(0);
3491 SDValue N1 = N->getOperand(1);
3492 SDValue CarryIn = N->getOperand(2);
3494 // fold (subcarry x, y, false) -> (usubo x, y)
3495 if (isNullConstant(CarryIn)) {
3496 if (!LegalOperations ||
3497 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3498 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3504 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3506 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3507 SDValue N0 = N->getOperand(0);
3508 SDValue N1 = N->getOperand(1);
3509 SDValue Scale = N->getOperand(2);
3510 EVT VT = N0.getValueType();
3512 // fold (mulfix x, undef, scale) -> 0
3513 if (N0.isUndef() || N1.isUndef())
3514 return DAG.getConstant(0, SDLoc(N), VT);
3516 // Canonicalize constant to RHS (vector doesn't have to splat)
3517 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3518 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3519 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3521 // fold (mulfix x, 0, scale) -> 0
3522 if (isNullConstant(N1))
3523 return DAG.getConstant(0, SDLoc(N), VT);
3528 SDValue DAGCombiner::visitMUL(SDNode *N) {
3529 SDValue N0 = N->getOperand(0);
3530 SDValue N1 = N->getOperand(1);
3531 EVT VT = N0.getValueType();
3533 // fold (mul x, undef) -> 0
3534 if (N0.isUndef() || N1.isUndef())
3535 return DAG.getConstant(0, SDLoc(N), VT);
3537 bool N1IsConst = false;
3538 bool N1IsOpaqueConst = false;
3542 if (VT.isVector()) {
3543 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3546 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3547 assert((!N1IsConst ||
3548 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3549 "Splat APInt should be element width");
3551 N1IsConst = isa<ConstantSDNode>(N1);
3553 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3554 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3558 // fold (mul c1, c2) -> c1*c2
3559 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3562 // canonicalize constant to RHS (vector doesn't have to splat)
3563 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3564 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3565 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3567 // fold (mul x, 0) -> 0
3568 if (N1IsConst && ConstValue1.isNullValue())
3571 // fold (mul x, 1) -> x
3572 if (N1IsConst && ConstValue1.isOneValue())
3575 if (SDValue NewSel = foldBinOpIntoSelect(N))
3578 // fold (mul x, -1) -> 0-x
3579 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3581 return DAG.getNode(ISD::SUB, DL, VT,
3582 DAG.getConstant(0, DL, VT), N0);
3585 // fold (mul x, (1 << c)) -> x << c
3586 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3587 DAG.isKnownToBeAPowerOfTwo(N1) &&
3588 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3590 SDValue LogBase2 = BuildLogBase2(N1, DL);
3591 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3592 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3593 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3596 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3597 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3598 unsigned Log2Val = (-ConstValue1).logBase2();
3600 // FIXME: If the input is something that is easily negated (e.g. a
3601 // single-use add), we should put the negate there.
3602 return DAG.getNode(ISD::SUB, DL, VT,
3603 DAG.getConstant(0, DL, VT),
3604 DAG.getNode(ISD::SHL, DL, VT, N0,
3605 DAG.getConstant(Log2Val, DL,
3606 getShiftAmountTy(N0.getValueType()))));
3609 // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3610 // mul x, (2^N + 1) --> add (shl x, N), x
3611 // mul x, (2^N - 1) --> sub (shl x, N), x
3612 // Examples: x * 33 --> (x << 5) + x
3613 // x * 15 --> (x << 4) - x
3614 // x * -33 --> -((x << 5) + x)
3615 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3616 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3617 // TODO: We could handle more general decomposition of any constant by
3618 // having the target set a limit on number of ops and making a
3619 // callback to determine that sequence (similar to sqrt expansion).
3620 unsigned MathOp = ISD::DELETED_NODE;
3621 APInt MulC = ConstValue1.abs();
3622 if ((MulC - 1).isPowerOf2())
3624 else if ((MulC + 1).isPowerOf2())
3627 if (MathOp != ISD::DELETED_NODE) {
3629 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3630 assert(ShAmt < VT.getScalarSizeInBits() &&
3631 "multiply-by-constant generated out of bounds shift");
3634 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3635 SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3636 if (ConstValue1.isNegative())
3637 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3642 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3643 if (N0.getOpcode() == ISD::SHL &&
3644 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3645 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3646 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3647 if (isConstantOrConstantVector(C3))
3648 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3651 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3654 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3656 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3657 if (N0.getOpcode() == ISD::SHL &&
3658 isConstantOrConstantVector(N0.getOperand(1)) &&
3659 N0.getNode()->hasOneUse()) {
3661 } else if (N1.getOpcode() == ISD::SHL &&
3662 isConstantOrConstantVector(N1.getOperand(1)) &&
3663 N1.getNode()->hasOneUse()) {
3668 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3669 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3673 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3674 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3675 N0.getOpcode() == ISD::ADD &&
3676 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3677 isMulAddWithConstProfitable(N, N0, N1))
3678 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3679 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3680 N0.getOperand(0), N1),
3681 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3682 N0.getOperand(1), N1));
3684 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3685 if (N0.getOpcode() == ISD::VSCALE)
3686 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3687 APInt C0 = N0.getConstantOperandAPInt(0);
3688 APInt C1 = NC1->getAPIntValue();
3689 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3693 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3699 /// Return true if divmod libcall is available.
3700 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3701 const TargetLowering &TLI) {
3703 EVT NodeType = Node->getValueType(0);
3704 if (!NodeType.isSimple())
3706 switch (NodeType.getSimpleVT().SimpleTy) {
3707 default: return false; // No libcall for vector types.
3708 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3709 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3710 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3711 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3712 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3715 return TLI.getLibcallName(LC) != nullptr;
3718 /// Issue divrem if both quotient and remainder are needed.
// NOTE(review): the embedded numbering skips lines in this excerpt (e.g.
// 3721 -> 3723), so several statements (early returns, braces) are not
// visible here; comments below describe only the visible code.
3719 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3720 if (Node->use_empty())
3721 return SDValue(); // This is a dead node, leave it alone.
3723 unsigned Opcode = Node->getOpcode();
3724 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3725 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3727 // DivMod lib calls can still work on non-legal types if using lib-calls.
3728 EVT VT = Node->getValueType(0);
// Only scalar integer types are considered (vector / non-integer bail out).
3729 if (VT.isVector() || !VT.isInteger())
3732 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3735 // If DIVREM is going to get expanded into a libcall,
3736 // but there is no libcall available, then don't combine.
3737 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3738 !isDivRemLibcallAvailable(Node, isSigned, TLI))
3741 // If div is legal, it's better to do the normal expansion
3742 unsigned OtherOpcode = 0;
3743 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3744 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3745 if (TLI.isOperationLegalOrCustom(Opcode, VT))
3748 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3749 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3753 SDValue Op0 = Node->getOperand(0);
3754 SDValue Op1 = Node->getOperand(1);
// Walk all users of the dividend looking for matching div/rem/divrem nodes
// over the same (Op0, Op1) pair so they can share one DIVREM.
3756 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3757 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3759 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3762 // Convert the other matching node(s), too;
3763 // otherwise, the DIVREM may get target-legalized into something
3764 // target-specific that we won't be able to recognize.
3765 unsigned UserOpc = User->getOpcode();
3766 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3767 User->getOperand(0) == Op0 &&
3768 User->getOperand(1) == Op1) {
3770 if (UserOpc == OtherOpcode) {
3771 SDVTList VTs = DAG.getVTList(VT, VT);
3772 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3773 } else if (UserOpc == DivRemOpc) {
// An equivalent DIVREM already exists; reuse it.
3774 combined = SDValue(User, 0);
3776 assert(UserOpc == Opcode);
// DIVREM result 0 is the quotient, result 1 the remainder.
3780 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3781 CombineTo(User, combined);
3782 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3783 CombineTo(User, combined.getValue(1));
// Fold trivial div/rem cases that need no target information: undef
// operands, zero dividend, divisor of 1, and boolean (i1) element types.
// NOTE(review): the embedded numbering skips lines here, so some guard
// conditions for the returns below are not visible in this excerpt.
3789 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3790 SDValue N0 = N->getOperand(0);
3791 SDValue N1 = N->getOperand(1);
3792 EVT VT = N->getValueType(0);
3795 unsigned Opc = N->getOpcode();
3796 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3797 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3799 // X / undef -> undef
3800 // X % undef -> undef
3803 // NOTE: This includes vectors where any divisor element is zero/undef.
3804 if (DAG.isUndef(Opc, {N0, N1}))
3805 return DAG.getUNDEF(VT);
// Presumably the (not visible) guard above folds 0 / X and 0 % X to 0.
3810 return DAG.getConstant(0, DL, VT);
3814 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3815 if (N0C && N0C->isNullValue())
// Div yields 1, rem yields 0 — consistent with an X/X-style fold; the
// triggering condition is not visible here (TODO confirm).
3821 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3825 // If this is a boolean op (single-bit element type), we can't have
3826 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3827 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3829 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3830 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
// Combine an ISD::SDIV node: constant folds, special divisors (-1, INT_MIN),
// strength reduction to UDIV when sign bits are zero, and SDIVREM formation.
// NOTE(review): interleaved lines are missing from this excerpt (embedded
// numbering skips), so some returns/closing braces are not visible.
3835 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3836 SDValue N0 = N->getOperand(0);
3837 SDValue N1 = N->getOperand(1);
3838 EVT VT = N->getValueType(0);
3839 EVT CCVT = getSetCCResultType(VT);
3843 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3848 // fold (sdiv c1, c2) -> c1/c2
3849 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3850 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
3853 // fold (sdiv X, -1) -> 0-X
3854 if (N1C && N1C->isAllOnesValue())
3855 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3857 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3858 if (N1C && N1C->getAPIntValue().isMinSignedValue())
3859 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3860 DAG.getConstant(1, DL, VT),
3861 DAG.getConstant(0, DL, VT));
3863 if (SDValue V = simplifyDivRem(N, DAG))
3866 if (SDValue NewSel = foldBinOpIntoSelect(N))
3869 // If we know the sign bits of both operands are zero, strength reduce to a
3870 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3871 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3872 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3874 if (SDValue V = visitSDIVLike(N0, N1, N)) {
3875 // If the corresponding remainder node exists, update its users with
3876 // (Dividend - (Quotient * Divisor).
3877 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3879 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3880 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3881 AddToWorklist(Mul.getNode());
3882 AddToWorklist(Sub.getNode());
3883 CombineTo(RemNode, Sub);
3888 // sdiv, srem -> sdivrem
3889 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3890 // true. Otherwise, we break the simplification logic in visitREM().
3891 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3892 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3893 if (SDValue DivRem = useDivRem(N))
// Shared SDIV lowering used by both visitSDIV and visitREM: expand signed
// division by a (possibly negative) power of two into shift/add/select
// sequences, else fall back to BuildSDIV for other constant divisors.
// NOTE(review): interleaved lines are missing from this excerpt.
3899 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3901 EVT VT = N->getValueType(0);
3902 EVT CCVT = getSetCCResultType(VT);
3903 unsigned BitWidth = VT.getScalarSizeInBits();
3905 // Helper for determining whether a value is a power-2 constant scalar or a
3906 // vector of such elements.
3907 auto IsPowerOfTwo = [](ConstantSDNode *C) {
// Reject zero and opaque constants; accept +/- powers of two.
3908 if (C->isNullValue() || C->isOpaque())
3910 if (C->getAPIntValue().isPowerOf2())
3912 if ((-C->getAPIntValue()).isPowerOf2())
3917 // fold (sdiv X, pow2) -> simple ops after legalize
3918 // FIXME: We check for the exact bit here because the generic lowering gives
3919 // better results in that case. The target-specific lowering should learn how
3920 // to handle exact sdivs efficiently.
3921 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3922 // Target-specific implementation of sdiv x, pow2.
3923 if (SDValue Res = BuildSDIVPow2(N))
3926 // Create constants that are functions of the shift amount value.
3927 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3928 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
// C1 = log2(|divisor|), computed as the count of trailing zeros.
3929 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3930 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3931 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3932 if (!isConstantOrConstantVector(Inexact))
3935 // Splat the sign bit into the register
3936 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3937 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3938 AddToWorklist(Sign.getNode());
3940 // Add (N0 < 0) ? abs2 - 1 : 0;
3941 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3942 AddToWorklist(Srl.getNode());
3943 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3944 AddToWorklist(Add.getNode());
3945 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3946 AddToWorklist(Sra.getNode());
3948 // Special case: (sdiv X, 1) -> X
3949 // Special Case: (sdiv X, -1) -> 0-X
3950 SDValue One = DAG.getConstant(1, DL, VT);
3951 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3952 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3953 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3954 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3955 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3957 // If dividing by a positive value, we're done. Otherwise, the result must
3959 SDValue Zero = DAG.getConstant(0, DL, VT);
3960 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3962 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3963 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3964 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3968 // If integer divide is expensive and we satisfy the requirements, emit an
3969 // alternate sequence. Targets may check function attributes for size/speed
3971 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3972 if (isConstantOrConstantVector(N1) &&
3973 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3974 if (SDValue Op = BuildSDIV(N))
// Combine an ISD::UDIV node: constant folds, divisor of -1, shared trivial
// simplifications, the UDIVLike expansion, and UDIVREM formation.
// NOTE(review): interleaved lines are missing from this excerpt.
3980 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3981 SDValue N0 = N->getOperand(0);
3982 SDValue N1 = N->getOperand(1);
3983 EVT VT = N->getValueType(0);
3984 EVT CCVT = getSetCCResultType(VT);
3988 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3993 // fold (udiv c1, c2) -> c1/c2
3994 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3995 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
3998 // fold (udiv X, -1) -> select(X == -1, 1, 0)
3999 if (N1C && N1C->getAPIntValue().isAllOnesValue())
4000 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4001 DAG.getConstant(1, DL, VT),
4002 DAG.getConstant(0, DL, VT));
4004 if (SDValue V = simplifyDivRem(N, DAG))
4007 if (SDValue NewSel = foldBinOpIntoSelect(N))
4010 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4011 // If the corresponding remainder node exists, update its users with
4012 // (Dividend - (Quotient * Divisor).
4013 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4015 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4016 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4017 AddToWorklist(Mul.getNode());
4018 AddToWorklist(Sub.getNode());
4019 CombineTo(RemNode, Sub);
4024 // sdiv, srem -> sdivrem
4025 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4026 // true. Otherwise, we break the simplification logic in visitREM().
4027 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4028 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4029 if (SDValue DivRem = useDivRem(N))
// Shared UDIV lowering used by both visitUDIV and visitREM: turn division by
// a power of two (or a shifted power of two) into a logical shift, else fall
// back to BuildUDIV for other constant divisors.
// NOTE(review): interleaved lines are missing from this excerpt.
4035 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4037 EVT VT = N->getValueType(0);
4039 // fold (udiv x, (1 << c)) -> x >>u c
4040 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4041 DAG.isKnownToBeAPowerOfTwo(N1)) {
4042 SDValue LogBase2 = BuildLogBase2(N1, DL);
4043 AddToWorklist(LogBase2.getNode());
4045 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4046 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4047 AddToWorklist(Trunc.getNode());
4048 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4051 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4052 if (N1.getOpcode() == ISD::SHL) {
4053 SDValue N10 = N1.getOperand(0);
4054 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4055 DAG.isKnownToBeAPowerOfTwo(N10)) {
4056 SDValue LogBase2 = BuildLogBase2(N10, DL);
4057 AddToWorklist(LogBase2.getNode());
// The combined shift amount log2(c)+y is computed in the SHL's amount type.
4059 EVT ADDVT = N1.getOperand(1).getValueType();
4060 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4061 AddToWorklist(Trunc.getNode());
4062 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4063 AddToWorklist(Add.getNode());
4064 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4068 // fold (udiv x, c) -> alternate
4069 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4070 if (isConstantOrConstantVector(N1) &&
4071 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4072 if (SDValue Op = BuildUDIV(N))
4078 // handles ISD::SREM and ISD::UREM
// Combine a remainder node: constant folds, urem-by-pow2 -> AND masks,
// strength reduction SREM -> UREM, X%C -> X - (X/C)*C via the DIVLike
// helpers, and DIVREM formation.
// NOTE(review): interleaved lines are missing from this excerpt.
4079 SDValue DAGCombiner::visitREM(SDNode *N) {
4080 unsigned Opcode = N->getOpcode();
4081 SDValue N0 = N->getOperand(0);
4082 SDValue N1 = N->getOperand(1);
4083 EVT VT = N->getValueType(0);
4084 EVT CCVT = getSetCCResultType(VT);
4086 bool isSigned = (Opcode == ISD::SREM);
4089 // fold (rem c1, c2) -> c1%c2
4090 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4091 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4094 // fold (urem X, -1) -> select(X == -1, 0, x)
4095 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4096 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4097 DAG.getConstant(0, DL, VT), N0);
4099 if (SDValue V = simplifyDivRem(N, DAG))
4102 if (SDValue NewSel = foldBinOpIntoSelect(N))
4106 // If we know the sign bits of both operands are zero, strength reduce to a
4107 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4108 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4109 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4111 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4112 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4113 // fold (urem x, pow2) -> (and x, pow2-1)
4114 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4115 AddToWorklist(Add.getNode());
4116 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4118 if (N1.getOpcode() == ISD::SHL &&
4119 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4120 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4121 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4122 AddToWorklist(Add.getNode());
4123 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4127 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4129 // If X/C can be simplified by the division-by-constant logic, lower
4130 // X%C to the equivalent of X-X/C*C.
4131 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4132 // speculative DIV must not cause a DIVREM conversion. We guard against this
4133 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4134 // combine will not return a DIVREM. Regardless, checking cheapness here
4135 // makes sense since the simplification results in fatter code.
4136 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4137 SDValue OptimizedDiv =
4138 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4139 if (OptimizedDiv.getNode()) {
4140 // If the equivalent Div node also exists, update its users.
4141 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4142 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4144 CombineTo(DivNode, OptimizedDiv);
4145 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4146 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4147 AddToWorklist(OptimizedDiv.getNode());
4148 AddToWorklist(Mul.getNode());
4153 // sdiv, srem -> sdivrem
4154 if (SDValue DivRem = useDivRem(N))
// The remainder is result 1 of the DIVREM node.
4155 return DivRem.getValue(1);
// Combine an ISD::MULHS (signed multiply-high) node: trivial folds for
// 0/1/undef operands, and widening to a full MUL + shift when the
// double-width type is legal.
// NOTE(review): interleaved lines are missing from this excerpt.
4160 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4161 SDValue N0 = N->getOperand(0);
4162 SDValue N1 = N->getOperand(1);
4163 EVT VT = N->getValueType(0);
4166 if (VT.isVector()) {
4167 // fold (mulhs x, 0) -> 0
4168 // do not return N0/N1, because undef node may exist.
4169 if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4170 ISD::isBuildVectorAllZeros(N1.getNode()))
4171 return DAG.getConstant(0, DL, VT);
4174 // fold (mulhs x, 0) -> 0
4175 if (isNullConstant(N1))
4177 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4178 if (isOneConstant(N1))
4179 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4180 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4181 getShiftAmountTy(N0.getValueType())));
4183 // fold (mulhs x, undef) -> 0
4184 if (N0.isUndef() || N1.isUndef())
4185 return DAG.getConstant(0, DL, VT);
4187 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4189 if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
4190 MVT Simple = VT.getSimpleVT();
4191 unsigned SimpleSize = Simple.getSizeInBits();
4192 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4193 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
// Sign-extend both operands, multiply at double width, then shift the
// high half down and truncate back to VT.
4194 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4195 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4196 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4197 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4198 DAG.getConstant(SimpleSize, DL,
4199 getShiftAmountTy(N1.getValueType())));
4200 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
// Combine an ISD::MULHU (unsigned multiply-high) node: trivial folds for
// 0/1/undef operands, mulhu-by-pow2 -> shift, and widening to a full MUL +
// shift when the double-width type is legal.
// NOTE(review): interleaved lines are missing from this excerpt.
4207 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4208 SDValue N0 = N->getOperand(0);
4209 SDValue N1 = N->getOperand(1);
4210 EVT VT = N->getValueType(0);
4213 if (VT.isVector()) {
4214 // fold (mulhu x, 0) -> 0
4215 // do not return N0/N1, because undef node may exist.
4216 if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4217 ISD::isBuildVectorAllZeros(N1.getNode()))
4218 return DAG.getConstant(0, DL, VT);
4221 // fold (mulhu x, 0) -> 0
4222 if (isNullConstant(N1))
4224 // fold (mulhu x, 1) -> 0
4225 if (isOneConstant(N1))
4226 return DAG.getConstant(0, DL, N0.getValueType());
4227 // fold (mulhu x, undef) -> 0
4228 if (N0.isUndef() || N1.isUndef())
4229 return DAG.getConstant(0, DL, VT);
4231 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4232 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4233 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4234 unsigned NumEltBits = VT.getScalarSizeInBits();
4235 SDValue LogBase2 = BuildLogBase2(N1, DL);
4236 SDValue SRLAmt = DAG.getNode(
4237 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4238 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4239 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4240 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4243 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4245 if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
4246 MVT Simple = VT.getSimpleVT();
4247 unsigned SimpleSize = Simple.getSizeInBits();
4248 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4249 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
// Zero-extend both operands, multiply at double width, then shift the
// high half down and truncate back to VT.
4250 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4251 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4252 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4253 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4254 DAG.getConstant(SimpleSize, DL,
4255 getShiftAmountTy(N1.getValueType())));
4256 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4263 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4264 /// give the opcodes for the two computations that are being performed. Return
4265 /// true if a simplification was made.
// If only one result is used, replace the two-result node with the single-
// result opcode; otherwise speculatively build both halves and keep any
// half that combine() can simplify to a legal node.
// NOTE(review): interleaved lines are missing from this excerpt.
4266 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4268 // If the high half is not needed, just compute the low half.
4269 bool HiExists = N->hasAnyUseOfValue(1);
4270 if (!HiExists && (!LegalOperations ||
4271 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4272 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
// Both results of N are replaced with the same single-result node.
4273 return CombineTo(N, Res, Res);
4276 // If the low half is not needed, just compute the high half.
4277 bool LoExists = N->hasAnyUseOfValue(0);
4278 if (!LoExists && (!LegalOperations ||
4279 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4280 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4281 return CombineTo(N, Res, Res);
4284 // If both halves are used, return as it is.
4285 if (LoExists && HiExists)
4288 // If the two computed results can be simplified separately, separate them.
4290 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4291 AddToWorklist(Lo.getNode());
4292 SDValue LoOpt = combine(Lo.getNode());
4293 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4294 (!LegalOperations ||
4295 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4296 return CombineTo(N, LoOpt, LoOpt);
4300 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4301 AddToWorklist(Hi.getNode());
4302 SDValue HiOpt = combine(Hi.getNode());
4303 if (HiOpt.getNode() && HiOpt != Hi &&
4304 (!LegalOperations ||
4305 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4306 return CombineTo(N, HiOpt, HiOpt);
// Combine an ISD::SMUL_LOHI node: drop the unused half via
// SimplifyNodeWithTwoResults, or widen to a double-width signed MUL and
// split the product into lo/hi halves.
// NOTE(review): interleaved lines are missing from this excerpt.
4312 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4313 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4316 EVT VT = N->getValueType(0);
4319 // If the type is twice as wide is legal, transform the mulhu to a wider
4320 // multiply plus a shift.
4321 if (VT.isSimple() && !VT.isVector()) {
4322 MVT Simple = VT.getSimpleVT();
4323 unsigned SimpleSize = Simple.getSizeInBits();
4324 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4325 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4326 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4327 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4328 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4329 // Compute the high part as N1.
4330 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4331 DAG.getConstant(SimpleSize, DL,
4332 getShiftAmountTy(Lo.getValueType())));
4333 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4334 // Compute the low part as N0.
4335 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4336 return CombineTo(N, Lo, Hi);
// Combine an ISD::UMUL_LOHI node: drop the unused half via
// SimplifyNodeWithTwoResults, fold multiply by 0/1, or widen to a
// double-width unsigned MUL and split the product into lo/hi halves.
// NOTE(review): interleaved lines are missing from this excerpt.
4343 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4344 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4347 EVT VT = N->getValueType(0);
4350 // (umul_lohi N0, 0) -> (0, 0)
4351 if (isNullConstant(N->getOperand(1))) {
4352 SDValue Zero = DAG.getConstant(0, DL, VT);
4353 return CombineTo(N, Zero, Zero);
4356 // (umul_lohi N0, 1) -> (N0, 0)
4357 if (isOneConstant(N->getOperand(1))) {
4358 SDValue Zero = DAG.getConstant(0, DL, VT);
4359 return CombineTo(N, N->getOperand(0), Zero);
4362 // If the type is twice as wide is legal, transform the mulhu to a wider
4363 // multiply plus a shift.
4364 if (VT.isSimple() && !VT.isVector()) {
4365 MVT Simple = VT.getSimpleVT();
4366 unsigned SimpleSize = Simple.getSizeInBits();
4367 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4368 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4369 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4370 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4371 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4372 // Compute the high part as N1.
4373 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4374 DAG.getConstant(SimpleSize, DL,
4375 getShiftAmountTy(Lo.getValueType())));
4376 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4377 // Compute the low part as N0.
4378 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4379 return CombineTo(N, Lo, Hi);
// Combine ISD::SMULO/UMULO (multiply-with-overflow) nodes: canonicalize the
// constant operand to the RHS, fold multiply by 0, and turn multiply by 2
// into the matching add-with-overflow.
// NOTE(review): interleaved lines are missing from this excerpt.
4386 SDValue DAGCombiner::visitMULO(SDNode *N) {
4387 SDValue N0 = N->getOperand(0);
4388 SDValue N1 = N->getOperand(1);
4389 EVT VT = N0.getValueType();
4390 bool IsSigned = (ISD::SMULO == N->getOpcode());
// Result 1 of the node is the overflow/carry flag.
4392 EVT CarryVT = N->getValueType(1);
4395 // canonicalize constant to RHS.
4396 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4397 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4398 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4400 // fold (mulo x, 0) -> 0 + no carry out
4401 if (isNullOrNullSplat(N1))
4402 return CombineTo(N, DAG.getConstant(0, DL, VT),
4403 DAG.getConstant(0, DL, CarryVT));
4405 // (mulo x, 2) -> (addo x, x)
4406 if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4407 if (C2->getAPIntValue() == 2)
4408 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4409 N->getVTList(), N0, N0);
// Combine integer min/max nodes (SMIN/SMAX/UMIN/UMAX): constant fold,
// canonicalize the constant to the RHS, and flip signed<->unsigned when the
// sign bits are known zero and the flipped opcode is legal.
// NOTE(review): interleaved lines are missing from this excerpt.
4414 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4415 SDValue N0 = N->getOperand(0);
4416 SDValue N1 = N->getOperand(1);
4417 EVT VT = N0.getValueType();
4418 unsigned Opcode = N->getOpcode();
4422 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4425 // fold operation with constant operands.
4426 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4429 // canonicalize constant to RHS
4430 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4431 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4432 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4434 // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4435 // Only do this if the current op isn't legal and the flipped is.
4436 if (!TLI.isOperationLegal(Opcode, VT) &&
4437 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4438 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
// When the sign bit is zero, signed and unsigned orderings agree, so the
// opcode can be swapped for its counterpart.
4441 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4442 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4443 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4444 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4445 default: llvm_unreachable("Unknown MINMAX opcode");
4447 if (TLI.isOperationLegal(AltOpcode, VT))
4448 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4454 /// If this is a bitwise logic instruction and both operands have the same
4455 /// opcode, try to sink the other opcode after the logic instruction.
// Handles casts (any/zext/sext), truncate, shifts/AND with a shared second
// operand, bswap, bitcast/scalar_to_vector, and same-mask shuffles.
// NOTE(review): interleaved lines are missing from this excerpt (the
// embedded numbering skips), so several early returns are not visible.
4456 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4457 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4458 EVT VT = N0.getValueType();
4459 unsigned LogicOpcode = N->getOpcode();
4460 unsigned HandOpcode = N0.getOpcode();
4461 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4462 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4463 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4465 // Bail early if none of these transforms apply.
4466 if (N0.getNumOperands() == 0)
4469 // FIXME: We should check number of uses of the operands to not increase
4470 // the instruction count for all transforms.
4472 // Handle size-changing casts.
4473 SDValue X = N0.getOperand(0);
4474 SDValue Y = N1.getOperand(0);
4475 EVT XVT = X.getValueType();
4477 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4478 HandOpcode == ISD::SIGN_EXTEND) {
4479 // If both operands have other uses, this transform would create extra
4480 // instructions without eliminating anything.
4481 if (!N0.hasOneUse() && !N1.hasOneUse())
4483 // We need matching integer source types.
4484 if (XVT != Y.getValueType())
4486 // Don't create an illegal op during or after legalization. Don't ever
4487 // create an unsupported vector op.
4488 if ((VT.isVector() || LegalOperations) &&
4489 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4491 // Avoid infinite looping with PromoteIntBinOp.
4492 // TODO: Should we apply desirable/legal constraints to all opcodes?
4493 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4494 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4496 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4497 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4498 return DAG.getNode(HandOpcode, DL, VT, Logic);
4501 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4502 if (HandOpcode == ISD::TRUNCATE) {
4503 // If both operands have other uses, this transform would create extra
4504 // instructions without eliminating anything.
4505 if (!N0.hasOneUse() && !N1.hasOneUse())
4507 // We need matching source types.
4508 if (XVT != Y.getValueType())
4510 // Don't create an illegal op during or after legalization.
4511 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4513 // Be extra careful sinking truncate. If it's free, there's no benefit in
4514 // widening a binop. Also, don't create a logic op on an illegal type.
4515 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4517 if (!TLI.isTypeLegal(XVT))
4519 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4520 return DAG.getNode(HandOpcode, DL, VT, Logic);
4523 // For binops SHL/SRL/SRA/AND:
4524 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4525 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4526 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4527 N0.getOperand(1) == N1.getOperand(1)) {
4528 // If either operand has other uses, this transform is not an improvement.
4529 if (!N0.hasOneUse() || !N1.hasOneUse())
4531 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4532 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4535 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4536 if (HandOpcode == ISD::BSWAP) {
4537 // If either operand has other uses, this transform is not an improvement.
4538 if (!N0.hasOneUse() || !N1.hasOneUse())
4540 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4541 return DAG.getNode(HandOpcode, DL, VT, Logic);
4544 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4545 // Only perform this optimization up until type legalization, before
4546 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4547 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4548 // we don't want to undo this promotion.
4549 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4551 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4552 Level <= AfterLegalizeTypes) {
4553 // Input types must be integer and the same.
4554 if (XVT.isInteger() && XVT == Y.getValueType() &&
4555 !(VT.isVector() && TLI.isTypeLegal(VT) &&
4556 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4557 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4558 return DAG.getNode(HandOpcode, DL, VT, Logic);
4562 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4563 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4564 // If both shuffles use the same mask, and both shuffle within a single
4565 // vector, then it is worthwhile to move the swizzle after the operation.
4566 // The type-legalizer generates this pattern when loading illegal
4567 // vector types from memory. In many cases this allows additional shuffle
4569 // There are other cases where moving the shuffle after the xor/and/or
4570 // is profitable even if shuffles don't perform a swizzle.
4571 // If both shuffles use the same mask, and both shuffles have the same first
4572 // or second operand, then it might still be profitable to move the shuffle
4573 // after the xor/and/or operation.
4574 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4575 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4576 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4577 assert(X.getValueType() == Y.getValueType() &&
4578 "Inputs to shuffles are not the same type");
4580 // Check that both shuffles use the same mask. The masks are known to be of
4581 // the same length because the result vector type is the same.
4582 // Check also that shuffles have only one use to avoid introducing extra
4584 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4585 !SVN0->getMask().equals(SVN1->getMask()))
4588 // Don't try to fold this node if it requires introducing a
4589 // build vector of all zeros that might be illegal at this stage.
4590 SDValue ShOp = N0.getOperand(1);
// XOR is the only logic op for which the shared operand must be zero
// (x ^ x == 0); AND/OR pass the shared operand through unchanged.
4591 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4592 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4594 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4595 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4596 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4597 N0.getOperand(0), N1.getOperand(0));
4598 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4601 // Don't try to fold this node if it requires introducing a
4602 // build vector of all zeros that might be illegal at this stage.
4603 ShOp = N0.getOperand(0);
4604 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4605 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4607 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4608 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4609 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4611 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4618 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
/// \param IsAnd  true when the combining logic op is AND, false for OR.
/// \param N0,N1  the two operands of the logic op; each must be a setcc (or
///               setcc-equivalent, per isSetCCEquivalent) for any fold to fire.
/// Returns the combined node when a fold applies.
4619 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4621 SDValue LL, LR, RL, RR, N0CC, N1CC;
4622 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4623 !isSetCCEquivalent(N1, RL, RR, N1CC))
4626 assert(N0.getValueType() == N1.getValueType() &&
4627 "Unexpected operand types for bitwise logic op");
4628 assert(LL.getValueType() == LR.getValueType() &&
4629 RL.getValueType() == RR.getValueType() &&
4630 "Unexpected operand types for setcc");
4632 // If we're here post-legalization or the logic op type is not i1, the logic
4633 // op type must match a setcc result type. Also, all folds require new
4634 // operations on the left and right operands, so those types must match.
4635 EVT VT = N0.getValueType();
4636 EVT OpVT = LL.getValueType();
4637 if (LegalOperations || VT.getScalarType() != MVT::i1)
4638 if (VT != getSetCCResultType(OpVT))
4640 if (OpVT != RL.getValueType())
4643 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4644 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4645 bool IsInteger = OpVT.isInteger();
// Both compares share the same RHS and predicate: try the "any/all bits"
// family of folds, which merge the two LHS values with a single OR or AND.
4646 if (LR == RR && CC0 == CC1 && IsInteger) {
4647 bool IsZero = isNullOrNullSplat(LR);
4648 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4651 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4652 // All sign bits clear?
4653 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4655 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4656 // Any sign bits set?
4657 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4659 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4660 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4661 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4662 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4663 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4664 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4665 AddToWorklist(Or.getNode());
4666 return DAG.getSetCC(DL, VT, Or, LR, CC1);
4670 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4671 // All sign bits set?
4672 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4674 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4675 // Any sign bits clear?
4676 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4678 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4679 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4680 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4681 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4682 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4683 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4684 AddToWorklist(And.getNode());
4685 return DAG.getSetCC(DL, VT, And, LR, CC1);
4689 // TODO: What is the 'or' equivalent of this fold?
4690 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
// Requires > 1 scalar bit: with a 1-bit type, 0 and -1 are the only values,
// so "X != 0 && X != -1" would be trivially false.
4691 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4692 IsInteger && CC0 == ISD::SETNE &&
4693 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4694 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4695 SDValue One = DAG.getConstant(1, DL, OpVT);
4696 SDValue Two = DAG.getConstant(2, DL, OpVT);
4697 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4698 AddToWorklist(Add.getNode());
4699 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4702 // Try more general transforms if the predicates match and the only user of
4703 // the compares is the 'and' or 'or'.
4704 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4705 N0.hasOneUse() && N1.hasOneUse()) {
4706 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4707 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4708 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4709 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4710 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4711 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4712 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4713 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4716 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4717 // TODO - support non-uniform vector amounts.
4718 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4719 // Match a shared variable operand and 2 non-opaque constant operands.
4720 ConstantSDNode *C0 = isConstOrConstSplat(LR);
4721 ConstantSDNode *C1 = isConstOrConstSplat(RR);
4722 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4723 // Canonicalize larger constant as C0.
4724 if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4727 // The difference of the constants must be a single bit.
4728 const APInt &C0Val = C0->getAPIntValue();
4729 const APInt &C1Val = C1->getAPIntValue();
4730 if ((C0Val - C1Val).isPowerOf2()) {
4731 // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4732 // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4733 SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4734 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4735 SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4736 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4737 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4738 return DAG.getSetCC(DL, VT, And, Zero, CC0);
4744 // Canonicalize equivalent operands to LL == RL.
// Swapping one compare's operands requires swapping its predicate too.
4745 if (LL == RR && LR == RL) {
4746 CC1 = ISD::getSetCCSwappedOperands(CC1);
4750 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4751 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4752 if (LL == RL && LR == RR) {
4753 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
4754 : ISD::getSetCCOrOperation(CC0, CC1, OpVT)
4755 if (NewCC != ISD::SETCC_INVALID &&
4756 (!LegalOperations ||
4757 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4758 TLI.isOperationLegal(ISD::SETCC, OpVT))))
4759 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4765 /// This contains all DAGCombine rules which reduce two values combined by
4766 /// an And operation to a single value. This makes them reusable in the context
4767 /// of visitSELECT(). Rules involving constants are not included as
4768 /// visitSELECT() already handles those cases.
/// \param N0,N1  the two values being AND'd.
/// \param N      the node being combined (returned when N itself is replaced
///               via CombineTo so the caller does not recheck it).
4769 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4770 EVT VT = N1.getValueType();
4773 // fold (and x, undef) -> 0
4774 if (N0.isUndef() || N1.isUndef())
4775 return DAG.getConstant(0, DL, VT);
// IsAnd = true: reuse the shared setcc-combining logic.
4777 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4780 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4781 VT.getSizeInBits() <= 64) {
4782 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4783 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4784 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4785 // immediate for an add, but it is legal if its top c2 bits are set,
4786 // transform the ADD so the immediate doesn't need to be materialized
4788 APInt ADDC = ADDI->getAPIntValue();
4789 APInt SRLC = SRLI->getAPIntValue();
4790 if (ADDC.getMinSignedBits() <= 64 &&
4791 SRLC.ult(VT.getSizeInBits()) &&
4792 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4793 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4794 SRLC.getZExtValue());
4795 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
// NOTE(review): ADDC appears to be modified between these checks
// (presumably ADDC |= Mask) so that re-testing isLegalAddImmediate is
// meaningful here — confirm against the full source.
4797 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4800 DAG.getNode(ISD::ADD, DL0, VT,
4801 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4802 CombineTo(N0.getNode(), NewAdd);
4803 // Return N so it doesn't get rechecked!
4804 return SDValue(N, 0);
4812 // Reduce bit extract of low half of an integer to the narrower type.
4813 // (and (srl i64:x, K), KMask) ->
4814 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4815 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4816 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4817 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4818 unsigned Size = VT.getSizeInBits();
4819 const APInt &AndMask = CAnd->getAPIntValue();
4820 unsigned ShiftBits = CShift->getZExtValue();
4822 // Bail out, this node will probably disappear anyway.
4826 unsigned MaskBits = AndMask.countTrailingOnes();
4827 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4829 if (AndMask.isMask() &&
4830 // Required bits must not span the two halves of the integer and
4831 // must fit in the half size type.
4832 (ShiftBits + MaskBits <= Size / 2) &&
4833 TLI.isNarrowingProfitable(VT, HalfVT) &&
4834 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4835 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4836 TLI.isTruncateFree(VT, HalfVT) &&
4837 TLI.isZExtFree(HalfVT, VT)) {
4838 // The isNarrowingProfitable is to avoid regressions on PPC and
4839 // AArch64 which match a few 64-bit bit insert / bit extract patterns
4840 // on downstream users of this. Those patterns could probably be
4841 // extended to handle extensions mixed in.
4844 assert(MaskBits <= Size);
4846 // Extracting the highest bit of the low half.
4847 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4848 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
// Rebuild the srl+and in the half-width type and zero-extend back.
4851 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4852 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4853 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4854 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4855 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
/// Check whether (and (load ...), AndC) can be rewritten as a smaller
/// zero-extending load. AndC must be a low-bit mask; on success ExtVT is set
/// to the integer type covering exactly the mask's active bits.
4864 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4865 EVT LoadResultTy, EVT &ExtVT) {
// Only masks of the form 0..01..1 can be absorbed into a zextload.
4866 if (!AndC->getAPIntValue().isMask())
4869 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4871 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4872 EVT LoadedVT = LoadN->getMemoryVT();
4874 if (ExtVT == LoadedVT &&
4875 (!LegalOperations ||
4876 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4877 // ZEXTLOAD will match without needing to change the size of the value being
4882 // Do not change the width of a volatile or atomic loads.
4883 if (!LoadN->isSimple())
4886 // Do not generate loads of non-round integer types since these can
4887 // be expensive (and would be wrong if the type is not byte sized).
4888 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4891 if (LegalOperations &&
4892 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
// Give the target a final veto on shrinking this particular load.
4895 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
/// Check whether the given load or store can legally be narrowed to MemVT
/// at byte offset ShAmt/8. Validates rounding, simplicity (non-volatile,
/// non-atomic), alignment, addressing, and target legality hooks.
4901 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4902 ISD::LoadExtType ExtType, EVT &MemVT,
4906 // Only allow byte offsets.
4910 // Do not generate loads of non-round integer types since these can
4911 // be expensive (and would be wrong if the type is not byte sized).
4912 if (!MemVT.isRound())
4915 // Don't change the width of a volatile or atomic loads.
4916 if (!LDST->isSimple())
4919 // Verify that we are actually reducing a load width here.
4920 if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4923 // Ensure that this isn't going to produce an unsupported memory access.
4925 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
4926 const unsigned ByteShAmt = ShAmt / 8;
4927 const Align LDSTAlign = LDST->getAlign();
// The narrowed access starts ByteShAmt bytes in, so its guaranteed
// alignment is the common alignment of the original and the offset.
4928 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
4929 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4930 LDST->getAddressSpace(), NarrowAlign,
4931 LDST->getMemOperand()->getFlags()))
4935 // It's not possible to generate a constant of extended or untyped type.
4936 EVT PtrType = LDST->getBasePtr().getValueType();
4937 if (PtrType == MVT::Untyped || PtrType.isExtended())
4940 if (isa<LoadSDNode>(LDST)) {
4941 LoadSDNode *Load = cast<LoadSDNode>(LDST);
4942 // Don't transform one with multiple uses, this would require adding a new
4944 if (!SDValue(Load, 0).hasOneUse())
4947 if (LegalOperations &&
4948 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4951 // For the transform to be legal, the load must produce only two values
4952 // (the value loaded and the chain). Don't transform a pre-increment
4953 // load, for example, which produces an extra value. Otherwise the
4954 // transformation is not equivalent, and the downstream logic to replace
4955 // uses gets things wrong.
4956 if (Load->getNumValues() > 2)
4959 // If the load that we're shrinking is an extload and we're not just
4960 // discarding the extension we can't simply shrink the load. Bail.
4961 // TODO: It would be possible to merge the extensions in some cases.
4962 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4963 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4966 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4969 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4970 StoreSDNode *Store = cast<StoreSDNode>(LDST);
4971 // Can't write outside the original store
4972 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4975 if (LegalOperations &&
4976 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
/// Recursively walk the operand tree of N looking for loads that can be
/// narrowed under the given mask.
/// \param Loads           out: the narrowable loads found.
/// \param NodesWithConsts out: nodes whose constant operand must be re-masked.
/// \param Mask            the AND mask being propagated backwards.
/// \param NodeToMask      out: at most one non-load node that must itself be
///                        masked for the transform to be valid.
4982 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4983 SmallVectorImpl<LoadSDNode*> &Loads,
4984 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4985 ConstantSDNode *Mask,
4986 SDNode *&NodeToMask) {
4987 // Recursively search for the operands, looking for loads which can be
4989 for (SDValue Op : N->op_values()) {
4990 if (Op.getValueType().isVector())
4993 // Some constants may need fixing up later if they are too large.
4994 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
// For OR/XOR, a constant with bits outside the mask would leak those
// bits through; remember the parent so the constant can be narrowed.
4995 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4996 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4997 NodesWithConsts.insert(N);
5001 if (!Op.hasOneUse())
5004 switch(Op.getOpcode()) {
5006 auto *Load = cast<LoadSDNode>(Op);
5008 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5009 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5011 // ZEXTLOAD is already small enough.
5012 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5013 ExtVT.bitsGE(Load->getMemoryVT()))
5016 // Use LE to convert equal sized loads to zext.
5017 if (ExtVT.bitsLE(Load->getMemoryVT()))
5018 Loads.push_back(Load);
5024 case ISD::ZERO_EXTEND:
5025 case ISD::AssertZext: {
5026 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5027 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
// AssertZext guarantees zeros above its recorded VT, so compare against
// that VT rather than the operand's type.
5028 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5029 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5030 Op.getOperand(0).getValueType();
5032 // We can accept extending nodes if the mask is wider or an equal
5033 // width to the original type.
5034 if (ExtVT.bitsGE(VT))
5041 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5047 // Allow one node which will masked along with any loads found.
5051 // Also ensure that the node to be masked only produces one data result.
5052 NodeToMask = Op.getNode();
5053 if (NodeToMask->getNumValues() > 1) {
5054 bool HasValue = false;
5055 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5056 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
// Glue/Other results don't carry data; exactly one data result is OK.
5057 if (VT != MVT::Glue && VT != MVT::Other) {
5059 NodeToMask = nullptr;
5065 assert(HasValue && "Node to be masked has no data result?");
/// Propagate the mask of an (and X, Mask) node backwards to the loads feeding
/// it, so the loads can be narrowed and the AND removed. Returns true if the
/// DAG was changed.
5071 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5072 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5076 if (!Mask->getAPIntValue().isMask())
5079 // No need to do anything if the and directly uses a load.
5080 if (isa<LoadSDNode>(N->getOperand(0)))
5083 SmallVector<LoadSDNode*, 8> Loads;
5084 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5085 SDNode *FixupNode = nullptr;
5086 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5087 if (Loads.size() == 0)
5090 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5091 SDValue MaskOp = N->getOperand(1);
5093 // If it exists, fixup the single node we allow in the tree that needs
5096 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5097 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5098 FixupNode->getValueType(0),
5099 SDValue(FixupNode, 0), MaskOp);
5100 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
// RAUW also rewrote the new AND's own operand to point at itself;
// restore its operands so it still masks the original fixup node.
5101 if (And.getOpcode() == ISD ::AND)
5102 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5105 // Narrow any constants that need it.
5106 for (auto *LogicN : NodesWithConsts) {
5107 SDValue Op0 = LogicN->getOperand(0);
5108 SDValue Op1 = LogicN->getOperand(1);
// Canonicalize so the constant (if any) sits in Op1 for masking.
5110 if (isa<ConstantSDNode>(Op0))
5111 std::swap(Op0, Op1);
5113 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5116 DAG.UpdateNodeOperands(LogicN, Op0, And);
5119 // Create narrow loads.
5120 for (auto *Load : Loads) {
5121 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5122 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5123 SDValue(Load, 0), MaskOp);
5124 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5125 if (And.getOpcode() == ISD ::AND)
5127 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5128 SDValue NewLoad = ReduceLoadWidth(And.getNode());
5130 "Shouldn't be masking the load if it can't be narrowed");
5131 CombineTo(Load, NewLoad, NewLoad.getValue(1));
// The AND is now redundant: every leaf is already masked/narrowed.
5133 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5140 // x & (-1 'logical shift' y)
5142 // (x 'opposite logical shift' y) 'logical shift' y
5143 // if it is better for performance.
/// The mask (-1 << y) clears low bits and (-1 >> y) clears high bits; shifting
/// x the opposite way and back achieves the same clearing without the mask.
5144 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5145 assert(N->getOpcode() == ISD::AND);
5147 SDValue N0 = N->getOperand(0);
5148 SDValue N1 = N->getOperand(1);
5150 // Do we actually prefer shifts over mask?
5151 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5154 // Try to match (-1 '[outer] logical shift' y)
5155 unsigned OuterShift;
5156 unsigned InnerShift; // The opposite direction to the OuterShift.
5157 SDValue Y; // Shift amount.
// Matcher: sets OuterShift/InnerShift/Y when M is (-1 shl/srl Y).
5158 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5161 OuterShift = M->getOpcode();
5162 if (OuterShift == ISD::SHL)
5163 InnerShift = ISD::SRL;
5164 else if (OuterShift == ISD::SRL)
5165 InnerShift = ISD::SHL;
5168 if (!isAllOnesConstant(M->getOperand(0)))
5170 Y = M->getOperand(1);
// The mask may be either operand of the AND; try both.
5177 else if (matchMask(N0))
5183 EVT VT = N->getValueType(0);
5185 // tmp = x 'opposite logical shift' y
5186 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5187 // ret = tmp 'logical shift' y
5188 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5193 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5194 /// For a target with a bit test, this is expected to become test + set and save
5195 /// at least 1 instruction.
/// Pattern handled: and (not (srl X, C)), 1 --> (and X, 1<<C) == 0,
/// looking through optional any_extend/truncate around the 'not'.
5196 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5197 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5199 // This is probably not worthwhile without a supported type.
5200 EVT VT = And->getValueType(0);
5201 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5202 if (!TLI.isTypeLegal(VT))
5205 // Look through an optional extension and find a 'not'.
5206 // TODO: Should we favor test+set even without the 'not' op?
5207 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5208 if (Not.getOpcode() == ISD::ANY_EXTEND)
5209 Not = Not.getOperand(0);
5210 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5213 // Look though an optional truncation. The source operand may not be the same
5214 // type as the original 'and', but that is ok because we are masking off
5215 // everything but the low bit.
5216 SDValue Srl = Not.getOperand(0);
5217 if (Srl.getOpcode() == ISD::TRUNCATE)
5218 Srl = Srl.getOperand(0);
5220 // Match a shift-right by constant.
5221 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5222 !isa<ConstantSDNode>(Srl.getOperand(1)))
5225 // We might have looked through casts that make this transform invalid.
5226 // TODO: If the source type is wider than the result type, do the mask and
5227 // compare in the source type.
5228 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5229 unsigned VTBitWidth = VT.getSizeInBits();
5230 if (ShiftAmt.uge(VTBitWidth))
5233 // Turn this into a bit-test pattern using mask op + setcc:
5234 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5236 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5237 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5238 SDValue Mask = DAG.getConstant(
5239 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5240 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5241 SDValue Zero = DAG.getConstant(0, DL, VT);
5242 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
// The setcc result type may differ from VT; normalize back to VT.
5243 return DAG.getZExtOrTrunc(Setcc, DL, VT);
/// Main combine entry point for ISD::AND nodes. Applies constant folds,
/// canonicalization, load narrowing, bswap matching, and the helper combines
/// defined above. Returns the replacement value, or SDValue(N, 0) when N was
/// updated in place via CombineTo.
5246 SDValue DAGCombiner::visitAND(SDNode *N) {
5247 SDValue N0 = N->getOperand(0);
5248 SDValue N1 = N->getOperand(1);
5249 EVT VT = N1.getValueType();
5256 if (VT.isVector()) {
5257 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5260 // fold (and x, 0) -> 0, vector edition
5261 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5262 // do not return N0, because undef node may exist in N0
5263 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5264 SDLoc(N), N0.getValueType());
5265 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5266 // do not return N1, because undef node may exist in N1
5267 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5268 SDLoc(N), N1.getValueType());
5270 // fold (and x, -1) -> x, vector edition
5271 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5273 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5277 // fold (and c1, c2) -> c1&c2
5278 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5279 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5282 // canonicalize constant to RHS
5283 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5284 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5285 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5287 // fold (and x, -1) -> x
5288 if (isAllOnesConstant(N1))
5291 // if (and x, c) is known to be zero, return 0
5292 unsigned BitWidth = VT.getScalarSizeInBits();
5293 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5294 APInt::getAllOnesValue(BitWidth)))
5295 return DAG.getConstant(0, SDLoc(N), VT);
5297 if (SDValue NewSel = foldBinOpIntoSelect(N))
5301 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5304 // Try to convert a constant mask AND into a shuffle clear mask.
5306 if (SDValue Shuffle = XformToShuffleWithZero(N))
5309 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5312 // fold (and (or x, C), D) -> D if (C & D) == D
5313 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5314 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5316 if (N0.getOpcode() == ISD::OR &&
5317 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5319 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5320 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5321 SDValue N0Op0 = N0.getOperand(0);
// If every bit the mask clears is already known zero in the narrow
// value, the any_extend can safely become a zero_extend.
5322 APInt Mask = ~N1C->getAPIntValue();
5323 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5324 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5325 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5326 N0.getValueType(), N0Op0);
5328 // Replace uses of the AND with uses of the Zero extend node.
5331 // We actually want to replace all uses of the any_extend with the
5332 // zero_extend, to avoid duplicating things. This will later cause this
5333 // AND to be folded.
5334 CombineTo(N0.getNode(), Zext);
5335 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5339 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5340 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5341 // already be zero by virtue of the width of the base type of the load.
5343 // the 'X' node here can either be nothing or an extract_vector_elt to catch
5345 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5346 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5347 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5348 N0.getOperand(0).getResNo() == 0) ||
5349 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5350 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5351 N0 : N0.getOperand(0) );
5353 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5354 // This can be a pure constant or a vector splat, in which case we treat the
5355 // vector as a scalar and use the splat value.
5356 APInt Constant = APInt::getNullValue(1);
5357 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5358 Constant = C->getAPIntValue();
5359 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5360 APInt SplatValue, SplatUndef;
5361 unsigned SplatBitSize;
5363 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5364 SplatBitSize, HasAnyUndefs);
5366 // Undef bits can contribute to a possible optimisation if set, so
5368 SplatValue |= SplatUndef;
5370 // The splat value may be something like "0x00FFFFFF", which means 0 for
5371 // the first vector value and FF for the rest, repeating. We need a mask
5372 // that will apply equally to all members of the vector, so AND all the
5373 // lanes of the constant together.
5374 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5376 // If the splat value has been compressed to a bitlength lower
5377 // than the size of the vector lane, we need to re-expand it to
5379 if (EltBitWidth > SplatBitSize)
5380 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5381 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5382 SplatValue |= SplatValue.shl(SplatBitSize);
5384 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5385 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
5386 if ((SplatBitSize % EltBitWidth) == 0) {
5387 Constant = APInt::getAllOnesValue(EltBitWidth);
5388 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5389 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5394 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5395 // actually legal and isn't going to get expanded, else this is a false
5397 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5398 Load->getValueType(0),
5399 Load->getMemoryVT());
5401 // Resize the constant to the same size as the original memory access before
5402 // extension. If it is still the AllOnesValue then this AND is completely
5404 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
// B = "the AND is redundant for this load's extension kind".
5407 switch (Load->getExtensionType()) {
5408 default: B = false; break;
5409 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5411 case ISD::NON_EXTLOAD: B = true; break;
5414 if (B && Constant.isAllOnesValue()) {
5415 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5416 // preserve semantics once we get rid of the AND.
5417 SDValue NewLoad(Load, 0);
5419 // Fold the AND away. NewLoad may get replaced immediately.
5420 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5422 if (Load->getExtensionType() == ISD::EXTLOAD) {
5423 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5424 Load->getValueType(0), SDLoc(Load),
5425 Load->getChain(), Load->getBasePtr(),
5426 Load->getOffset(), Load->getMemoryVT(),
5427 Load->getMemOperand());
5428 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5429 if (Load->getNumValues() == 3) {
5430 // PRE/POST_INC loads have 3 values.
5431 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5432 NewLoad.getValue(2) };
5433 CombineTo(Load, To, 3, true);
5435 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5439 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5443 // fold (and (load x), 255) -> (zextload x, i8)
5444 // fold (and (extload x, i16), 255) -> (zextload x, i8)
5445 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5446 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5447 (N0.getOpcode() == ISD::ANY_EXTEND &&
5448 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5449 if (SDValue Res = ReduceLoadWidth(N)) {
5450 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5451 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5453 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5454 return SDValue(N, 0);
5459 // Attempt to propagate the AND back up to the leaves which, if they're
5460 // loads, can be combined to narrow loads and the AND node can be removed.
5461 // Perform after legalization so that extend nodes will already be
5462 // combined into the loads.
5463 if (BackwardsPropagateMask(N))
5464 return SDValue(N, 0);
5467 if (SDValue Combined = visitANDLike(N0, N1, N))
5470 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5471 if (N0.getOpcode() == N1.getOpcode())
5472 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5475 // Masking the negated extension of a boolean is just the zero-extended
5477 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5478 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5480 // Note: the SimplifyDemandedBits fold below can make an information-losing
5481 // transform, and then we have no way to find this better fold.
5482 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5483 if (isNullOrNullSplat(N0.getOperand(0))) {
5484 SDValue SubRHS = N0.getOperand(1);
5485 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5486 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5488 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5489 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5490 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5494 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5495 // fold (and (sra)) -> (and (srl)) when possible.
5496 if (SimplifyDemandedBits(SDValue(N, 0)))
5497 return SDValue(N, 0);
5499 // fold (zext_inreg (extload x)) -> (zextload x)
5500 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5501 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5502 (ISD::isEXTLoad(N0.getNode()) ||
5503 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5504 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5505 EVT MemVT = LN0->getMemoryVT();
5506 // If we zero all the possible extended bits, then we can turn this into
5507 // a zextload if we are running before legalize or the operation is legal.
5508 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5509 unsigned MemBitSize = MemVT.getScalarSizeInBits();
5510 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5511 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5512 ((!LegalOperations && LN0->isSimple()) ||
5513 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5515 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5516 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5518 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5519 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5523 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5524 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5525 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5526 N0.getOperand(1), false))
5530 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
// Last resort: targets with a native bit-test may prefer mask+setcc.
5533 if (TLI.hasBitTest(N0, N1))
5534 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5540 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// \param N0,N1  the two OR operands (shift and/or mask subtrees).
/// \param DemandHighBits  when true, bits above the low halfword must be
///        provably zero for the match to be valid.
5541 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5542 bool DemandHighBits) {
// Only attempted post-legalization; BSWAP must be available for VT.
5543 if (!LegalOperations)
5546 EVT VT = N->getValueType(0);
5547 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5549 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5552 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5553 bool LookPassAnd0 = false;
5554 bool LookPassAnd1 = false;
5555 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5557 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5559 if (N0.getOpcode() == ISD::AND) {
5560 if (!N0.getNode()->hasOneUse())
5562 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5563 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5564 // This is needed for X86.
5565 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5566 N01C->getZExtValue() != 0xFFFF))
5568 N0 = N0.getOperand(0);
5569 LookPassAnd0 = true;
5572 if (N1.getOpcode() == ISD::AND) {
5573 if (!N1.getNode()->hasOneUse())
5575 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5576 if (!N11C || N11C->getZExtValue() != 0xFF)
5578 N1 = N1.getOperand(0);
5579 LookPassAnd1 = true;
// Canonicalize so N0 is the SHL and N1 the SRL side.
5582 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5584 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5586 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5589 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5590 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
// A halfword byteswap requires both shift amounts to be exactly 8.
5593 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5596 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5597 SDValue N00 = N0->getOperand(0);
5598 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5599 if (!N00.getNode()->hasOneUse())
5601 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5602 if (!N001C || N001C->getZExtValue() != 0xFF)
5604 N00 = N00.getOperand(0);
5605 LookPassAnd0 = true;
5608 SDValue N10 = N1->getOperand(0);
5609 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5610 if (!N10.getNode()->hasOneUse())
5612 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5613 // Also allow 0xFFFF since the bits will be shifted out. This is needed
5615 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5616 N101C->getZExtValue() != 0xFFFF))
5618 N10 = N10.getOperand(0);
5619 LookPassAnd1 = true;
5625 // Make sure everything beyond the low halfword gets set to zero since the SRL
5626 // 16 will clear the top bits.
5627 unsigned OpSizeInBits = VT.getSizeInBits();
5628 if (DemandHighBits && OpSizeInBits > 16) {
5629 // If the left-shift isn't masked out then the only way this is a bswap is
5630 // if all bits beyond the low 8 are 0. In that case the entire pattern
5631 // reduces to a left shift anyway: leave it for other parts of the combiner.
5635 // However, if the right shift isn't masked out then it might be because
5636 // it's not needed. See if we can spot that too.
5637 if (!LookPassAnd1 &&
5638 !DAG.MaskedValueIsZero(
5639 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5643 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
// For types wider than i16, the swapped halfword lands in the high bits;
// shift it back down into the low 16 bits.
5644 if (OpSizeInBits > 16) {
5646 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5647 DAG.getConstant(OpSizeInBits - 16, DL,
5648 getShiftAmountTy(VT)));
5653 /// Return true if the specified node is an element that makes up a 32-bit
5654 /// packed halfword byteswap.
5655 /// ((x & 0x000000ff) << 8) |
5656 /// ((x & 0x0000ff00) >> 8) |
5657 /// ((x & 0x00ff0000) << 8) |
5658 /// ((x & 0xff000000) >> 8)
/// On a successful match the source node feeding this element is recorded in
/// Parts[MaskByteOffset]; returning false leaves Parts unchanged except where
/// already filled by earlier elements.
/// NOTE(review): this extract elides several fall-through `return false;`
/// lines and the closing `return true;` — comments only, code untouched.
5659 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
// Each element must be single-use: the combined bswap replaces all of them.
5660 if (!N.getNode()->hasOneUse())
5663 unsigned Opc = N.getOpcode();
5664 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5667 SDValue N0 = N.getOperand(0);
5668 unsigned Opc0 = N0.getOpcode();
5669 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
// The constant byte mask may sit on this node (mask-outside-shift form) or
// on its operand (shift-outside-mask form).
5672 ConstantSDNode *N1C = nullptr;
5673 // SHL or SRL: look upstream for AND mask operand
5674 if (Opc == ISD::AND)
5675 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5676 else if (Opc0 == ISD::AND)
5677 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
// MaskByteOffset identifies which of the four result bytes this element
// produces (0 = LSB .. 3 = MSB).
5681 unsigned MaskByteOffset;
5682 switch (N1C->getZExtValue()) {
5685 case 0xFF: MaskByteOffset = 0; break;
5686 case 0xFF00: MaskByteOffset = 1; break;
5688 // In case demanded bits didn't clear the bits that will be shifted out.
5689 // This is needed for X86.
5690 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5695 case 0xFF0000: MaskByteOffset = 2; break;
5696 case 0xFF000000: MaskByteOffset = 3; break;
5699 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5700 if (Opc == ISD::AND) {
5701 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5703 // (x >> 8) & 0xff0000
5704 if (Opc0 != ISD::SRL)
// Shift amount must be exactly one byte for a bswap element.
5706 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5707 if (!C || C->getZExtValue() != 8)
5710 // (x << 8) & 0xff00
5711 // (x << 8) & 0xff000000
5712 if (Opc0 != ISD::SHL)
5714 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5715 if (!C || C->getZExtValue() != 8)
5718 } else if (Opc == ISD::SHL) {
5720 // (x & 0xff0000) << 8
5721 if (MaskByteOffset != 0 && MaskByteOffset != 2)
5723 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5724 if (!C || C->getZExtValue() != 8)
5726 } else { // Opc == ISD::SRL
5727 // (x & 0xff00) >> 8
5728 // (x & 0xff000000) >> 8
5729 if (MaskByteOffset != 1 && MaskByteOffset != 3)
5731 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5732 if (!C || C->getZExtValue() != 8)
// Reject a second element claiming the same byte slot.
5736 if (Parts[MaskByteOffset])
5739 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5743 // Match 2 elements of a packed halfword bswap.
// Accepts either an OR of two single-byte elements, or an already-formed
// (srl (bswap x), 16) which supplies two byte slots at once (both Parts
// entries are pointed at the bswap's source).
5744 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5745 if (N.getOpcode() == ISD::OR)
5746 return isBSwapHWordElement(N.getOperand(0), Parts) &&
5747 isBSwapHWordElement(N.getOperand(1), Parts);
5749 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
// Only a halfword rotate of the bswap (shift by exactly 16) qualifies.
5750 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5751 if (!C || C->getAPIntValue() != 16)
5753 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
5760 // Match this pattern:
5761 // (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
5762 // And rewrite this to:
5763 // (rotr (bswap A), 16)
// Returns an empty SDValue when the pattern does not match or ROTR is not
// available; the caller retries with the operands commuted.
// NOTE(review): the early-exit `return SDValue();` lines are elided in this
// extract — comments only, code untouched.
5764 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
5765 SelectionDAG &DAG, SDNode *N, SDValue N0,
5766 SDValue N1, EVT VT, EVT ShiftAmountTy) {
5767 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
5768 "MatchBSwapHWordOrAndAnd: expecting i32");
5769 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5771 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
5773 // TODO: this is too restrictive; lifting this restriction requires more tests
5774 if (!N0->hasOneUse() || !N1->hasOneUse())
// The two AND masks must be exactly the alternating-byte masks.
5776 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
5777 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
5778 if (!Mask0 || !Mask1)
5780 if (Mask0->getAPIntValue() != 0xff00ff00 ||
5781 Mask1->getAPIntValue() != 0x00ff00ff)
5783 SDValue Shift0 = N0.getOperand(0);
5784 SDValue Shift1 = N1.getOperand(0);
5785 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
5787 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
5788 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
5789 if (!ShiftAmt0 || !ShiftAmt1)
5791 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
// Both shifts must consume the same source value A.
5793 if (Shift0.getOperand(0) != Shift1.getOperand(0))
5797 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
5798 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
5799 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5802 /// Match a 32-bit packed halfword bswap. That is
5803 /// ((x & 0x000000ff) << 8) |
5804 /// ((x & 0x0000ff00) >> 8) |
5805 /// ((x & 0x00ff0000) << 8) |
5806 /// ((x & 0xff000000) >> 8)
5807 /// => (rotl (bswap x), 16)
/// Tries the and/and shortcut form first, then the general 4-element match
/// via isBSwapHWordPair/isBSwapHWordElement. Returns an empty SDValue when
/// nothing matches.
5808 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
// Only run post-legalization (guard condition partially elided in this
// extract — presumably also requires VT == i32; confirm against upstream).
5809 if (!LegalOperations)
5812 EVT VT = N->getValueType(0);
5815 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5818 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
5819 getShiftAmountTy(VT)))
5822 // Try again with commuted operands.
5823 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
5824 getShiftAmountTy(VT)))
5829 // (or (bswaphpair), (bswaphpair))
5830 // (or (or (bswaphpair), (and)), (and))
5831 // (or (or (and), (bswaphpair)), (and))
// Parts[i] collects the source node for result byte i; all four must agree.
5832 SDNode *Parts[4] = {};
5834 if (isBSwapHWordPair(N0, Parts)) {
5835 // (or (or (and), (and)), (or (and), (and)))
5836 if (!isBSwapHWordPair(N1, Parts))
5838 } else if (N0.getOpcode() == ISD::OR) {
5839 // (or (or (or (and), (and)), (and)), (and))
5840 if (!isBSwapHWordElement(N1, Parts))
5842 SDValue N00 = N0.getOperand(0);
5843 SDValue N01 = N0.getOperand(1);
5844 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
5845 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
5850 // Make sure the parts are all coming from the same node.
5851 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5855 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5856 SDValue(Parts[0], 0));
5858 // Result of the bswap should be rotated by 16. If it's not legal, then
5859 // do (x << 16) | (x >> 16).
5860 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5861 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5862 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5863 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5864 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5865 return DAG.getNode(ISD::OR, DL, VT,
5866 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5867 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt))
5870 /// This contains all DAGCombine rules which reduce two values combined by
5871 /// an Or operation to a single value \see visitANDLike().
/// Shared between visitOR and other callers that synthesize OR-like nodes;
/// returns an empty SDValue when no rule applies.
5872 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5873 EVT VT = N1.getValueType();
5876 // fold (or x, undef) -> -1
5877 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5878 return DAG.getAllOnesConstant(DL, VT);
5880 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5883 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
5884 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5885 // Don't increase # computations.
5886 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5887 // We can only do this xform if we know that bits from X that are set in C2
5888 // but not in C1 are already zero. Likewise for Y.
5889 if (const ConstantSDNode *N0O1C =
5890 getAsNonOpaqueConstant(N0.getOperand(1))) {
5891 if (const ConstantSDNode *N1O1C =
5892 getAsNonOpaqueConstant(N1.getOperand(1))) {
5893 // We can only do this xform if we know that bits from X that are set in
5894 // C2 but not in C1 are already zero. Likewise for Y.
5895 const APInt &LHSMask = N0O1C->getAPIntValue();
5896 const APInt &RHSMask = N1O1C->getAPIntValue();
// MaskedValueIsZero proves the cross-mask bits can't change the result,
// so a single combined AND mask (C1|C2) is equivalent.
5898 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5899 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5900 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5901 N0.getOperand(0), N1.getOperand(0));
5902 return DAG.getNode(ISD::AND, DL, VT, X,
5903 DAG.getConstant(LHSMask | RHSMask, DL, VT));
5909 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5910 if (N0.getOpcode() == ISD::AND &&
5911 N1.getOpcode() == ISD::AND &&
5912 N0.getOperand(0) == N1.getOperand(0) &&
5913 // Don't increase # computations.
5914 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5915 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5916 N0.getOperand(1), N1.getOperand(1));
5917 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5923 /// OR combines for which the commuted variant will be tried as well.
/// Caller (visitOR) invokes this twice, once with (N0, N1) and once with
/// (N1, N0), so only one operand order needs to be matched here.
5924 static SDValue visitORCommutative(
5925 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5926 EVT VT = N0.getValueType();
5927 if (N0.getOpcode() == ISD::AND) {
5928 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
// (X & ~Y) | Y == X | Y, so the bitwise-not and the AND both drop out.
5929 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5930 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5932 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5933 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5934 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
// Main combine entry point for ISD::OR nodes: constant folding,
// canonicalization, vector shuffle merging, bswap/rotate/funnel-shift
// recognition, load combining, and demanded-bits simplification, in that
// order. Returns the replacement value or an empty SDValue.
// NOTE(review): this extract elides many fall-through lines (returns of
// FoldedVOp/N0/N1/etc. and closing braces) — comments only, code untouched.
5940 SDValue DAGCombiner::visitOR(SDNode *N) {
5941 SDValue N0 = N->getOperand(0);
5942 SDValue N1 = N->getOperand(1);
5943 EVT VT = N1.getValueType();
5950 if (VT.isVector()) {
5951 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5954 // fold (or x, 0) -> x, vector edition
5955 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5957 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5960 // fold (or x, -1) -> -1, vector edition
5961 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5962 // do not return N0, because undef node may exist in N0
5963 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5964 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5965 // do not return N1, because undef node may exist in N1
5966 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5968 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5969 // Do this only if the resulting shuffle is legal.
5970 if (isa<ShuffleVectorSDNode>(N0) &&
5971 isa<ShuffleVectorSDNode>(N1) &&
5972 // Avoid folding a node with illegal type.
5973 TLI.isTypeLegal(VT)) {
5974 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5975 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5976 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5977 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5978 // Ensure both shuffles have a zero input.
// The != tests require exactly one zero input per shuffle.
5979 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5980 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5981 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5982 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5983 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5984 bool CanFold = true;
5985 int NumElts = VT.getVectorNumElements();
5986 SmallVector<int, 4> Mask(NumElts);
// Build a combined mask: for each lane, pick the non-zero side's element.
5988 for (int i = 0; i != NumElts; ++i) {
5989 int M0 = SV0->getMaskElt(i);
5990 int M1 = SV1->getMaskElt(i);
5992 // Determine if either index is pointing to a zero vector.
5993 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5994 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5996 // If one element is zero and the otherside is undef, keep undef.
5997 // This also handles the case that both are undef.
5998 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6003 // Make sure only one of the elements is zero.
6004 if (M0Zero == M1Zero) {
6009 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6011 // We have a zero and non-zero element. If the non-zero came from
6012 // SV0 make the index a LHS index. If it came from SV1, make it
6013 // a RHS index. We need to mod by NumElts because we don't care
6014 // which operand it came from in the original shuffles.
6015 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6019 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6020 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6022 SDValue LegalShuffle =
6023 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6026 return LegalShuffle;
6032 // fold (or c1, c2) -> c1|c2
6033 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6034 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6037 // canonicalize constant to RHS
6038 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6039 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6040 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6042 // fold (or x, 0) -> x
6043 if (isNullConstant(N1))
6046 // fold (or x, -1) -> -1
6047 if (isAllOnesConstant(N1))
6050 if (SDValue NewSel = foldBinOpIntoSelect(N))
6053 // fold (or x, c) -> c iff (x & ~c) == 0
6054 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6057 if (SDValue Combined = visitORLike(N0, N1, N))
6060 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6063 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6064 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6066 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6070 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6073 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6074 // iff (c1 & c2) != 0 or c1/c2 are undef.
6075 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6076 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6078 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6079 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6080 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6081 {N1, N0.getOperand(1)})) {
6082 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6083 AddToWorklist(IOR.getNode());
6084 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
// Try the order-sensitive folds in both operand orders.
6088 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6090 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6093 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6094 if (N0.getOpcode() == N1.getOpcode())
6095 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6098 // See if this is some rotate idiom.
6099 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6102 if (SDValue Load = MatchLoadCombine(N))
6105 // Simplify the operands using demanded-bits information.
6106 if (SimplifyDemandedBits(SDValue(N, 0)))
6107 return SDValue(N, 0);
6109 // If OR can be rewritten into ADD, try combines based on ADD.
6110 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6111 DAG.haveNoCommonBitsSet(N0, N1))
6112 if (SDValue Combined = visitADDLike(N))
/// If Op is (and X, C) with a constant C, record C in Mask and return X;
/// otherwise Op is returned unchanged (the fall-through return is elided in
/// this extract — confirm against upstream).
6118 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6119 if (Op.getOpcode() == ISD::AND &&
6120 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6121 Mask = Op.getOperand(1);
6122 return Op.getOperand(0);
6127 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
/// On a match, Shift receives the SHL/SRL node and Mask the optional AND
/// constant (set by stripConstantMask). Remaining body lines are elided in
/// this extract.
6128 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6130 Op = stripConstantMask(DAG, Op, Mask);
6131 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6138 /// Helper function for visitOR to extract the needed side of a rotate idiom
6139 /// from a shl/srl/mul/udiv. This is meant to handle cases where
6140 /// InstCombine merged some outside op with one of the shifts from
6141 /// the rotate pattern.
6142 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6143 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6146 /// (or (add v v) (shrl v bitwidth-1)):
6147 /// expands (add v v) -> (shl v 1)
6149 /// (or (mul v c0) (shrl (mul v c1) c2)):
6150 /// expands (mul v c0) -> (shl (mul v c1) c3)
6152 /// (or (udiv v c0) (shl (udiv v c1) c2)):
6153 /// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6155 /// (or (shl v c0) (shrl (shl v c1) c2)):
6156 /// expands (shl v c0) -> (shl (shl v c1) c3)
6158 /// (or (shrl v c0) (shl (shrl v c1) c2)):
6159 /// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6161 /// Such that in all cases, c3+c2==bitwidth(op v c1).
6162 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6163 SDValue ExtractFrom, SDValue &Mask,
6165 assert(OppShift && ExtractFrom && "Empty SDValue");
6167 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6168 "Existing shift must be valid as a rotate half");
// Peel a constant AND off ExtractFrom; the mask is handed back to the
// caller via the Mask out-parameter.
6170 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6172 // Value and Type of the shift.
6173 SDValue OppShiftLHS = OppShift.getOperand(0);
6174 EVT ShiftedVT = OppShiftLHS.getValueType();
6176 // Amount of the existing shift.
6177 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6179 // (add v v) -> (shl v 1)
6180 // TODO: Should this be a general DAG canonicalization?
6181 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6182 ExtractFrom.getOpcode() == ISD::ADD &&
6183 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6184 ExtractFrom.getOperand(0) == OppShiftLHS &&
6185 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6186 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6187 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6190 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6192 // Find opcode of the needed shift to be extracted from (op0 v c0).
6193 unsigned Opcode = ISD::DELETED_NODE;
6194 bool IsMulOrDiv = false;
6195 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6196 // opcode or its arithmetic (mul or udiv) variant.
6197 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6198 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6199 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6201 Opcode = NeededShift;
6204 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6205 // that the needed shift can be extracted from.
// An SRL on the opposite side needs an SHL (or MUL) here, and vice versa.
6206 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6207 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6210 // op0 must be the same opcode on both sides, have the same LHS argument,
6211 // and produce the same value type.
6212 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6213 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6214 ShiftedVT != ExtractFrom.getValueType())
6217 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6218 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6219 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6220 ConstantSDNode *ExtractFromCst =
6221 isConstOrConstSplat(ExtractFrom.getOperand(1));
6222 // TODO: We should be able to handle non-uniform constant vectors for these values
6223 // Check that we have constant values.
6224 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6225 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6226 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6229 // Compute the shift amount we need to extract to complete the rotate.
6230 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6231 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6233 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6234 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6235 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6236 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6237 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6239 // Now try extract the needed shift from the ExtractFrom op and see if the
6240 // result matches up with the existing shift's LHS op.
6242 // Op to extract from is a mul or udiv by a constant.
6244 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6245 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6246 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6247 NeededShiftAmt.getZExtValue());
6250 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
// The arithmetic constant must divide evenly into the power of two;
// otherwise no shift can be factored out.
6251 if (Rem != 0 || ResultAmt != OppLHSAmt)
6254 // Op to extract from is a shift by a constant.
6256 // c2 - (bitwidth(op0 v c0) - c1) == c0
6257 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6258 ExtractFromAmt.getBitWidth()))
6262 // Return the expanded shift op that should allow a rotate to be formed.
6263 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6264 EVT ResVT = ExtractFrom.getValueType();
6265 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6266 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6269 // Return true if we can prove that, whenever Neg and Pos are both in the
6270 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6271 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6273 // (or (shift1 X, Neg), (shift2 X, Pos))
6275 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6276 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
6277 // to consider shift amounts with defined behavior.
6278 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6279 SelectionDAG &DAG) {
6280 // If EltSize is a power of 2 then:
6282 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6283 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6285 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6286 // for the stronger condition:
6288 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6290 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6291 // we can just replace Neg with Neg' for the rest of the function.
6293 // In other cases we check for the even stronger condition:
6295 // Neg == EltSize - Pos [B]
6297 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6298 // behavior if Pos == 0 (and consequently Neg == EltSize).
6300 // We could actually use [A] whenever EltSize is a power of 2, but the
6301 // only extra cases that it would match are those uninteresting ones
6302 // where Neg and Pos are never in range at the same time. E.g. for
6303 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6304 // as well as (sub 32, Pos), but:
6306 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6308 // always invokes undefined behavior for 32-bit X.
6310 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6311 unsigned MaskLoBits = 0;
6312 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6313 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6314 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6315 unsigned Bits = Log2_64(EltSize);
// The AND mask may be wider than EltSize-1 as long as known-zero bits
// cover the difference — then it still only truncates modulo EltSize.
6316 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6317 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6318 Neg = Neg.getOperand(0);
6324 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6325 if (Neg.getOpcode() != ISD::SUB)
6327 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6330 SDValue NegOp1 = Neg.getOperand(1);
6332 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6333 // Pos'. The truncation is redundant for the purpose of the equality.
6334 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6335 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6336 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6337 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6338 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6340 Pos = Pos.getOperand(0);
6344 // The condition we need is now:
6346 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6348 // If NegOp1 == Pos then we need:
6350 // EltSize & Mask == NegC & Mask
6352 // (because "x & Mask" is a truncation and distributes through subtraction).
6354 // We also need to account for a potential truncation of NegOp1 if the amount
6355 // has already been legalized to a shift amount type.
6357 if ((Pos == NegOp1) ||
6358 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6359 Width = NegC->getAPIntValue();
6361 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6362 // Then the condition we want to prove becomes:
6364 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6366 // which, again because "x & Mask" is a truncation, becomes:
6368 // NegC & Mask == (EltSize - PosC) & Mask
6369 // EltSize & Mask == (NegC + PosC) & Mask
6370 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6371 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6372 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6378 // Now we just need to check that EltSize & Mask == Width & Mask.
6380 // EltSize & Mask is 0 since Mask is EltSize - 1.
6381 return Width.getLoBits(MaskLoBits) == 0;
6382 return Width == EltSize;
6385 // A subroutine of MatchRotate used once we have found an OR of two opposite
6386 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6387 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6388 // former being preferred if supported. InnerPos and InnerNeg are Pos and
6389 // Neg with outer conversions stripped away.
// Returns an empty SDValue when matchRotateSub cannot prove the Pos/Neg
// relationship (fall-through return elided in this extract).
6390 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6391 SDValue Neg, SDValue InnerPos,
6392 SDValue InnerNeg, unsigned PosOpcode,
6393 unsigned NegOpcode, const SDLoc &DL) {
6394 // fold (or (shl x, (*ext y)),
6395 // (srl x, (*ext (sub 32, y)))) ->
6396 // (rotl x, y) or (rotr x, (sub 32, y))
6398 // fold (or (shl x, (*ext (sub 32, y))),
6399 // (srl x, (*ext y))) ->
6400 // (rotr x, y) or (rotl x, (sub 32, y))
6401 EVT VT = Shifted.getValueType();
6402 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
// Prefer the "positive" rotate opcode when the target supports it,
// otherwise fall back to the negated form with the Neg amount.
6403 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6404 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6405 HasPos ? Pos : Neg);
6411 // A subroutine of MatchRotate used once we have found an OR of two opposite
6412 // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
6413 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6414 // former being preferred if supported. InnerPos and InnerNeg are Pos and
6415 // Neg with outer conversions stripped away.
6416 // TODO: Merge with MatchRotatePosNeg.
6417 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6418 SDValue Neg, SDValue InnerPos,
6419 SDValue InnerNeg, unsigned PosOpcode,
6420 unsigned NegOpcode, const SDLoc &DL) {
6421 EVT VT = N0.getValueType();
6422 unsigned EltBits = VT.getScalarSizeInBits();
6424 // fold (or (shl x0, (*ext y)),
6425 // (srl x1, (*ext (sub 32, y)))) ->
6426 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6428 // fold (or (shl x0, (*ext (sub 32, y))),
6429 // (srl x1, (*ext y))) ->
6430 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6431 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
6432 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6433 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6434 HasPos ? Pos : Neg);
6437 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6438 // so for now just use the PosOpcode case if its legal.
6439 // TODO: When can we use the NegOpcode case?
// The xor-based forms below rely on (y ^ (EltBits-1)) == EltBits-1-y,
// which only holds when EltBits is a power of two.
6440 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
// Helper: is Op a BinOpc with a constant (splat) RHS equal to Imm?
6441 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6442 if (Op.getOpcode() != BinOpc)
6444 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6445 return Cst && (Cst->getAPIntValue() == Imm);
6448 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6449 // -> (fshl x0, x1, y)
6450 if (IsBinOpImm(N1, ISD::SRL, 1) &&
6451 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6452 InnerPos == InnerNeg.getOperand(0) &&
6453 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6454 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6457 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6458 // -> (fshr x0, x1, y)
6459 if (IsBinOpImm(N0, ISD::SHL, 1) &&
6460 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6461 InnerNeg == InnerPos.getOperand(0) &&
6462 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6463 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6466 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6467 // -> (fshr x0, x1, y)
6468 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6469 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6470 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6471 InnerNeg == InnerPos.getOperand(0) &&
6472 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6473 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6480 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
6481 // idioms for rotate, and if the target supports rotation instructions, generate
6482 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6483 // with different shifted sources.
6484 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// NOTE(review): this excerpt elides several upstream lines (early
// `return SDValue();` statements and closing braces); comments below
// describe only what is visible here.
6485 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
6486 EVT VT = LHS.getValueType();
6487 if (!TLI.isTypeLegal(VT))
6490 // The target must have at least one rotate/funnel flavor.
// Query both native rotates and funnel shifts; any one of the four
// opcodes being available is enough to keep matching.
6491 bool HasROTL = hasOperation(ISD::ROTL, VT);
6492 bool HasROTR = hasOperation(ISD::ROTR, VT);
6493 bool HasFSHL = hasOperation(ISD::FSHL, VT);
6494 bool HasFSHR = hasOperation(ISD::FSHR, VT);
6495 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6498 // Check for truncated rotate.
// If both operands are truncates of same-typed values, try to match the
// rotate on the wider type and truncate the result back down.
6499 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6500 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6501 assert(LHS.getValueType() == RHS.getValueType());
6502 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6503 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6507 // Match "(X shl/srl V1) & V2" where V2 may not be present.
6508 SDValue LHSShift; // The shift.
6509 SDValue LHSMask; // AND value if any.
6510 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6512 SDValue RHSShift; // The shift.
6513 SDValue RHSMask; // AND value if any.
6514 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6516 // If neither side matched a rotate half, bail
6517 if (!LHSShift && !RHSShift)
6520 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6521 // side of the rotate, so try to handle that here. In all cases we need to
6522 // pass the matched shift from the opposite side to compute the opcode and
6523 // needed shift amount to extract. We still want to do this if both sides
6524 // matched a rotate half because one half may be a potential overshift that
6525 // can be broken down (ie if InstCombine merged two shl or srl ops into a
6528 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6530 if (SDValue NewRHSShift =
6531 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6532 RHSShift = NewRHSShift;
6533 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6535 if (SDValue NewLHSShift =
6536 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6537 LHSShift = NewLHSShift;
6539 // If a side is still missing, nothing else we can do.
6540 if (!RHSShift || !LHSShift)
6543 // At this point we've matched or extracted a shift op on each side.
// One side must be SHL and the other SRL for a rotate/funnel pattern.
6545 if (LHSShift.getOpcode() == RHSShift.getOpcode())
6546 return SDValue(); // Shifts must disagree.
// A true rotate shifts the same value both ways; differing sources can
// only be handled as a funnel shift.
6548 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6549 if (!IsRotate && !(HasFSHL || HasFSHR))
6550 return SDValue(); // Requires funnel shift support.
6552 // Canonicalize shl to left side in a shl/srl pair.
6553 if (RHSShift.getOpcode() == ISD::SHL) {
6554 std::swap(LHS, RHS);
6555 std::swap(LHSShift, RHSShift);
6556 std::swap(LHSMask, RHSMask);
6559 unsigned EltSizeInBits = VT.getScalarSizeInBits();
6560 SDValue LHSShiftArg = LHSShift.getOperand(0);
6561 SDValue LHSShiftAmt = LHSShift.getOperand(1);
6562 SDValue RHSShiftArg = RHSShift.getOperand(0);
6563 SDValue RHSShiftAmt = RHSShift.getOperand(1);
6565 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6566 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6567 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
6568 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
6569 // iff C1+C2 == EltSizeInBits
6570 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6571 ConstantSDNode *RHS) {
6572 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6574 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
// Prefer a native rotate when the sources match; otherwise fall back to
// a funnel shift with both source operands.
6576 if (IsRotate && (HasROTL || HasROTR))
6577 Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
6578 HasROTL ? LHSShiftAmt : RHSShiftAmt);
6580 Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
6581 RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
6583 // If there is an AND of either shifted operand, apply it to the result.
6584 if (LHSMask.getNode() || RHSMask.getNode()) {
6585 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6586 SDValue Mask = AllOnes;
// Widen each side's mask with the bits the opposite shift would have
// cleared anyway, so the combined mask is no stricter than the inputs.
6588 if (LHSMask.getNode()) {
6589 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6590 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6591 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6593 if (RHSMask.getNode()) {
6594 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6595 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6596 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6599 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
// Past this point the shift amounts are variable, so masks cannot be
// proven redundant.
6605 // If there is a mask here, and we have a variable shift, we can't be sure
6606 // that we're masking out the right stuff.
6607 if (LHSMask.getNode() || RHSMask.getNode())
6610 // If the shift amount is sign/zext/any-extended just peel it off.
6611 SDValue LExtOp0 = LHSShiftAmt;
6612 SDValue RExtOp0 = RHSShiftAmt;
6613 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6614 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6615 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6616 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6617 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6618 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6619 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6620 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6621 LExtOp0 = LHSShiftAmt.getOperand(0);
6622 RExtOp0 = RHSShiftAmt.getOperand(0);
// Try the variable-amount (pos/neg) rotate match in both orientations,
// then the funnel-shift equivalents for distinct sources.
6625 if (IsRotate && (HasROTL || HasROTR)) {
6627 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
6628 RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6633 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
6634 LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6640 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6641 LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
6646 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6647 RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
6656 /// Represents known origin of an individual byte in load combine pattern. The
6657 /// value of the byte is either constant zero or comes from memory.
// Small value type used by the load-combine matcher below: identifies where
// one byte of a wide value comes from (a specific byte of a specific load,
// or a known constant zero).
6658 struct ByteProvider {
6659 // For constant zero providers Load is set to nullptr. For memory providers
6660 // Load represents the node which loads the byte from memory.
6661 // ByteOffset is the offset of the byte in the value produced by the load.
6662 LoadSDNode *Load = nullptr;
6663 unsigned ByteOffset = 0;
6665 ByteProvider() = default;
// Factory for a byte that comes from memory (byte `ByteOffset` of `Load`).
6667 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6668 return ByteProvider(Load, ByteOffset);
// Factory for a byte known to be zero (null Load is the sentinel).
6671 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6673 bool isConstantZero() const { return !Load; }
6674 bool isMemory() const { return Load; }
6676 bool operator==(const ByteProvider &Other) const {
6677 return Other.Load == Load && Other.ByteOffset == ByteOffset;
// Private-style constructor used by the factories above; callers go through
// getMemory()/getConstantZero() so the nullptr convention stays in one place.
6681 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6682 : Load(Load), ByteOffset(ByteOffset) {}
6685 } // end anonymous namespace
6687 /// Recursively traverses the expression calculating the origin of the requested
6688 /// byte of the given value. Returns None if the provider can't be calculated.
6690 /// For all the values except the root of the expression verifies that the value
6691 /// has exactly one use and if it's not true return None. This way if the origin
6692 /// of the byte is returned it's guaranteed that the values which contribute to
6693 /// the byte are not used outside of this expression.
6695 /// Because the parts of the expression are not allowed to have more than one
6696 /// use this function iterates over trees, not DAGs. So it never visits the same
6697 /// node more than once.
6698 static const Optional<ByteProvider>
6699 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6700 bool Root = false) {
// NOTE(review): several case labels and `return None;` lines are elided
// from this excerpt; the visible structure is a recursive walk that maps
// byte `Index` of `Op` to a ByteProvider, failing with None otherwise.
6701 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
// Only the expression root may have multiple uses; interior nodes must be
// single-use so the whole tree can be replaced (see header comment above).
6705 if (!Root && !Op.hasOneUse())
6708 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6709 unsigned BitWidth = Op.getValueSizeInBits();
6710 if (BitWidth % 8 != 0)
6712 unsigned ByteWidth = BitWidth / 8;
6713 assert(Index < ByteWidth && "invalid index requested");
6716 switch (Op.getOpcode()) {
// OR: the byte must come entirely from one side; the other side's byte
// must be constant zero.
6718 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6721 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6725 if (LHS->isConstantZero())
6727 if (RHS->isConstantZero())
// Shift by a constant multiple of 8: shifts whole bytes, so adjust Index.
6732 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6736 uint64_t BitShift = ShiftOp->getZExtValue();
6737 if (BitShift % 8 != 0)
6739 uint64_t ByteShift = BitShift / 8;
// Bytes shifted in from the bottom are zero; otherwise look up the
// corresponding byte of the shifted operand.
6741 return Index < ByteShift
6742 ? ByteProvider::getConstantZero()
6743 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6746 case ISD::ANY_EXTEND:
6747 case ISD::SIGN_EXTEND:
6748 case ISD::ZERO_EXTEND: {
6749 SDValue NarrowOp = Op->getOperand(0);
6750 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6751 if (NarrowBitWidth % 8 != 0)
6753 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
// Bytes beyond the narrow source are zero only for ZERO_EXTEND; for
// sign/any extend their value is not a known constant.
6755 if (Index >= NarrowByteWidth)
6756 return Op.getOpcode() == ISD::ZERO_EXTEND
6757 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6759 return calculateByteProvider(NarrowOp, Index, Depth + 1);
// BSWAP (label elided): byte order is mirrored, so look up the mirrored index.
6762 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
// LOAD (label elided): only simple, unindexed loads are usable.
6765 auto L = cast<LoadSDNode>(Op.getNode());
6766 if (!L->isSimple() || L->isIndexed())
6769 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6770 if (NarrowBitWidth % 8 != 0)
6772 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
// Bytes beyond the loaded memory width are zero only for a zext load.
6774 if (Index >= NarrowByteWidth)
6775 return L->getExtensionType() == ISD::ZEXTLOAD
6776 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6778 return ByteProvider::getMemory(L, Index);
// Map logical byte index i of a BW-byte value to its memory offset for a
// little-endian layout. Body elided from this excerpt — presumably returns
// `i` (and BigEndianByteAt the mirrored `BW - i - 1`); confirm upstream.
6785 static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
// Big-endian counterpart of the above; body likewise elided here.
6789 static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
6793 // Check if the bytes offsets we are looking at match with either big or
6794 // little endian value loaded. Return true for big endian, false for little
6795 // endian, and None if match failed.
6796 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
6797 int64_t FirstOffset) {
6798 // The endian can be decided only when it is 2 bytes at least.
6799 unsigned Width = ByteOffsets.size();
// Assume both orderings, then falsify: each offset (relative to the first)
// must match the expected position for that endianness at index i.
6803 bool BigEndian = true, LittleEndian = true;
6804 for (unsigned i = 0; i < Width; i++) {
6805 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6806 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6807 BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
// Neither ordering matches — bail out early (return of None elided here).
6808 if (!BigEndian && !LittleEndian)
// At most one ordering can survive the loop for Width >= 2.
6812 assert((BigEndian != LittleEndian) && "It should be either big endian or"
// Strip any chain of truncate/extend nodes and return the underlying value.
// (The ISD::TRUNCATE case label and the default return are elided from this
// excerpt; the visible cases recurse through all three extend kinds.)
6817 static SDValue stripTruncAndExt(SDValue Value) {
6818 switch (Value.getOpcode()) {
6820 case ISD::ZERO_EXTEND:
6821 case ISD::SIGN_EXTEND:
6822 case ISD::ANY_EXTEND:
6823 return stripTruncAndExt(Value.getOperand(0));
6828 /// Match a pattern where a wide type scalar value is stored by several narrow
6829 /// stores. Fold it into a single store or a BSWAP and a store if the targets
6832 /// Assuming little endian target:
6835 /// p[0] = (val >> 0) & 0xFF;
6836 /// p[1] = (val >> 8) & 0xFF;
6837 /// p[2] = (val >> 16) & 0xFF;
6838 /// p[3] = (val >> 24) & 0xFF;
6840 /// *((i32)p) = val;
6844 /// p[0] = (val >> 24) & 0xFF;
6845 /// p[1] = (val >> 16) & 0xFF;
6846 /// p[2] = (val >> 8) & 0xFF;
6847 /// p[3] = (val >> 0) & 0xFF;
6849 /// *((i32)p) = BSWAP(val);
6850 SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
// NOTE(review): interior lines (early returns, some braces, `int64_t
// Offset` init) are elided from this excerpt; comments describe the
// visible flow only.
6851 // Collect all the stores in the chain.
// Walk up the chain from N, collecting consecutive simple i8 stores.
6853 SmallVector<StoreSDNode *, 8> Stores;
6854 for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6855 // TODO: Allow unordered atomics when wider type is legal (see D66309)
6856 if (Store->getMemoryVT() != MVT::i8 ||
6857 !Store->isSimple() || Store->isIndexed())
6859 Stores.push_back(Store);
6860 Chain = Store->getChain();
6862 // Handle the simple type only.
// Wide type = number of byte stores x store width; must be i16/i32/i64.
6863 unsigned Width = Stores.size();
6864 EVT VT = EVT::getIntegerVT(
6865 *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6866 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6869 if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6872 // Check if all the bytes of the combined value we are looking at are stored
6873 // to the same base address. Collect bytes offsets from Base address into
6875 SDValue CombinedValue;
6876 SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
6877 int64_t FirstOffset = INT64_MAX;
6878 StoreSDNode *FirstStore = nullptr;
6879 Optional<BaseIndexOffset> Base;
6880 for (auto Store : Stores) {
6881 // All the stores store different byte of the CombinedValue. A truncate is
6882 // required to get that byte value.
6883 SDValue Trunc = Store->getValue();
6884 if (Trunc.getOpcode() != ISD::TRUNCATE)
6886 // A shift operation is required to get the right byte offset, except the
6889 SDValue Value = Trunc.getOperand(0);
6890 if (Value.getOpcode() == ISD::SRL ||
6891 Value.getOpcode() == ISD::SRA) {
6892 auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
6893 // Trying to match the following pattern. The shift offset must be
6894 // a constant and a multiple of 8. It is the byte offset in "y".
6896 // x = srl y, offset
6899 if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6902 Offset = ShiftOffset->getSExtValue()/8;
6903 Value = Value.getOperand(0);
6906 // Stores must share the same combined value with different offsets.
// First iteration captures the value; later ones must match it modulo
// truncates/extends (compared via stripTruncAndExt).
6908 CombinedValue = Value;
6909 else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6912 // The trunc and all the extend operation should be stripped to get the
6913 // real value being stored.
6914 else if (CombinedValue.getValueType() != VT) {
// Prefer the widest representative of the value so a later TRUNCATE to
// VT is always valid.
6915 if (Value.getValueType() == VT ||
6916 Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6917 CombinedValue = Value;
6918 // Give up if the combined value type is smaller than the store size.
6919 if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6923 // Stores must share the same base address
6924 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6925 int64_t ByteOffsetFromBase = 0;
6928 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6931 // Remember the first byte store
// Track the lowest-addressed store: its address becomes the base of the
// combined wide store.
6932 if (ByteOffsetFromBase < FirstOffset) {
6934 FirstOffset = ByteOffsetFromBase;
6936 // Map the offset in the store and the offset in the combined value, and
6937 // early return if it has been set before.
// The INT64_MAX sentinel from the ByteOffsets init marks "unseen"; a
// duplicate Offset means two stores target the same byte of the value.
6938 if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6940 ByteOffsets[Offset] = ByteOffsetFromBase;
6943 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6944 assert(FirstStore && "First store must be set");
6946 // Check if the bytes of the combined value we are looking at match with
6947 // either big or little endian value store.
6948 Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6949 if (!IsBigEndian.hasValue())
6952 // The node we are looking at matches with the pattern, check if we can
6953 // replace it with a single bswap if needed and store.
6955 // If the store needs byte swap check if the target supports it
6956 bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6958 // Before legalize we can introduce illegal bswaps which will be later
6959 // converted to an explicit bswap sequence. This way we end up with a single
6960 // store and byte shuffling instead of several stores and byte shuffling.
6961 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6964 // Check that a store of the wide type is both allowed and fast on the target
6967 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6968 *FirstStore->getMemOperand(), &Fast);
6969 if (!Allowed || !Fast)
// Narrow the combined value down to the store type if needed, then apply
// the byte swap and emit the single wide store.
6972 if (VT != CombinedValue.getValueType()) {
6973 assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6974 "Get unexpected store value to combine");
6975 CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6980 CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6983 DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
6984 FirstStore->getPointerInfo(), FirstStore->getAlignment());
6986 // Rely on other DAG combine rules to remove the other individual stores.
6987 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6991 /// Match a pattern where a wide type scalar value is loaded by several narrow
6992 /// loads and combined by shifts and ors. Fold it into a single load or a load
6993 /// and a BSWAP if the targets supports it.
6995 /// Assuming little endian target:
6997 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6999 /// i32 val = *((i32)a)
7002 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7004 /// i32 val = BSWAP(*((i32)a))
7006 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7007 /// interact well with the worklist mechanism. When a part of the pattern is
7008 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7009 /// but the root node of the pattern which triggers the load combine is not
7010 /// necessarily a direct user of the changed node. For example, once the address
7011 /// of t28 load is reassociated load combine won't be triggered:
7012 /// t25: i32 = add t4, Constant:i32<2>
7013 /// t26: i64 = sign_extend t25
7014 /// t27: i64 = add t2, t26
7015 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7016 /// t29: i32 = zero_extend t28
7017 /// t32: i32 = shl t29, Constant:i8<8>
7018 /// t33: i32 = or t23, t32
7019 /// As a possible fix visitLoad can check if the load can be a part of a load
7020 /// combine pattern and add corresponding OR roots to the worklist.
7021 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// NOTE(review): interior lines (early returns, `SDValue Chain;` decl,
// some braces) are elided from this excerpt; comments describe only the
// visible flow.
7022 assert(N->getOpcode() == ISD::OR &&
7023 "Can only match load combining against OR nodes");
7025 // Handles simple types only
7026 EVT VT = N->getValueType(0);
7027 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7029 unsigned ByteWidth = VT.getSizeInBits() / 8;
7031 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
// Translate a ByteProvider into the memory offset of that byte within its
// load, accounting for target endianness.
7032 auto MemoryByteOffset = [&] (ByteProvider P) {
7033 assert(P.isMemory() && "Must be a memory byte provider");
7034 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7035 assert(LoadBitWidth % 8 == 0 &&
7036 "can only analyze providers for individual bytes not bit");
7037 unsigned LoadByteWidth = LoadBitWidth / 8;
7038 return IsBigEndianTarget
7039 ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
7040 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
7043 Optional<BaseIndexOffset> Base;
7046 SmallPtrSet<LoadSDNode *, 8> Loads;
7047 Optional<ByteProvider> FirstByteProvider;
7048 int64_t FirstOffset = INT64_MAX;
7050 // Check if all the bytes of the OR we are looking at are loaded from the same
7051 // base address. Collect bytes offsets from Base address in ByteOffsets.
7052 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7053 unsigned ZeroExtendedBytes = 0;
// Iterate most-significant byte first so a run of constant-zero high
// bytes can be recognized as a zero-extension.
7054 for (int i = ByteWidth - 1; i >= 0; --i) {
7055 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7059 if (P->isConstantZero()) {
7060 // It's OK for the N most significant bytes to be 0, we can just
7061 // zero-extend the load.
// Zero bytes are only acceptable as a contiguous run at the top; this
// equality check fails on any interior zero byte.
7062 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7066 assert(P->isMemory() && "provenance should either be memory or zero");
7068 LoadSDNode *L = P->Load;
7069 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7071 "Must be enforced by calculateByteProvider");
7072 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7074 // All loads must share the same chain
7075 SDValue LChain = L->getChain();
7078 else if (Chain != LChain)
7081 // Loads must share the same base address
7082 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7083 int64_t ByteOffsetFromBase = 0;
7086 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7089 // Calculate the offset of the current byte from the base address
7090 ByteOffsetFromBase += MemoryByteOffset(*P);
7091 ByteOffsets[i] = ByteOffsetFromBase;
7093 // Remember the first byte load
// Track the lowest-addressed byte: its load supplies the address for the
// combined wide load.
7094 if (ByteOffsetFromBase < FirstOffset) {
7095 FirstByteProvider = P;
7096 FirstOffset = ByteOffsetFromBase;
7101 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7102 "memory, so there must be at least one load which produces the value");
7103 assert(Base && "Base address of the accessed memory location must be set");
7104 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7106 bool NeedsZext = ZeroExtendedBytes > 0;
// Memory type covers only the non-zero bytes; zext fills in the rest.
7109 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7111 if (!MemVT.isSimple())
7114 // Before legalize we can introduce too wide illegal loads which will be later
7115 // split into legal sized loads. This enables us to combine i64 load by i8
7116 // patterns to a couple of i32 loads on 32 bit targets.
7117 if (LegalOperations &&
7118 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7122 // Check if the bytes of the OR we are looking at match with either big or
7123 // little endian value load
// Only the loaded (non-zero) bytes participate in the endianness check.
7124 Optional<bool> IsBigEndian = isBigEndian(
7125 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7126 if (!IsBigEndian.hasValue())
7129 assert(FirstByteProvider && "must be set");
7131 // Ensure that the first byte is loaded from zero offset of the first load.
7132 // So the combined value can be loaded from the first load address.
7133 if (MemoryByteOffset(*FirstByteProvider) != 0)
7135 LoadSDNode *FirstLoad = FirstByteProvider->Load;
7137 // The node we are looking at matches with the pattern, check if we can
7138 // replace it with a single (possibly zero-extended) load and bswap + shift if
7141 // If the load needs byte swap check if the target supports it
7142 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7144 // Before legalize we can introduce illegal bswaps which will be later
7145 // converted to an explicit bswap sequence. This way we end up with a single
7146 // load and byte shuffling instead of several loads and byte shuffling.
7147 // We do not introduce illegal bswaps when zero-extending as this tends to
7148 // introduce too many arithmetic instructions.
7149 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7150 !TLI.isOperationLegal(ISD::BSWAP, VT))
7153 // If we need to bswap and zero extend, we have to insert a shift. Check that
7155 if (NeedsBswap && NeedsZext && LegalOperations &&
7156 !TLI.isOperationLegal(ISD::SHL, VT))
7159 // Check that a load of the wide type is both allowed and fast on the target
7162 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7163 *FirstLoad->getMemOperand(), &Fast);
7164 if (!Allowed || !Fast)
// Emit the single wide (possibly zero-extending) load at the first load's
// address.
7167 SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7168 SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
7169 FirstLoad->getPointerInfo(), MemVT,
7170 FirstLoad->getAlignment());
7172 // Transfer chain users from old loads to the new load.
7173 for (LoadSDNode *L : Loads)
7174 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
// When both bswap and zext are needed, pre-shift the loaded bytes to the
// top so the bswap lands them (and the zeros) in the right positions.
7179 SDValue ShiftedLoad =
7181 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7182 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7183 SDLoc(N), LegalOperations))
7185 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7188 // If the target has andn, bsl, or a similar bit-select instruction,
7189 // we want to unfold masked merge, with canonical pattern of:
7191 // ((x ^ y) & m) ^ y
7194 // (x & m) | (y & ~m)
7195 // If y is a constant, and the 'andn' does not work with immediates,
7196 // we unfold into a different pattern:
7197 // ~(~x & m) & (m | y)
7198 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7199 // the very least that breaks andnpd / andnps patterns, and because those
7200 // patterns are simplified in IR and shouldn't be created in the DAG
7201 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
// NOTE(review): the `SDValue X, Y, M;` declarations, `SDLoc DL(N);`, and a
// few returns/braces are elided from this excerpt.
7202 assert(N->getOpcode() == ISD::XOR);
7204 // Don't touch 'not' (i.e. where y = -1).
7205 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7208 EVT VT = N->getValueType(0);
7210 // There are 3 commutable operators in the pattern,
7211 // so we have to deal with 8 possible variants of the basic pattern.
// Matcher for ((x ^ y) & m) against one operand; on success fills in the
// captured X, Y, M references.
7213 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7214 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7216 SDValue Xor = And.getOperand(XorIdx);
7217 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7219 SDValue Xor0 = Xor.getOperand(0);
7220 SDValue Xor1 = Xor.getOperand(1);
7221 // Don't touch 'not' (i.e. where y = -1).
7222 if (isAllOnesOrAllOnesSplat(Xor1))
// XOR is commutative; normalize so the operand matching `Other` is Xor1.
7225 std::swap(Xor0, Xor1);
// The mask is the AND operand that is not the XOR.
7230 M = And.getOperand(XorIdx ? 0 : 1);
// Try the masked-merge pattern with the AND on either side of the outer
// XOR, and with the inner XOR in either AND operand position.
7234 SDValue N0 = N->getOperand(0);
7235 SDValue N1 = N->getOperand(1);
7236 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7237 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7240 // Don't do anything if the mask is constant. This should not be reachable.
7241 // InstCombine should have already unfolded this pattern, and DAGCombiner
7242 // probably shouldn't produce it, too.
7243 if (isa<ConstantSDNode>(M.getNode()))
7246 // We can transform if the target has AndNot
7247 if (!TLI.hasAndNot(M))
7252 // If Y is a constant, check that 'andn' works with immediates.
7253 if (!TLI.hasAndNot(Y)) {
7254 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7255 // If not, we need to do a bit more work to make sure andn is still used.
// Alternate unfold: ~(~x & m) & (m | y) keeps the andn on the variable x.
7256 SDValue NotX = DAG.getNOT(DL, X, VT);
7257 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7258 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7259 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7260 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
// Canonical unfold: (x & m) | (y & ~m).
7263 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7264 SDValue NotM = DAG.getNOT(DL, M, VT);
7265 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7267 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
// Main XOR combiner: constant folding, canonicalization, 'not' simplifications,
// rotate/abs/masked-merge pattern matching, and demanded-bits simplification.
// NOTE(review): this excerpt elides a number of interior lines (returns,
// braces, `SDLoc DL(N);`); comments describe only the visible folds.
7270 SDValue DAGCombiner::visitXOR(SDNode *N) {
7271 SDValue N0 = N->getOperand(0);
7272 SDValue N1 = N->getOperand(1);
7273 EVT VT = N0.getValueType();
7276 if (VT.isVector()) {
7277 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7280 // fold (xor x, 0) -> x, vector edition
7281 if (ISD::isBuildVectorAllZeros(N0.getNode()))
7283 if (ISD::isBuildVectorAllZeros(N1.getNode()))
7287 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7289 if (N0.isUndef() && N1.isUndef())
7290 return DAG.getConstant(0, DL, VT);
7292 // fold (xor x, undef) -> undef
7298 // fold (xor c1, c2) -> c1^c2
7299 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7302 // canonicalize constant to RHS
7303 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7304 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7305 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7307 // fold (xor x, 0) -> x
7308 if (isNullConstant(N1))
7311 if (SDValue NewSel = foldBinOpIntoSelect(N))
7315 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7318 // fold !(x cc y) -> (x !cc y)
// xor with all-true against a setcc-like node inverts the condition code,
// provided the inverted condition is legal (or we're pre-legalization).
7319 unsigned N0Opcode = N0.getOpcode();
7320 SDValue LHS, RHS, CC;
7321 if (TLI.isConstTrueVal(N1.getNode()) &&
7322 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7323 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7324 LHS.getValueType());
7325 if (!LegalOperations ||
7326 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7329 llvm_unreachable("Unhandled SetCC Equivalent!");
7331 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7332 case ISD::SELECT_CC:
7333 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7334 N0.getOperand(3), NotCC);
7335 case ISD::STRICT_FSETCC:
7336 case ISD::STRICT_FSETCCS: {
7337 if (N0.hasOneUse()) {
7338 // FIXME Can we handle multiple uses? Could we token factor the chain
7339 // results from the new/old setcc?
// Strict FP setcc produces a chain result too; rewire it to the new node
// and delete the old one.
7340 SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7342 N0Opcode == ISD::STRICT_FSETCCS);
7343 CombineTo(N, SetCC);
7344 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7345 recursivelyDeleteUnusedNodes(N0.getNode());
7346 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7354 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7355 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7356 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7357 SDValue V = N0.getOperand(0);
7359 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7360 DAG.getConstant(1, DL0, V.getValueType()));
7361 AddToWorklist(V.getNode());
7362 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7365 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
// De Morgan for i1 logic when at least one side is a single-use setcc.
7366 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7367 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7368 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7369 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7370 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7371 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7372 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7373 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7374 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7377 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
// Same De Morgan transform for a full-width 'not' when one operand is
// constant (so one of the new 'not's folds away).
7378 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7379 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7380 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7381 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7382 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7383 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7384 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7385 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7386 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7390 // fold (not (neg x)) -> (add X, -1)
7391 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7392 // Y is a constant or the subtract has a single use.
7393 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7394 isNullConstant(N0.getOperand(0))) {
7395 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7396 DAG.getAllOnesConstant(DL, VT));
7399 // fold (not (add X, -1)) -> (neg X)
7400 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7401 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7402 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7406 // fold (xor (and x, y), y) -> (and (not x), y)
7407 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7408 SDValue X = N0.getOperand(0);
7409 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7410 AddToWorklist(NotX.getNode());
7411 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7414 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7415 ConstantSDNode *XorC = isConstOrConstSplat(N1);
7416 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7417 unsigned BitWidth = VT.getScalarSizeInBits();
7418 if (XorC && ShiftC) {
7419 // Don't crash on an oversized shift. We can not guarantee that a bogus
7420 // shift has been simplified to undef.
7421 uint64_t ShiftAmt = ShiftC->getLimitedValue();
7422 if (ShiftAmt < BitWidth) {
7423 APInt Ones = APInt::getAllOnesValue(BitWidth);
7424 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7425 if (XorC->getAPIntValue() == Ones) {
7426 // If the xor constant is a shifted -1, do a 'not' before the shift:
7427 // xor (X << ShiftC), XorC --> (not X) << ShiftC
7428 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7429 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7430 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7436 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
// Classic branchless-abs idiom: Y is the sign mask (arithmetic shift by
// width-1), and (X + Y) ^ Y negates X exactly when X is negative.
7437 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7438 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7439 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7440 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7441 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7442 SDValue S0 = S.getOperand(0);
7443 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
7444 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7445 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7446 if (C->getAPIntValue() == (OpSizeInBits - 1))
7447 return DAG.getNode(ISD::ABS, DL, VT, S0);
7452 // fold (xor x, x) -> 0
7454 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7456 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7457 // Here is a concrete example of this equivalence:
7459 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7460 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7464 // i16 ~1 == 0b1111111111111110
7465 // i16 rol(~1, 14) == 0b1011111111111111
7467 // Some additional tips to help conceptualize this transform:
7468 // - Try to see the operation as placing a single zero in a value of all ones.
7469 // - There exists no value for x which would allow the result to contain zero.
7470 // - Values of x larger than the bitwidth are undefined and do not require a
7471 // consistent result.
7472 // - Pushing the zero left requires shifting one bits in from the right.
7473 // A rotate left of ~1 is a nice way of achieving the desired result.
7474 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7475 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7476 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7480 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7481 if (N0Opcode == N1.getOpcode())
7482 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7485 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
7486 if (SDValue MM = unfoldMaskedMerge(N))
7489 // Simplify the expression using non-local knowledge.
7490 if (SimplifyDemandedBits(SDValue(N, 0)))
7491 return SDValue(N, 0);
7493 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7499 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7500 /// shift-by-constant operand with identical opcode, we may be able to convert
7501 /// that into 2 independent shifts followed by the logic op. This is a
7502 /// throughput improvement.
7503 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7504 // Match a one-use bitwise logic op.
7505 SDValue LogicOp = Shift->getOperand(0);
7506 if (!LogicOp.hasOneUse())
// Only plain bitwise logic ops (and/or/xor) distribute over shifts this way.
7509 unsigned LogicOpcode = LogicOp.getOpcode();
7510 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7511 LogicOpcode != ISD::XOR)
7514 // Find a matching one-use shift by constant.
7515 unsigned ShiftOpcode = Shift->getOpcode();
7516 SDValue C1 = Shift->getOperand(1);
7517 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7518 assert(C1Node && "Expected a shift with constant operand");
7519 const APInt &C1Val = C1Node->getAPIntValue();
// matchFirstShift: matches a one-use shift of the same opcode whose amount is
// a constant (splat). On success, captures the shifted value in ShiftOp and a
// pointer to the constant shift amount in ShiftAmtVal.
7520 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7521 const APInt *&ShiftAmtVal) {
7522 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7525 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7529 // Capture the shifted operand and shift amount value.
7530 ShiftOp = V.getOperand(0);
7531 ShiftAmtVal = &ShiftCNode->getAPIntValue();
7533 // Shift amount types do not have to match their operand type, so check that
7534 // the constants are the same width.
7535 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7538 // The fold is not valid if the sum of the shift values exceeds bitwidth.
7539 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7545 // Logic ops are commutative, so check each operand for a match.
7548 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7549 Y = LogicOp.getOperand(1);
7550 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7551 Y = LogicOp.getOperand(0);
// Build the two independent shifts plus the logic op. The two new shifts have
// no dependence on each other, which is the throughput win promised in the
// header comment.
7555 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7557 EVT VT = Shift->getValueType(0);
7558 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7559 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7560 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7561 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7562 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7565 /// Handle transforms common to the three shifts, when the shift amount is a constant.
7567 /// We are looking for: (shift being one of shl/sra/srl)
7568 /// shift (binop X, C0), C1
7569 /// And want to transform into:
7570 /// binop (shift X, C1), (shift C0, C1)
// Common handling for shl/sra/srl by a constant: try to commute the shift
// with a one-use inner binop so the binop's constant RHS gets folded.
7571 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7572 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7574 // Do not turn a 'not' into a regular xor.
7575 if (isBitwiseNot(N->getOperand(0)))
7578 // The inner binop must be one-use, since we want to replace it.
7579 SDValue LHS = N->getOperand(0);
7580 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7583 // TODO: This is limited to early combining because it may reveal regressions
7584 // otherwise. But since we just checked a target hook to see if this is
7585 // desirable, that should have filtered out cases where this interferes
7586 // with some other pattern matching.
7588 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7591 // We want to pull some binops through shifts, so that we have (and (shift))
7592 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
7593 // thing happens with address calculations, so it's important to canonicalize
7595 switch (LHS.getOpcode()) {
// ADD does not distribute over sra/srl, so only the shl form is safe here.
7603 if (N->getOpcode() != ISD::SHL)
7604 return SDValue(); // only shl(add) not sr[al](add).
7608 // We require the RHS of the binop to be a constant and not opaque as well.
7609 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7613 // FIXME: disable this unless the input to the binop is a shift by a constant
7614 // or is copy/select. Enable this in other cases when figure out it's exactly
7616 SDValue BinOpLHSVal = LHS.getOperand(0);
7617 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7618 BinOpLHSVal.getOpcode() == ISD::SRA ||
7619 BinOpLHSVal.getOpcode() == ISD::SRL) &&
7620 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7621 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7622 BinOpLHSVal.getOpcode() == ISD::SELECT;
7624 if (!IsShiftByConstant && !IsCopyOrSelect)
7627 if (IsCopyOrSelect && N->hasOneUse())
7630 // Fold the constants, shifting the binop RHS by the shift amount.
// NewRHS must constant-fold (both inputs are constants); the assert below
// checks that the fold actually produced a constant node.
7632 EVT VT = N->getValueType(0);
7633 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
7635 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
// Rebuild as binop(shift(X, C1), shifted-constant), i.e. the shift is now
// applied to the binop's non-constant operand.
7637 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
7639 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
// Push a truncate through an AND with a constant operand:
// (truncate (and X, C)) -> (and (truncate X), (truncate C)).
// Used by the shift/rotate visitors to narrow masked shift amounts.
7642 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7643 assert(N->getOpcode() == ISD::TRUNCATE);
7644 assert(N->getOperand(0).getOpcode() == ISD::AND);
7646 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7647 EVT TruncVT = N->getValueType(0);
// Both the truncate and the AND must be one-use so the original nodes die,
// and the target must consider AND desirable at the narrow type.
7648 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7649 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7650 SDValue N01 = N->getOperand(0).getOperand(1);
7651 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7653 SDValue N00 = N->getOperand(0).getOperand(0);
7654 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7655 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
// Queue the new truncates so the combiner revisits them.
7656 AddToWorklist(Trunc00.getNode());
7657 AddToWorklist(Trunc01.getNode());
7658 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
// Combine ROTL/ROTR nodes (both opcodes share this visitor).
7665 SDValue DAGCombiner::visitRotate(SDNode *N) {
7667 SDValue N0 = N->getOperand(0);
7668 SDValue N1 = N->getOperand(1);
7669 EVT VT = N->getValueType(0);
7670 unsigned Bitsize = VT.getScalarSizeInBits();
7672 // fold (rot x, 0) -> x
7673 if (isNullOrNullSplat(N1))
7676 // fold (rot x, c) -> x iff (c % BitSize) == 0
// For power-of-2 bitsizes, "c % Bitsize == 0" is equivalent to the low
// log2(Bitsize) bits of c all being known zero.
7677 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7678 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7679 if (DAG.MaskedValueIsZero(N1, ModuloMask))
7683 // fold (rot x, c) -> (rot x, c % BitSize)
// OutOfRange is set if any (splat/vector element) amount is >= Bitsize;
// only then is the UREM reduction worthwhile.
7684 bool OutOfRange = false;
7685 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
7686 OutOfRange |= C->getAPIntValue().uge(Bitsize);
7689 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
7690 EVT AmtVT = N1.getValueType();
7691 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
7693 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
7694 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
7697 // rot i16 X, 8 --> bswap X
// Rotating a 16-bit value by half its width swaps the two bytes, which is
// exactly BSWAP when the target supports it.
7698 auto *RotAmtC = isConstOrConstSplat(N1);
7699 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
7700 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
7701 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
7703 // Simplify the operands using demanded-bits information.
7704 if (SimplifyDemandedBits(SDValue(N, 0)))
7705 return SDValue(N, 0);
7707 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7708 if (N1.getOpcode() == ISD::TRUNCATE &&
7709 N1.getOperand(0).getOpcode() == ISD::AND) {
7710 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7711 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7714 unsigned NextOp = N0.getOpcode();
7715 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
7716 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7717 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7718 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7719 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7720 EVT ShiftVT = C1->getValueType(0);
// Same-direction rotates add their amounts; opposite directions subtract.
7721 bool SameSide = (N->getOpcode() == NextOp);
7722 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7723 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
7724 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
// Normalize the combined amount into range with a (signed) remainder.
7725 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7726 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
7727 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
7728 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
// Combine SHL nodes: constant folding, shift merging, and commuting the
// shift with various one-use inner ops (ext/srl/sra/add/or/mul/vscale).
7736 SDValue DAGCombiner::visitSHL(SDNode *N) {
7737 SDValue N0 = N->getOperand(0);
7738 SDValue N1 = N->getOperand(1);
7739 if (SDValue V = DAG.simplifyShift(N0, N1))
7742 EVT VT = N0.getValueType();
7743 EVT ShiftVT = N1.getValueType();
7744 unsigned OpSizeInBits = VT.getScalarSizeInBits();
// Vector-only folds first.
7747 if (VT.isVector()) {
7748 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7751 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7752 // If setcc produces all-one true value then:
7753 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7754 if (N1CV && N1CV->isConstant()) {
7755 if (N0.getOpcode() == ISD::AND) {
7756 SDValue N00 = N0->getOperand(0);
7757 SDValue N01 = N0->getOperand(1);
7758 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
// Valid only when the setcc's true value is all-ones (so shifting the mask
// constant is equivalent to shifting the AND's result).
7760 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7761 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
7762 TargetLowering::ZeroOrNegativeOneBooleanContent) {
7764 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
7765 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7771 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7773 // fold (shl c1, c2) -> c1<<c2
7774 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
7777 if (SDValue NewSel = foldBinOpIntoSelect(N))
7780 // if (shl x, c) is known to be zero, return 0
7781 if (DAG.MaskedValueIsZero(SDValue(N, 0),
7782 APInt::getAllOnesValue(OpSizeInBits)))
7783 return DAG.getConstant(0, SDLoc(N), VT);
7785 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7786 if (N1.getOpcode() == ISD::TRUNCATE &&
7787 N1.getOperand(0).getOpcode() == ISD::AND) {
7788 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7789 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7792 if (SimplifyDemandedBits(SDValue(N, 0)))
7793 return SDValue(N, 0);
7795 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
7796 if (N0.getOpcode() == ISD::SHL) {
// The amounts are zero-extended to a common width (plus one overflow bit)
// so c1+c2 cannot wrap before the range comparison.
7797 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7798 ConstantSDNode *RHS) {
7799 APInt c1 = LHS->getAPIntValue();
7800 APInt c2 = RHS->getAPIntValue();
7801 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7802 return (c1 + c2).uge(OpSizeInBits);
7804 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7805 return DAG.getConstant(0, SDLoc(N), VT);
7807 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7808 ConstantSDNode *RHS) {
7809 APInt c1 = LHS->getAPIntValue();
7810 APInt c2 = RHS->getAPIntValue();
7811 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7812 return (c1 + c2).ult(OpSizeInBits);
7814 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7816 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7817 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7821 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7822 // For this to be valid, the second form must not preserve any of the bits
7823 // that are shifted out by the inner shift in the first form. This means
7824 // the outer shift size must be >= the number of bits added by the ext.
7825 // As a corollary, we don't care what kind of ext it is.
7826 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7827 N0.getOpcode() == ISD::ANY_EXTEND ||
7828 N0.getOpcode() == ISD::SIGN_EXTEND) &&
7829 N0.getOperand(0).getOpcode() == ISD::SHL) {
7830 SDValue N0Op0 = N0.getOperand(0);
7831 SDValue InnerShiftAmt = N0Op0.getOperand(1);
7832 EVT InnerVT = N0Op0.getValueType();
7833 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
// c2 >= (OpSizeInBits - InnerBitwidth) is the "outer shift covers the ext
// bits" condition from the comment above; c1+c2 >= width means the result
// is all zeros.
7835 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7836 ConstantSDNode *RHS) {
7837 APInt c1 = LHS->getAPIntValue();
7838 APInt c2 = RHS->getAPIntValue();
7839 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7840 return c2.uge(OpSizeInBits - InnerBitwidth) &&
7841 (c1 + c2).uge(OpSizeInBits);
7843 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7844 /*AllowUndefs*/ false,
7845 /*AllowTypeMismatch*/ true))
7846 return DAG.getConstant(0, SDLoc(N), VT);
7848 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7849 ConstantSDNode *RHS) {
7850 APInt c1 = LHS->getAPIntValue();
7851 APInt c2 = RHS->getAPIntValue();
7852 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7853 return c2.uge(OpSizeInBits - InnerBitwidth) &&
7854 (c1 + c2).ult(OpSizeInBits);
7856 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7857 /*AllowUndefs*/ false,
7858 /*AllowTypeMismatch*/ true)) {
// The inner amount may have a different type than N1; normalize it to the
// outer shift-amount type before adding.
7860 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
7861 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7862 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7863 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7867 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7868 // Only fold this if the inner zext has no other uses to avoid increasing
7869 // the total number of instructions.
7870 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7871 N0.getOperand(0).getOpcode() == ISD::SRL) {
7872 SDValue N0Op0 = N0.getOperand(0);
7873 SDValue InnerShiftAmt = N0Op0.getOperand(1);
// Requires the srl and shl amounts to be equal and in range, so the shl
// only refills the exact bits the srl cleared.
7875 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7876 APInt c1 = LHS->getAPIntValue();
7877 APInt c2 = RHS->getAPIntValue();
7878 zeroExtendToMatch(c1, c2);
7879 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7881 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7882 /*AllowUndefs*/ false,
7883 /*AllowTypeMismatch*/ true)) {
7885 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7886 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7887 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7888 AddToWorklist(NewSHL.getNode());
7889 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7893 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
7894 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
7895 // TODO - support non-uniform vector shift amounts.
// "exact" means the right shift drops no set bits, which is what makes the
// cancellation legal.
7896 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7897 N0->getFlags().hasExact()) {
7898 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7899 uint64_t C1 = N0C1->getZExtValue();
7900 uint64_t C2 = N1C->getZExtValue();
7903 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7904 DAG.getConstant(C2 - C1, DL, ShiftVT));
7905 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7906 DAG.getConstant(C1 - C2, DL, ShiftVT));
7910 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
7911 // (and (srl x, (sub c1, c2), MASK)
7912 // Only fold this if the inner shift has no other uses -- if it does, folding
7913 // this will increase the total number of instructions.
7914 // TODO - drop hasOneUse requirement if c1 == c2?
7915 // TODO - support non-uniform vector shift amounts.
7916 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7917 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
7918 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7919 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7920 uint64_t c1 = N0C1->getZExtValue();
7921 uint64_t c2 = N1C->getZExtValue();
// Mask starts as the bits the srl can produce; it is adjusted below to
// match whichever residual shift direction is used.
7922 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7927 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7928 DAG.getConstant(c2 - c1, DL, ShiftVT));
7930 Mask.lshrInPlace(c1 - c2);
7932 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7933 DAG.getConstant(c1 - c2, DL, ShiftVT));
7936 return DAG.getNode(ISD::AND, DL, VT, Shift,
7937 DAG.getConstant(Mask, DL, VT));
7942 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
// sra then shl by the same amount just clears the low c1 bits, i.e. an AND
// with a high-bits mask built as (-1 << c1).
7943 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7944 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7946 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7947 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7948 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7951 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7952 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7953 // Variant of version done on multiply, except mul by a power of 2 is turned
7955 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7956 N0.getNode()->hasOneUse() &&
7957 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7958 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7959 TLI.isDesirableToCommuteWithShift(N, Level)) {
7960 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7961 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7962 AddToWorklist(Shl0.getNode());
7963 AddToWorklist(Shl1.getNode());
7964 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7967 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7968 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7969 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7970 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
// Only commit if shifting the mul constant actually folded to a constant.
7971 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7972 if (isConstantOrConstantVector(Shl))
7973 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7976 if (N1C && !N1C->isOpaque())
7977 if (SDValue NewSHL = visitShiftByConstant(N))
7980 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
7981 if (N0.getOpcode() == ISD::VSCALE)
7982 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
7984 APInt C0 = N0.getConstantOperandAPInt(0);
7985 APInt C1 = NC1->getAPIntValue();
7986 return DAG.getVScale(DL, VT, C0 << C1);
7992 // Transform a right shift of a multiply into a multiply-high.
7994 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
7995 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
7996 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
7997 const TargetLowering &TLI) {
7998 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
7999 "SRL or SRA node is required here!");
8001 // Check the shift amount. Proceed with the transformation if the shift
8002 // amount is constant.
8003 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8009 // The operation feeding into the shift must be a multiply.
8010 SDValue ShiftOperand = N->getOperand(0);
8011 if (ShiftOperand.getOpcode() != ISD::MUL)
8014 // Both operands must be equivalent extend nodes.
8015 SDValue LeftOp = ShiftOperand.getOperand(0);
8016 SDValue RightOp = ShiftOperand.getOperand(1);
8017 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8018 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
// Both multiply operands must use the SAME kind of extend (both sext or
// both zext) for a single mulhs/mulhu to be equivalent.
8020 if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8023 EVT WideVT1 = LeftOp.getValueType();
8024 EVT WideVT2 = RightOp.getValueType();
8026 // Proceed with the transformation if the wide types match.
8027 assert((WideVT1 == WideVT2) &&
8028 "Cannot have a multiply node with two different operand types.");
8030 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8031 // Check that the two extend nodes are the same type.
8032 if (NarrowVT != RightOp.getOperand(0).getValueType())
8035 // Only transform into mulh if mulh for the narrow type is cheaper than
8036 // a multiply followed by a shift. This should also check if mulh is
8037 // legal for NarrowVT on the target.
8038 if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
8041 // Proceed with the transformation if the wide type is twice as large
8042 // as the narrow type.
8043 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8044 if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8047 // Check the shift amount with the narrow type size.
8048 // Proceed with the transformation if the shift amount is the width
8049 // of the narrow type.
8050 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8051 if (ShiftAmt != NarrowVTSize)
8054 // If the operation feeding into the MUL is a sign extend (sext),
8055 // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8056 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
// Build the narrow mulh, then re-extend to the original wide type: sext for
// the SRA form, zext for the SRL form.
8058 SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8059 RightOp.getOperand(0));
8060 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8061 : DAG.getZExtOrTrunc(Result, DL, WideVT1));
// Combine SRA nodes: constant folding, sext_inreg recognition, shift
// merging, narrowing through truncates, and mulh formation.
8064 SDValue DAGCombiner::visitSRA(SDNode *N) {
8065 SDValue N0 = N->getOperand(0);
8066 SDValue N1 = N->getOperand(1);
8067 if (SDValue V = DAG.simplifyShift(N0, N1))
8070 EVT VT = N0.getValueType();
8071 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8073 // Arithmetic shifting an all-sign-bit value is a no-op.
8074 // fold (sra 0, x) -> 0
8075 // fold (sra -1, x) -> -1
8076 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8081 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8084 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8086 // fold (sra c1, c2) -> (sra c1, c2)
8087 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8090 if (SDValue NewSel = foldBinOpIntoSelect(N))
8093 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// shl+sra by the same amount sign-extends the low (width - c1) bits, which
// is exactly SIGN_EXTEND_INREG from the ExtVT computed below.
8095 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8096 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8097 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8099 ExtVT = EVT::getVectorVT(*DAG.getContext(),
8100 ExtVT, VT.getVectorNumElements());
8101 if (!LegalOperations ||
8102 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8103 TargetLowering::Legal)
8104 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8105 N0.getOperand(0), DAG.getValueType(ExtVT));
8108 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8109 // clamp (add c1, c2) to max shift.
8110 if (N0.getOpcode() == ISD::SRA) {
8112 EVT ShiftVT = N1.getValueType();
8113 EVT ShiftSVT = ShiftVT.getScalarType();
8114 SmallVector<SDValue, 16> ShiftValues;
// SumOfShifts accumulates per-element clamped sums into ShiftValues; the
// overflow bit keeps c1+c2 from wrapping before the clamp. Clamping to
// OpSizeInBits-1 is safe because sra saturates at the sign bit.
8116 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8117 APInt c1 = LHS->getAPIntValue();
8118 APInt c2 = RHS->getAPIntValue();
8119 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8120 APInt Sum = c1 + c2;
8122 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8123 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8126 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8129 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8131 ShiftValue = ShiftValues[0];
8132 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8136 // fold (sra (shl X, m), (sub result_size, n))
8137 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8138 // result_size - n != m.
8139 // If truncate is free for the target sext(shl) is likely to result in better
8141 if (N0.getOpcode() == ISD::SHL && N1C) {
8142 // Get the two constants of the shifts, CN0 = m, CN = n.
8143 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8145 LLVMContext &Ctx = *DAG.getContext();
8146 // Determine what the truncate's result bitsize and type would be.
8147 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8150 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8152 // Determine the residual right-shift amount.
8153 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8155 // If the shift is not a no-op (in which case this should be just a sign
8156 // extend already), the truncated to type is legal, sign_extend is legal
8157 // on that type, and the truncate to that type is both legal and free,
8158 // perform the transform.
8159 if ((ShiftAmt > 0) &&
8160 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8161 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8162 TLI.isTruncateFree(VT, TruncVT)) {
8164 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8165 getShiftAmountTy(N0.getOperand(0).getValueType()));
8166 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8167 N0.getOperand(0), Amt);
8168 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8170 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8171 N->getValueType(0), Trunc);
8176 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8177 // sra (add (shl X, N1C), AddC), N1C -->
8178 // sext (add (trunc X to (width - N1C)), AddC')
8179 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8180 N0.getOperand(0).getOpcode() == ISD::SHL &&
8181 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8182 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8183 SDValue Shl = N0.getOperand(0);
8184 // Determine what the truncate's type would be and ask the target if that
8185 // is a free operation.
8186 LLVMContext &Ctx = *DAG.getContext();
8187 unsigned ShiftAmt = N1C->getZExtValue();
8188 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8190 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8192 // TODO: The simple type check probably belongs in the default hook
8193 // implementation and/or target-specific overrides (because
8194 // non-simple types likely require masking when legalized), but that
8195 // restriction may conflict with other transforms.
8196 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8197 TLI.isTruncateFree(VT, TruncVT)) {
// AddC' = AddC >> ShiftAmt, truncated to the narrow type (the low ShiftAmt
// bits of AddC land below the bits the outer sra keeps).
8199 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8200 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8201 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8202 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8203 return DAG.getSExtOrTrunc(Add, DL, VT);
8208 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8209 if (N1.getOpcode() == ISD::TRUNCATE &&
8210 N1.getOperand(0).getOpcode() == ISD::AND) {
8211 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8212 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8215 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8216 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8217 // if c1 is equal to the number of bits the trunc removes
8218 // TODO - support non-uniform vector shift amounts.
8219 if (N0.getOpcode() == ISD::TRUNCATE &&
8220 (N0.getOperand(0).getOpcode() == ISD::SRL ||
8221 N0.getOperand(0).getOpcode() == ISD::SRA) &&
8222 N0.getOperand(0).hasOneUse() &&
8223 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8224 SDValue N0Op0 = N0.getOperand(0);
8225 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8226 EVT LargeVT = N0Op0.getValueType();
8227 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
// Only when the inner shift amount equals the truncated bit count do the
// truncate's dropped bits line up with the inner shift.
8228 if (LargeShift->getAPIntValue() == TruncBits) {
8230 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8231 getShiftAmountTy(LargeVT));
8233 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8234 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8239 // Simplify, based on bits shifted out of the LHS.
8240 if (SimplifyDemandedBits(SDValue(N, 0)))
8241 return SDValue(N, 0);
8243 // If the sign bit is known to be zero, switch this to a SRL.
8244 if (DAG.SignBitIsZero(N0))
8245 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8247 if (N1C && !N1C->isOpaque())
8248 if (SDValue NewSRA = visitShiftByConstant(N))
8251 // Try to transform this shift into a multiply-high if
8252 // it matches the appropriate pattern detected in combineShiftToMULH.
8253 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8259 SDValue DAGCombiner::visitSRL(SDNode *N) {
8260 SDValue N0 = N->getOperand(0);
8261 SDValue N1 = N->getOperand(1);
8262 if (SDValue V = DAG.simplifyShift(N0, N1))
8265 EVT VT = N0.getValueType();
8266 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8270 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8273 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8275 // fold (srl c1, c2) -> c1 >>u c2
8276 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8279 if (SDValue NewSel = foldBinOpIntoSelect(N))
8282 // if (srl x, c) is known to be zero, return 0
8283 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8284 APInt::getAllOnesValue(OpSizeInBits)))
8285 return DAG.getConstant(0, SDLoc(N), VT);
8287 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8288 if (N0.getOpcode() == ISD::SRL) {
8289 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8290 ConstantSDNode *RHS) {
8291 APInt c1 = LHS->getAPIntValue();
8292 APInt c2 = RHS->getAPIntValue();
8293 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8294 return (c1 + c2).uge(OpSizeInBits);
8296 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8297 return DAG.getConstant(0, SDLoc(N), VT);
8299 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8300 ConstantSDNode *RHS) {
8301 APInt c1 = LHS->getAPIntValue();
8302 APInt c2 = RHS->getAPIntValue();
8303 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8304 return (c1 + c2).ult(OpSizeInBits);
8306 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8308 EVT ShiftVT = N1.getValueType();
8309 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8310 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8314 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8315 N0.getOperand(0).getOpcode() == ISD::SRL) {
8316 SDValue InnerShift = N0.getOperand(0);
8317 // TODO - support non-uniform vector shift amounts.
8318 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8319 uint64_t c1 = N001C->getZExtValue();
8320 uint64_t c2 = N1C->getZExtValue();
8321 EVT InnerShiftVT = InnerShift.getValueType();
8322 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8323 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8324 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8325 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
8326 if (c1 + OpSizeInBits == InnerShiftSize) {
8328 if (c1 + c2 >= InnerShiftSize)
8329 return DAG.getConstant(0, DL, VT);
8330 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8331 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8332 InnerShift.getOperand(0), NewShiftAmt);
8333 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8335 // In the more general case, we can clear the high bits after the shift:
8336 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8337 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8338 c1 + c2 < InnerShiftSize) {
8340 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8341 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8342 InnerShift.getOperand(0), NewShiftAmt);
8343 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8346 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8347 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8352 // fold (srl (shl x, c), c) -> (and x, cst2)
8353 // TODO - (srl (shl x, c1), c2).
8354 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8355 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8358 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8359 AddToWorklist(Mask.getNode());
8360 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8363 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8364 // TODO - support non-uniform vector shift amounts.
8365 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8366 // Shifting in all undef bits?
8367 EVT SmallVT = N0.getOperand(0).getValueType();
8368 unsigned BitSize = SmallVT.getScalarSizeInBits();
8369 if (N1C->getAPIntValue().uge(BitSize))
8370 return DAG.getUNDEF(VT);
8372 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8373 uint64_t ShiftAmt = N1C->getZExtValue();
8375 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8377 DAG.getConstant(ShiftAmt, DL0,
8378 getShiftAmountTy(SmallVT)));
8379 AddToWorklist(SmallShift.getNode());
8380 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8382 return DAG.getNode(ISD::AND, DL, VT,
8383 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8384 DAG.getConstant(Mask, DL, VT));
8388 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
8389 // bit, which is unmodified by sra.
8390 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8391 if (N0.getOpcode() == ISD::SRA)
8392 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8395 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
8396 if (N1C && N0.getOpcode() == ISD::CTLZ &&
8397 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8398 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8400 // If any of the input bits are KnownOne, then the input couldn't be all
8401 // zeros, thus the result of the srl will always be zero.
8402 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8404 // If all of the bits input the to ctlz node are known to be zero, then
8405 // the result of the ctlz is "32" and the result of the shift is one.
8406 APInt UnknownBits = ~Known.Zero;
8407 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8409 // Otherwise, check to see if there is exactly one bit input to the ctlz.
8410 if (UnknownBits.isPowerOf2()) {
8411 // Okay, we know that only that the single bit specified by UnknownBits
8412 // could be set on input to the CTLZ node. If this bit is set, the SRL
8413 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8414 // to an SRL/XOR pair, which is likely to simplify more.
8415 unsigned ShAmt = UnknownBits.countTrailingZeros();
8416 SDValue Op = N0.getOperand(0);
8420 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8421 DAG.getConstant(ShAmt, DL,
8422 getShiftAmountTy(Op.getValueType())));
8423 AddToWorklist(Op.getNode());
8427 return DAG.getNode(ISD::XOR, DL, VT,
8428 Op, DAG.getConstant(1, DL, VT));
8432 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8433 if (N1.getOpcode() == ISD::TRUNCATE &&
8434 N1.getOperand(0).getOpcode() == ISD::AND) {
8435 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8436 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8439 // fold operands of srl based on knowledge that the low bits are not
8441 if (SimplifyDemandedBits(SDValue(N, 0)))
8442 return SDValue(N, 0);
8444 if (N1C && !N1C->isOpaque())
8445 if (SDValue NewSRL = visitShiftByConstant(N))
8448 // Attempt to convert a srl of a load into a narrower zero-extending load.
8449 if (SDValue NarrowLoad = ReduceLoadWidth(N))
8452 // Here is a common situation. We want to optimize:
8455 // %b = and i32 %a, 2
8456 // %c = srl i32 %b, 1
8457 // brcond i32 %c ...
8463 // %c = setcc eq %b, 0
8466 // However when after the source operand of SRL is optimized into AND, the SRL
8467 // itself may not be optimized further. Look for it and add the BRCOND into
8469 if (N->hasOneUse()) {
8470 SDNode *Use = *N->use_begin();
8471 if (Use->getOpcode() == ISD::BRCOND)
8473 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8474 // Also look pass the truncate.
8475 Use = *Use->use_begin();
8476 if (Use->getOpcode() == ISD::BRCOND)
8481 // Try to transform this shift into a multiply-high if
8482 // it matches the appropriate pattern detected in combineShiftToMULH.
8483 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
/// Combine an ISD::FSHL / ISD::FSHR (funnel shift) node.  In order, this
/// tries to: drop a shift amount proven zero modulo the bit width, reduce a
/// constant amount modulo the bit width, lower to a plain SRL/SHL when one
/// data operand is undef or zero, replace a funnel shift of two consecutive
/// loads with a single offset load, form ROTL/ROTR when both data operands
/// are the same value, and finally simplify via demanded bits.
8489 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8490   EVT VT = N->getValueType(0);
8491   SDValue N0 = N->getOperand(0);
8492   SDValue N1 = N->getOperand(1);
8493   SDValue N2 = N->getOperand(2);
8494   bool IsFSHL = N->getOpcode() == ISD::FSHL;
8495   unsigned BitWidth = VT.getScalarSizeInBits();
8497   // fold (fshl N0, N1, 0) -> N0
8498   // fold (fshr N0, N1, 0) -> N1
       // Power-of-2 width lets "amount % BitWidth == 0" be tested as a mask of
       // the low log2(BitWidth) bits.
8499   if (isPowerOf2_32(BitWidth))
8500     if (DAG.MaskedValueIsZero(
8501             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8502       return IsFSHL ? N0 : N1;
8504   auto IsUndefOrZero = [](SDValue V) {
8505     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8508   // TODO - support non-uniform vector shift amounts.
8509   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8510     EVT ShAmtTy = N2.getValueType();
8512     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
8513     if (Cst->getAPIntValue().uge(BitWidth)) {
8514       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8515       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8516                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
       // From here on the constant amount is known < BitWidth (handled above).
8519     unsigned ShAmt = Cst->getZExtValue();
8521       return IsFSHL ? N0 : N1;
8523     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8524     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8525     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8526     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
8527     if (IsUndefOrZero(N0))
8528       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8529                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8530                                          SDLoc(N), ShAmtTy));
8531     if (IsUndefOrZero(N1))
8532       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8533                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8534                                          SDLoc(N), ShAmtTy));
8536     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8537     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8538     // TODO - bigendian support once we have test coverage.
8539     // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
8540     // TODO - permit LHS EXTLOAD if extensions are shifted out.
8541     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
8542         !DAG.getDataLayout().isBigEndian()) {
8543       auto *LHS = dyn_cast<LoadSDNode>(N0);
8544       auto *RHS = dyn_cast<LoadSDNode>(N1);
8545       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
8546           LHS->getAddressSpace() == RHS->getAddressSpace() &&
8547           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
8548           ISD::isNON_EXTLoad(LHS)) {
8549         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
           // Byte offset of the funnel-shift result within the combined
           // 2*BitWidth-bit value loaded from RHS's base (little-endian only).
8552               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
8553           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
8555           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8556                                      RHS->getAddressSpace(), NewAlign,
8557                                      RHS->getMemOperand()->getFlags(), &Fast) &&
8560                 DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
8561             AddToWorklist(NewPtr.getNode());
8562             SDValue Load = DAG.getLoad(
8563                 VT, DL, RHS->getChain(), NewPtr,
8564                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8565                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
8566             // Replace the old load's chain with the new load's chain.
8567             WorklistRemover DeadNodes(*this);
8568             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
8576   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
8577   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
8578   // iff we know the shift amount is in range.
8579   // TODO: when is it worth doing SUB(BW, N2) as well?
8580   if (isPowerOf2_32(BitWidth)) {
8581     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
8582     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8583       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
8584     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8585       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
8588   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
8589   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
8590   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
8591   // is legal as well we might be better off avoiding non-constant (BW - N2).
8592   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8593   if (N0 == N1 && hasOperation(RotOpc, VT))
8594     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8596   // Simplify, based on bits shifted out of N0/N1.
8597   if (SimplifyDemandedBits(SDValue(N, 0)))
8598     return SDValue(N, 0);
/// Combine an ISD::ABS node: constant-fold, collapse abs(abs x), and drop
/// abs entirely when the operand's sign bit is known zero.
8603 SDValue DAGCombiner::visitABS(SDNode *N) {
8604   SDValue N0 = N->getOperand(0);
8605   EVT VT = N->getValueType(0);
8607   // fold (abs c1) -> c2
       // Re-emitting the node with a constant operand relies on getNode to
       // constant-fold it.
8608   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8609     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8610   // fold (abs (abs x)) -> (abs x)
8611   if (N0.getOpcode() == ISD::ABS)
8613   // fold (abs x) -> x iff not-negative
8614   if (DAG.SignBitIsZero(N0))
/// Combine an ISD::BSWAP node: constant-fold, and collapse the involution
/// bswap(bswap x) -> x.
8619 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8620   SDValue N0 = N->getOperand(0);
8621   EVT VT = N->getValueType(0);
8623   // fold (bswap c1) -> c2
8624   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8625     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8626   // fold (bswap (bswap x)) -> x
8627   if (N0.getOpcode() == ISD::BSWAP)
8628     return N0->getOperand(0);
/// Combine an ISD::BITREVERSE node: constant-fold, and collapse the
/// involution bitreverse(bitreverse x) -> x.
8632 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8633   SDValue N0 = N->getOperand(0);
8634   EVT VT = N->getValueType(0);
8636   // fold (bitreverse c1) -> c2
8637   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8638     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8639   // fold (bitreverse (bitreverse x)) -> x
8640   if (N0.getOpcode() == ISD::BITREVERSE)
8641     return N0.getOperand(0);
/// Combine an ISD::CTLZ node: constant-fold, and upgrade to
/// CTLZ_ZERO_UNDEF when the operand is known non-zero (the zero-input case
/// can never be observed, so the cheaper variant is safe).
8645 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8646   SDValue N0 = N->getOperand(0);
8647   EVT VT = N->getValueType(0);
8649   // fold (ctlz c1) -> c2
8650   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8651     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8653   // If the value is known never to be zero, switch to the undef version.
8654   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8655     if (DAG.isKnownNeverZero(N0))
8656       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
/// Combine an ISD::CTLZ_ZERO_UNDEF node: constant-fold only.
8662 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8663   SDValue N0 = N->getOperand(0);
8664   EVT VT = N->getValueType(0);
8666   // fold (ctlz_zero_undef c1) -> c2
8667   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8668     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
/// Combine an ISD::CTTZ node: constant-fold, and upgrade to
/// CTTZ_ZERO_UNDEF when the operand is known non-zero (mirrors visitCTLZ).
8672 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8673   SDValue N0 = N->getOperand(0);
8674   EVT VT = N->getValueType(0);
8676   // fold (cttz c1) -> c2
8677   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8678     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8680   // If the value is known never to be zero, switch to the undef version.
8681   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8682     if (DAG.isKnownNeverZero(N0))
8683       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
/// Combine an ISD::CTTZ_ZERO_UNDEF node: constant-fold only.
8689 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8690   SDValue N0 = N->getOperand(0);
8691   EVT VT = N->getValueType(0);
8693   // fold (cttz_zero_undef c1) -> c2
8694   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8695     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
/// Combine an ISD::CTPOP node: constant-fold only.
8699 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8700   SDValue N0 = N->getOperand(0);
8701   EVT VT = N->getValueType(0);
8703   // fold (ctpop c1) -> c2
8704   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8705     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8709 // FIXME: This should be checking for no signed zeros on individual operands, as
/// Return true if a select of LHS/RHS may legally be turned into an
/// FMINNUM/FMAXNUM-style node: requires the global no-signed-zeros FP math
/// option, a floating-point type the target considers profitable, and both
/// operands known never to be NaN.
8711 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8713                                          const TargetLowering &TLI) {
8714   const TargetOptions &Options = DAG.getTarget().Options;
8715   EVT VT = LHS.getValueType();
8717   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8718          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8719          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8722 /// Generate Min/Max node
/// Given a select whose (True, False) arms match the compare's (LHS, RHS)
/// either directly or swapped, emit FMINNUM(_IEEE)/FMAXNUM(_IEEE) when the
/// target supports it; returns the new node or SDValue() on no match.
/// Callers must have already established NaN-freedom (see
/// isLegalToCombineMinNumMaxNum).
8723 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8724                                    SDValue RHS, SDValue True, SDValue False,
8725                                    ISD::CondCode CC, const TargetLowering &TLI,
8726                                    SelectionDAG &DAG) {
       // Bail unless the select arms are exactly the compared values (in
       // either order) — only then is the select a min/max.
8727   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8730   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8738     // Since it's known never nan to get here already, either fminnum or
8739     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8740     // expanded in terms of it.
8741     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8742     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8743       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8745     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8746     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8747       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
       // Opposite-sense branch: the same pattern with min/max exchanged.
8756     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8757     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8758       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8760     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8761     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8762       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8770 /// If a (v)select has a condition value that is a sign-bit test, try to smear
8771 /// the condition operand sign-bit across the value width and use it as a mask.
/// Handles (X > -1 ? C1 : -1) -> (sra X, BW-1) | C1 and
/// (X < 0 ? C1 : 0) -> (sra X, BW-1) & C1; returns SDValue() otherwise.
8772 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
8773   SDValue Cond = N->getOperand(0);
8774   SDValue C1 = N->getOperand(1);
8775   SDValue C2 = N->getOperand(2);
8776   assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
8777          "Expected select-of-constants");
8779   EVT VT = N->getValueType(0);
       // The sra trick only works when the compared value has the same type
       // as the select result (so the smeared sign bit lines up).
8780   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
8781       VT != Cond.getOperand(0).getValueType())
8784   // The inverted-condition + commuted-select variants of these patterns are
8785   // canonicalized to these forms in IR.
8786   SDValue X = Cond.getOperand(0);
8787   SDValue CondC = Cond.getOperand(1);
8788   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
8789   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
8790       isAllOnesOrAllOnesSplat(C2)) {
8791     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
8793     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8794     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8795     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
8797   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
8798     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
8800     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8801     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8802     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
/// Simplify (select Cond, C1, C2) where both arms are scalar integer
/// constants: prefer zext/sext/xor of the condition, or simple math
/// (add/shl of the extended condition), over a select instruction.
/// Returns SDValue() if no fold applies.
8807 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8808   SDValue Cond = N->getOperand(0);
8809   SDValue N1 = N->getOperand(1);
8810   SDValue N2 = N->getOperand(2);
8811   EVT VT = N->getValueType(0);
8812   EVT CondVT = Cond.getValueType();
8815   if (!VT.isInteger())
8818   auto *C1 = dyn_cast<ConstantSDNode>(N1);
8819   auto *C2 = dyn_cast<ConstantSDNode>(N2);
8823   // Only do this before legalization to avoid conflicting with target-specific
8824   // transforms in the other direction (create a select from a zext/sext). There
8825   // is also a target-independent combine here in DAGCombiner in the other
8826   // direction for (select Cond, -1, 0) when the condition is not i1.
8827   if (CondVT == MVT::i1 && !LegalOperations) {
8828     if (C1->isNullValue() && C2->isOne()) {
8829       // select Cond, 0, 1 --> zext (!Cond)
8830       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8832         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8835     if (C1->isNullValue() && C2->isAllOnesValue()) {
8836       // select Cond, 0, -1 --> sext (!Cond)
8837       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8839         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8842     if (C1->isOne() && C2->isNullValue()) {
8843       // select Cond, 1, 0 --> zext (Cond)
8845         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8848     if (C1->isAllOnesValue() && C2->isNullValue()) {
8849       // select Cond, -1, 0 --> sext (Cond)
8851         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8855     // Use a target hook because some targets may prefer to transform in the
8857     if (TLI.convertSelectOfConstantsToMath(VT)) {
8858       // For any constants that differ by 1, we can transform the select into an
8860       const APInt &C1Val = C1->getAPIntValue();
8861       const APInt &C2Val = C2->getAPIntValue();
8862       if (C1Val - 1 == C2Val) {
8863         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8865         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8866         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8868       if (C1Val + 1 == C2Val) {
8869         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8871         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8872         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8875       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
8876       if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
8878         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8879         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
8880         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
8883       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
8890   // fold (select Cond, 0, 1) -> (xor Cond, 1)
8891   // We can't do this reliably if integer based booleans have different contents
8892   // to floating point based booleans. This is because we can't tell whether we
8893   // have an integer-based boolean or a floating-point-based boolean unless we
8894   // can find the SETCC that produced it and inspect its operands. This is
8895   // fairly easy if C is the SETCC node, but it can potentially be
8896   // undiscoverable (or not reasonably discoverable). For example, it could be
8897   // in another basic block or it could require searching a complicated
8899   if (CondVT.isInteger() &&
8900       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8901           TargetLowering::ZeroOrOneBooleanContent &&
8902       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8903           TargetLowering::ZeroOrOneBooleanContent &&
8904       C1->isNullValue() && C2->isOne()) {
8906         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8907     if (VT.bitsEq(CondVT))
8909     return DAG.getZExtOrTrunc(NotCond, DL, VT);
/// Combine an ISD::SELECT node.  Covers: generic simplifySelect, i1
/// select -> and/or/not logic, select-of-constants, folding by true/false
/// value, normalizing chained selects with and/or conditions, condition
/// flipping, and setcc-based folds (fmin/fmax, saturating uadd, SELECT_CC).
8915 SDValue DAGCombiner::visitSELECT(SDNode *N) {
8916   SDValue N0 = N->getOperand(0);
8917   SDValue N1 = N->getOperand(1);
8918   SDValue N2 = N->getOperand(2);
8919   EVT VT = N->getValueType(0);
8920   EVT VT0 = N0.getValueType();
8922   SDNodeFlags Flags = N->getFlags();
8924   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8927   // fold (select X, X, Y) -> (or X, Y)
8928   // fold (select X, 1, Y) -> (or C, Y)
8929   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8930     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8932   if (SDValue V = foldSelectOfConstants(N))
8935   // fold (select C, 0, X) -> (and (not C), X)
8936   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8937     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8938     AddToWorklist(NOTNode.getNode());
8939     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8941   // fold (select C, X, 1) -> (or (not C), X)
8942   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8943     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8944     AddToWorklist(NOTNode.getNode());
8945     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8947   // fold (select X, Y, X) -> (and X, Y)
8948   // fold (select X, Y, 0) -> (and X, Y)
8949   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8950     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8952   // If we can fold this based on the true/false value, do so.
8953   if (SimplifySelectOps(N, N1, N2))
8954     return SDValue(N, 0);  // Don't revisit N.
8956   if (VT0 == MVT::i1) {
8957     // The code in this block deals with the following 2 equivalences:
8958     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8959     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8960     // The target can specify its preferred form with the
8961     // shouldNormalizeToSelectSequence() callback. However we always transform
8962     // to the right anyway if we find the inner select exists in the DAG anyway
8963     // and we always transform to the left side if we know that we can further
8964     // optimize the combination of the conditions.
8965     bool normalizeToSequence =
8966         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8967     // select (and Cond0, Cond1), X, Y
8968     //   -> select Cond0, (select Cond1, X, Y), Y
8969     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8970       SDValue Cond0 = N0->getOperand(0);
8971       SDValue Cond1 = N0->getOperand(1);
8972       SDValue InnerSelect =
8973           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8974       if (normalizeToSequence || !InnerSelect.use_empty())
8975         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8976                            InnerSelect, N2, Flags);
8977       // Cleanup on failure.
8978       if (InnerSelect.use_empty())
8979         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8981     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8982     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8983       SDValue Cond0 = N0->getOperand(0);
8984       SDValue Cond1 = N0->getOperand(1);
8985       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8986                                         Cond1, N1, N2, Flags);
8987       if (normalizeToSequence || !InnerSelect.use_empty())
8988         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8989                            InnerSelect, Flags);
8990       // Cleanup on failure.
8991       if (InnerSelect.use_empty())
8992         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8995     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8996     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8997       SDValue N1_0 = N1->getOperand(0);
8998       SDValue N1_1 = N1->getOperand(1);
8999       SDValue N1_2 = N1->getOperand(2);
9000       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9001         // Create the actual and node if we can generate good code for it.
9002         if (!normalizeToSequence) {
9003           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9004           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9007         // Otherwise see if we can optimize the "and" to a better pattern.
9008         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9009           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9014     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9015     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9016       SDValue N2_0 = N2->getOperand(0);
9017       SDValue N2_1 = N2->getOperand(1);
9018       SDValue N2_2 = N2->getOperand(2);
9019       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9020         // Create the actual or node if we can generate good code for it.
9021         if (!normalizeToSequence) {
9022           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9023           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9026         // Otherwise see if we can optimize to a better pattern.
9027         if (SDValue Combined = visitORLike(N0, N2_0, N))
9028           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9034   // select (not Cond), N1, N2 -> select Cond, N2, N1
9035   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9036     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9037     SelectOp->setFlags(Flags);
9041   // Fold selects based on a setcc into other things, such as min/max/abs.
9042   if (N0.getOpcode() == ISD::SETCC) {
9043     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9044     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9046     // select (fcmp lt x, y), x, y -> fminnum x, y
9047     // select (fcmp gt x, y), x, y -> fmaxnum x, y
9049     // This is OK if we don't care what happens if either operand is a NaN.
9050     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9051       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9055     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9056     // This is conservatively limited to pre-legal-operations to give targets
9057     // a chance to reverse the transform if they want to do that. Also, it is
9058     // unlikely that the pattern would be formed late, so it's probably not
9059     // worth going through the other checks.
9060     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9061         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9062         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9063       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9064       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9065       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9066         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9067         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9069         // The IR equivalent of this transform would have this form:
9071         //   %c = icmp ugt %x, ~C
9072         //   %r = select %c, -1, %a
9074         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9075         //   %u0 = extractvalue %u, 0
9076         //   %u1 = extractvalue %u, 1
9077         //   %r = select %u1, -1, %u0
9078         SDVTList VTs = DAG.getVTList(VT, VT0);
9079         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9080         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9084     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9085         (!LegalOperations &&
9086          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9087       // Any flags available in a select/setcc fold will be on the setcc as they
9088       // migrated from fcmp
9089       Flags = N0.getNode()->getFlags();
9090       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9091                                        N2, N0.getOperand(2));
9092       SelectNode->setFlags(Flags);
9096     return SimplifySelect(DL, N0, N1, N2);
9102 // This function assumes all the vselect's arguments are CONCAT_VECTOR
9103 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
/// If each half of the condition build-vector is a splat of one constant
/// (ignoring undefs), pick each half of the result directly from LHS or
/// RHS and emit a single CONCAT_VECTORS.  Returns SDValue() on mismatch.
9104 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9106   SDValue Cond = N->getOperand(0);
9107   SDValue LHS = N->getOperand(1);
9108   SDValue RHS = N->getOperand(2);
9109   EVT VT = N->getValueType(0);
9110   int NumElems = VT.getVectorNumElements();
9111   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9112          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9113          Cond.getOpcode() == ISD::BUILD_VECTOR);
9115   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
9116   // binary ones here.
9117   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9120   // We're sure we have an even number of elements due to the
9121   // concat_vectors we have as arguments to vselect.
9122   // Skip BV elements until we find one that's not an UNDEF
9123   // After we find an UNDEF element, keep looping until we get to half the
9124   // length of the BV and see if all the non-undef nodes are the same.
9125   ConstantSDNode *BottomHalf = nullptr;
9126   for (int i = 0; i < NumElems / 2; ++i) {
9127     if (Cond->getOperand(i)->isUndef())
9130     if (BottomHalf == nullptr)
9131       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9132     else if (Cond->getOperand(i).getNode() != BottomHalf)
9136   // Do the same for the second half of the BuildVector
9137   ConstantSDNode *TopHalf = nullptr;
9138   for (int i = NumElems / 2; i < NumElems; ++i) {
9139     if (Cond->getOperand(i)->isUndef())
9142     if (TopHalf == nullptr)
9143       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9144     else if (Cond->getOperand(i).getNode() != TopHalf)
9148   assert(TopHalf && BottomHalf &&
9149          "One half of the selector was all UNDEFs and the other was all the "
9150          "same value. This should have been addressed before this function.");
       // Null selector picks the false (RHS) half; non-null picks LHS.
9152       ISD::CONCAT_VECTORS, DL, VT,
9153       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9154       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
/// Combine an ISD::MSCATTER node: eliminate scatters whose mask is
/// all-zero (they store nothing).
9157 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9158   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9159   SDValue Mask = MSC->getMask();
9160   SDValue Chain = MSC->getChain();
9163   // Zap scatters with a zero mask.
9164   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
/// Combine an ISD::MSTORE node: eliminate masked stores with an all-zero
/// mask, and try converting to a pre-/post-indexed store.
9170 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9171   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9172   SDValue Mask = MST->getMask();
9173   SDValue Chain = MST->getChain();
9176   // Zap masked stores with a zero mask.
9177   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9180   // Try transforming N to an indexed store.
9181   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9182     return SDValue(N, 0);
/// Combine an ISD::MGATHER node: a gather with an all-zero mask loads
/// nothing, so it folds to its pass-through value (and original chain).
9187 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9188   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9189   SDValue Mask = MGT->getMask();
9192   // Zap gathers with a zero mask.
9193   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9194     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
/// Combine an ISD::MLOAD node: fold an all-zero mask to the pass-through
/// value, and try converting to a pre-/post-indexed load.
9199 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9200   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9201   SDValue Mask = MLD->getMask();
9204   // Zap masked loads with a zero mask.
9205   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9206     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9208   // Try transforming N to an indexed load.
9209   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9210     return SDValue(N, 0);
9215 /// A vector select of 2 constant vectors can be simplified to math/logic to
9216 /// avoid a variable select instruction and possibly avoid constant loads.
/// Patterns handled: per-lane C +/- 1 pairs -> add of the zext/sext'd
/// condition; Pow2/0 splat -> shl of the zext'd condition; sign-bit smear
/// via foldSelectOfConstantsUsingSra.  Returns SDValue() otherwise.
9217 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9218   SDValue Cond = N->getOperand(0);
9219   SDValue N1 = N->getOperand(1);
9220   SDValue N2 = N->getOperand(2);
9221   EVT VT = N->getValueType(0);
       // Requires a one-use i1-element condition, a target that prefers math
       // over select-of-constants, and both arms as constant build vectors.
9222   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9223       !TLI.convertSelectOfConstantsToMath(VT) ||
9224       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9225       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9228   // Check if we can use the condition value to increment/decrement a single
9229   // constant value. This simplifies a select to an add and removes a constant
9230   // load/materialization from the general case.
9231   bool AllAddOne = true;
9232   bool AllSubOne = true;
9233   unsigned Elts = VT.getVectorNumElements();
9234   for (unsigned i = 0; i != Elts; ++i) {
9235     SDValue N1Elt = N1.getOperand(i);
9236     SDValue N2Elt = N2.getOperand(i);
9237     if (N1Elt.isUndef() || N2Elt.isUndef())
9239     if (N1Elt.getValueType() != N2Elt.getValueType())
9242     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9243     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9250   // Further simplifications for the extra-special cases where the constants are
9251   // all 0 or all -1 should be implemented as folds of these patterns.
9253   if (AllAddOne || AllSubOne) {
9254     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9255     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9256     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9257     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9258     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9261   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9263   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9264       isNullOrNullSplat(N2)) {
9265     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9266     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9267     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9270   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9273   // The general case for select-of-constants:
9274   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9275   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9276   // leave that to a machine-specific pass.
// Combine a VSELECT: generic simplification, boolean-flip canonicalization,
// integer-abs matching, min/max formation from FP compares, compare
// widening, constant-condition folds, and select-of-constants math.
9280 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9281 SDValue N0 = N->getOperand(0);
9282 SDValue N1 = N->getOperand(1);
9283 SDValue N2 = N->getOperand(2);
9284 EVT VT = N->getValueType(0);
9287 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9290 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9291 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9292 return DAG.getSelect(DL, VT, F, N2, N1);
9294 // Canonicalize integer abs.
9295 // vselect (setg[te] X, 0), X, -X ->
9296 // vselect (setgt X, -1), X, -X ->
9297 // vselect (setl[te] X, 0), -X, X ->
9298 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9299 if (N0.getOpcode() == ISD::SETCC) {
9300 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9301 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9303 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
// Recognize both "select positive ? X : 0-X" and the mirrored
// "select negative ? 0-X : X" shapes of integer abs.
9305 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9306 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9307 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9308 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9309 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9310 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9311 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
// Prefer a native ABS node; otherwise expand to the sra/add/xor idiom.
9314 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9315 return DAG.getNode(ISD::ABS, DL, VT, LHS);
9317 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9318 DAG.getConstant(VT.getScalarSizeInBits() - 1,
9319 DL, getShiftAmountTy(VT)));
9320 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9321 AddToWorklist(Shift.getNode());
9322 AddToWorklist(Add.getNode());
9323 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9326 // vselect x, y (fcmp lt x, y) -> fminnum x, y
9327 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9329 // This is OK if we don't care about what happens if either operand is a
9332 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9333 if (SDValue FMinMax =
9334 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9338 // If this select has a condition (setcc) with narrower operands than the
9339 // select, try to widen the compare to match the select width.
9340 // TODO: This should be extended to handle any constant.
9341 // TODO: This could be extended to handle non-loading patterns, but that
9342 // requires thorough testing to avoid regressions.
9343 if (isNullOrNullSplat(RHS)) {
9344 EVT NarrowVT = LHS.getValueType();
9345 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9346 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
9347 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
9348 unsigned WideWidth = WideVT.getScalarSizeInBits();
9349 bool IsSigned = isSignedIntSetCC(CC);
9350 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
// Widening is only free when the LHS is a one-use load that can become
// an extending load and the wide setcc is legal/custom for the target.
9351 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
9352 SetCCWidth != 1 && SetCCWidth < WideWidth &&
9353 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
9354 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
9355 // Both compare operands can be widened for free. The LHS can use an
9356 // extended load, and the RHS is a constant:
9357 // vselect (ext (setcc load(X), C)), N1, N2 -->
9358 // vselect (setcc extload(X), C'), N1, N2
9359 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9360 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
9361 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
9362 EVT WideSetCCVT = getSetCCResultType(WideVT);
9363 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
9364 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
9369 if (SimplifySelectOps(N, N1, N2))
9370 return SDValue(N, 0); // Don't revisit N.
9372 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
9373 if (ISD::isBuildVectorAllOnes(N0.getNode()))
9375 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
9376 if (ISD::isBuildVectorAllZeros(N0.getNode()))
9379 // The ConvertSelectToConcatVector function is assuming both the above
9380 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
9382 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9383 N2.getOpcode() == ISD::CONCAT_VECTORS &&
9384 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
9385 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
9389 if (SDValue V = foldVSelectOfConstants(N))
// Combine a SELECT_CC (lhs, rhs, trueval, falseval, cc): fold identical
// arms, simplify the implied setcc (possibly to a constant or a simpler
// compare), then delegate to SimplifySelectCC for min/max/abs patterns.
9395 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
9396 SDValue N0 = N->getOperand(0);
9397 SDValue N1 = N->getOperand(1);
9398 SDValue N2 = N->getOperand(2);
9399 SDValue N3 = N->getOperand(3);
9400 SDValue N4 = N->getOperand(4);
9401 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
9403 // fold select_cc lhs, rhs, x, x, cc -> x
9407 // Determine if the condition we're dealing with is constant
9408 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
9409 CC, SDLoc(N), false)) {
9410 AddToWorklist(SCC.getNode());
// A constant compare result selects one arm unconditionally.
9412 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
9413 if (!SCCC->isNullValue())
9414 return N2; // cond always true -> true val
9416 return N3; // cond always false -> false val
9417 } else if (SCC->isUndef()) {
9418 // When the condition is UNDEF, just return the first operand. This is
9419 // coherent the DAG creation, no setcc node is created in this case
9421 } else if (SCC.getOpcode() == ISD::SETCC) {
9422 // Fold to a simpler select_cc
9423 SDValue SelectOp = DAG.getNode(
9424 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
9425 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
// Preserve the simplified compare's flags on the new select_cc.
9426 SelectOp->setFlags(SCC->getFlags());
9431 // If we can fold this based on the true/false value, do so.
9432 if (SimplifySelectOps(N, N2, N3))
9433 return SDValue(N, 0); // Don't revisit N.
9435 // fold select_cc into other things, such as min/max/abs
9436 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
// Combine a SETCC. If the only user is a BRCOND we prefer to keep the
// result as a setcc, so SimplifySetCC is told not to fold away the setcc
// form, and a non-setcc fold is repaired via rebuildSetCC.
9439 SDValue DAGCombiner::visitSETCC(SDNode *N) {
9440 // setcc is very commonly used as an argument to brcond. This pattern
9441 // also lend itself to numerous combines and, as a result, it is desired
9442 // we keep the argument to a brcond as a setcc as much as possible.
9444 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
// Last argument (!PreferSetCC) permits non-setcc results only when no
// BRCOND depends on this node.
9446 SDValue Combined = SimplifySetCC(
9447 N->getValueType(0), N->getOperand(0), N->getOperand(1),
9448 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
9453 // If we prefer to have a setcc, and we don't, we'll try our best to
9454 // recreate one using rebuildSetCC.
9455 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
9456 SDValue NewSetCC = rebuildSetCC(Combined);
9458 // We don't have anything interesting to combine to.
9459 if (NewSetCC.getNode() == N)
// Combine a SETCCCARRY (compare with borrow/carry input): a constant-zero
// carry makes it equivalent to a plain SETCC.
9469 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9470 SDValue LHS = N->getOperand(0);
9471 SDValue RHS = N->getOperand(1);
9472 SDValue Carry = N->getOperand(2);
9473 SDValue Cond = N->getOperand(3);
9475 // If Carry is false, fold to a regular SETCC.
9476 if (isNullConstant(Carry))
9477 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9482 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9483 /// a build_vector of constants.
9484 /// This function is called by the DAGCombiner when visiting sext/zext/aext
9485 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9486 /// Vector extends are not folded if operations are legal; this is to
9487 /// avoid introducing illegal build_vector dag nodes.
9488 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9489 SelectionDAG &DAG, bool LegalTypes) {
9490 unsigned Opcode = N->getOpcode();
9491 SDValue N0 = N->getOperand(0);
9492 EVT VT = N->getValueType(0);
9495 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9496 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9497 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
9498 && "Expected EXTEND dag node in input!");
9500 // fold (sext c1) -> c1
9501 // fold (zext c1) -> c1
9502 // fold (aext c1) -> c1
// A scalar constant operand folds directly via DAG constant folding.
9503 if (isa<ConstantSDNode>(N0))
9504 return DAG.getNode(Opcode, DL, VT, N0);
9506 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9507 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9508 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9509 if (N0->getOpcode() == ISD::SELECT) {
9510 SDValue Op1 = N0->getOperand(1);
9511 SDValue Op2 = N0->getOperand(2);
// Skip this when zext of the narrow type is free — the zext form is
// already as cheap as the select-of-wide-constants form.
9512 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9513 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
9514 // For any_extend, choose sign extension of the constants to allow a
9515 // possible further transform to sign_extend_inreg.i.e.
9517 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9518 // t2: i64 = any_extend t1
9520 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9522 // t4: i64 = sign_extend_inreg t3
9523 unsigned FoldOpc = Opcode;
9524 if (FoldOpc == ISD::ANY_EXTEND)
9525 FoldOpc = ISD::SIGN_EXTEND;
9526 return DAG.getSelect(DL, VT, N0->getOperand(0),
9527 DAG.getNode(FoldOpc, DL, VT, Op1),
9528 DAG.getNode(FoldOpc, DL, VT, Op2));
9532 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
9533 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
9534 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
9535 EVT SVT = VT.getScalarType();
// Guard against creating a build_vector of an illegal scalar type when
// only legal types are allowed.
9536 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
9537 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
9540 // We can fold this node into a build_vector.
9541 unsigned VTBits = SVT.getSizeInBits();
9542 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
9543 SmallVector<SDValue, 8> Elts;
9544 unsigned NumElts = VT.getVectorNumElements();
9546 // For zero-extensions, UNDEF elements still guarantee to have the upper
9547 // bits set to zero.
9549 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
// Extend each constant element; zext turns undef elements into zero so
// the upper-bits-are-zero guarantee holds.
9551 for (unsigned i = 0; i != NumElts; ++i) {
9552 SDValue Op = N0.getOperand(i);
9554 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
9559 // Get the constant value and if needed trunc it to the size of the type.
9560 // Nodes like build_vector might have constants wider than the scalar type.
9561 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
9562 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
9563 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9565 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9568 return DAG.getBuildVector(VT, DL, Elts);
9571 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
9572 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9573 // transformation. Returns true if extension are possible and the above
9574 // mentioned transformation is profitable.
9575 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9577 SmallVectorImpl<SDNode *> &ExtendNodes,
9578 const TargetLowering &TLI) {
// Walk every use of the load value, recording SETCC users that can be
// extended in lockstep; the fold is unprofitable if an unextendable user
// exists and truncating back to the narrow type is not free.
9579 bool HasCopyToRegUses = false;
9580 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9581 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9582 UE = N0.getNode()->use_end();
// Only uses of the load's value result matter, not the chain.
9587 if (UI.getUse().getResNo() != N0.getResNo())
9589 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9590 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9591 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9592 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9593 // Sign bits will be lost after a zext.
// Both setcc operands must be the load itself or a constant.
9596 for (unsigned i = 0; i != 2; ++i) {
9597 SDValue UseOp = User->getOperand(i);
9600 if (!isa<ConstantSDNode>(UseOp))
9605 ExtendNodes.push_back(User);
9608 // If truncates aren't free and there are users we can't
9609 // extend, it isn't worthwhile.
9612 // Remember if this value is live-out.
9613 if (User->getOpcode() == ISD::CopyToReg)
9614 HasCopyToRegUses = true;
// If the narrow value is copied to a register, check whether the extended
// value is live-out as well; keeping both alive needs justification.
9617 if (HasCopyToRegUses) {
9618 bool BothLiveOut = false;
9619 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9621 SDUse &Use = UI.getUse();
9622 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9628 // Both unextended and extended values are live out. There had better be
9629 // a good reason for the transformation.
9630 return ExtendNodes.size();
// Rewrite each recorded SETCC user of OrigLoad to compare the extended
// load instead, extending its other (constant) operand with ExtType so the
// operand types stay consistent.
9635 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9636 SDValue OrigLoad, SDValue ExtLoad,
9637 ISD::NodeType ExtType) {
9638 // Extend SetCC uses if necessary.
9640 for (SDNode *SetCC : SetCCs) {
9641 SmallVector<SDValue, 4> Ops;
// Substitute the extended load for the original operand; extend anything
// else (per ExtendUsesToFormExtLoad, this is a constant).
9643 for (unsigned j = 0; j != 2; ++j) {
9644 SDValue SOp = SetCC->getOperand(j);
9645 if (SOp == OrigLoad)
9646 Ops.push_back(ExtLoad);
9648 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
// Keep the original condition code operand.
9651 Ops.push_back(SetCC->getOperand(2));
9652 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9656 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
9657 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
9658 SDValue N0 = N->getOperand(0);
9659 EVT DstVT = N->getValueType(0);
9660 EVT SrcVT = N0.getValueType();
9662 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9663 N->getOpcode() == ISD::ZERO_EXTEND) &&
9664 "Unexpected node type (not an extend)!");
9666 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
9667 // For example, on a target with legal v4i32, but illegal v8i32, turn:
9668 // (v8i32 (sext (v8i16 (load x))))
9670 // (v8i32 (concat_vectors (v4i32 (sextload x)),
9671 // (v4i32 (sextload (x + 16)))))
9672 // Where uses of the original load, i.e.:
9674 // are replaced with:
9676 // (v8i32 (concat_vectors (v4i32 (sextload x)),
9677 // (v4i32 (sextload (x + 16)))))))
9679 // This combine is only applicable to illegal, but splittable, vectors.
9680 // All legal types, and illegal non-vector types, are handled elsewhere.
9681 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9683 if (N0->getOpcode() != ISD::LOAD)
9686 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
// Restrict to simple, unindexed, non-extending loads of power-of-two
// vectors, and let the target veto the transform.
9688 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
9689 !N0.hasOneUse() || !LN0->isSimple() ||
9690 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
9691 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9694 SmallVector<SDNode *, 4> SetCCs;
9695 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9698 ISD::LoadExtType ExtType =
9699 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9701 // Try to split the vector types to get down to legal types.
9702 EVT SplitSrcVT = SrcVT;
9703 EVT SplitDstVT = DstVT;
// Halve the vector types until the extending load becomes legal/custom
// or no further split is possible.
9704 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9705 SplitSrcVT.getVectorNumElements() > 1) {
9706 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9707 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9710 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9713 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
9716 const unsigned NumSplits =
9717 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
9718 const unsigned Stride = SplitSrcVT.getStoreSize();
9719 SmallVector<SDValue, 4> Loads;
9720 SmallVector<SDValue, 4> Chains;
// Emit one extending load per split, advancing the pointer by the store
// size of each narrow source piece.
9722 SDValue BasePtr = LN0->getBasePtr();
9723 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
9724 const unsigned Offset = Idx * Stride;
9725 const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9727 SDValue SplitLoad = DAG.getExtLoad(
9728 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9729 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9730 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9732 BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
9734 Loads.push_back(SplitLoad.getValue(0));
9735 Chains.push_back(SplitLoad.getValue(1));
// Merge the split chains and concatenate the loaded pieces.
9738 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9739 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9742 AddToWorklist(NewChain.getNode());
9744 CombineTo(N, NewValue);
9746 // Replace uses of the original load (before extension)
9747 // with a truncate of the concatenated sextloaded vectors.
9749 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9750 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9751 CombineTo(N0.getNode(), Trunc, NewChain);
9752 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9755 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9756 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
// Push a zext through a (logic-op (shift (load))) chain, turning the load
// into a zextload and widening the shift and logic-op constants.
9757 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9758 assert(N->getOpcode() == ISD::ZERO_EXTEND);
9759 EVT VT = N->getValueType(0);
9760 EVT OrigVT = N->getOperand(0).getValueType();
// If zext is free anyway, there is nothing to gain from the rewrite.
9761 if (TLI.isZExtFree(OrigVT, VT))
9765 SDValue N0 = N->getOperand(0);
// N0 must be and/or/xor with a constant RHS, legal in the wide type.
9766 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9767 N0.getOpcode() == ISD::XOR) ||
9768 N0.getOperand(1).getOpcode() != ISD::Constant ||
9769 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
// N1 must be a shl/srl by a constant, also legal in the wide type.
9773 SDValue N1 = N0->getOperand(0);
9774 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
9775 N1.getOperand(1).getOpcode() != ISD::Constant ||
9776 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
9780 if (!isa<LoadSDNode>(N1.getOperand(0)))
9782 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9783 EVT MemVT = Load->getMemoryVT();
9784 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9785 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
9789 // If the shift op is SHL, the logic op must be AND, otherwise the result
9791 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
9794 if (!N0.hasOneUse() || !N1.hasOneUse())
9797 SmallVector<SDNode*, 4> SetCCs;
9798 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9799 ISD::ZERO_EXTEND, SetCCs, TLI))
9802 // Actually do the transformation.
9803 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9804 Load->getChain(), Load->getBasePtr(),
9805 Load->getMemoryVT(), Load->getMemOperand());
9808 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
// Zero-extend the logic-op mask to the wide type.
9811 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
9813 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9814 DAG.getConstant(Mask, DL0, VT));
9816 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
// If the extended value was the load's only user, just rewire the chain;
// otherwise keep a truncate of the wide load for the remaining users.
9818 if (SDValue(Load, 0).hasOneUse()) {
9819 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9821 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9822 Load->getValueType(0), ExtLoad);
9823 CombineTo(Load, Trunc, ExtLoad.getValue(1));
9826 // N0 is dead at this point.
9827 recursivelyDeleteUnusedNodes(N0.getNode());
9829 return SDValue(N,0); // Return N so it doesn't get rechecked!
9832 /// If we're narrowing or widening the result of a vector select and the final
9833 /// size is the same size as a setcc (compare) feeding the select, then try to
9834 /// apply the cast operation to the select's operands because matching vector
9835 /// sizes for a select condition and other operands should be more efficient.
9836 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9837 unsigned CastOpcode = Cast->getOpcode();
9838 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9839 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9840 CastOpcode == ISD::FP_ROUND) &&
9841 "Unexpected opcode for vector select narrowing/widening");
9843 // We only do this transform before legal ops because the pattern may be
9844 // obfuscated by target-specific operations after legalization. Do not create
9845 // an illegal select op, however, because that may be difficult to lower.
9846 EVT VT = Cast->getValueType(0);
9847 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
// Only handle a one-use vselect whose condition comes from a setcc.
9850 SDValue VSel = Cast->getOperand(0);
9851 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9852 VSel.getOperand(0).getOpcode() != ISD::SETCC)
9855 // Does the setcc have the same vector size as the casted select?
9856 SDValue SetCC = VSel.getOperand(0);
9857 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9858 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9861 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9862 SDValue A = VSel.getOperand(1);
9863 SDValue B = VSel.getOperand(2);
9864 SDValue CastA, CastB;
9866 if (CastOpcode == ISD::FP_ROUND) {
9867 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9868 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9869 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9871 CastA = DAG.getNode(CastOpcode, DL, VT, A);
9872 CastB = DAG.getNode(CastOpcode, DL, VT, B);
9874 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9877 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9878 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Fold an extend of an already-extending (or any-extending) load into a
// single extending load of the wider result type.
9879 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9880 const TargetLowering &TLI, EVT VT,
9881 bool LegalOperations, SDNode *N,
9882 SDValue N0, ISD::LoadExtType ExtLoadType) {
9883 SDNode *N0Node = N0.getNode();
// The inner load must already extend in a compatible way (same kind of
// extension, or a plain EXTLOAD), be unindexed, and have one use.
9884 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9885 : ISD::isZEXTLoad(N0Node);
9886 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9887 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9890 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9891 EVT MemVT = LN0->getMemoryVT();
9892 if ((LegalOperations || !LN0->isSimple() ||
9894 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)
// Build the wide extending load and forward the old load's chain users.
9898 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9899 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9900 Combiner.CombineTo(N, ExtLoad);
9901 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9902 if (LN0->use_empty())
9903 Combiner.recursivelyDeleteUnusedNodes(LN0);
9904 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9907 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9908 // Only generate vector extloads when 1) they're legal, and 2) they are
9909 // deemed desirable by the target.
9910 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
9911 const TargetLowering &TLI, EVT VT,
9912 bool LegalOperations, SDNode *N, SDValue N0,
9913 ISD::LoadExtType ExtLoadType,
9914 ISD::NodeType ExtOpc) {
// Only plain, unindexed loads qualify; the extending load must be legal
// (always checked for vectors, and after legalization for scalars).
9915 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
9916 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
9917 ((LegalOperations || VT.isVector() ||
9918 !cast<LoadSDNode>(N0)->isSimple()) &&
9919 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
9922 bool DoXform = true;
9923 SmallVector<SDNode *, 4> SetCCs;
// With multiple users, only proceed if all relevant users (setcc's) can
// be extended in lockstep; targets can also veto vector ext-loads.
9924 if (!N0.hasOneUse())
9925 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9927 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9931 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9932 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9933 LN0->getBasePtr(), N0.getValueType(),
9934 LN0->getMemOperand());
9935 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9936 // If the load value is used only by N, replace it via CombineTo N.
9937 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9938 Combiner.CombineTo(N, ExtLoad);
9939 if (NoReplaceTrunc) {
9940 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9941 Combiner.recursivelyDeleteUnusedNodes(LN0);
// Other users of the narrow value get a truncate of the wide load.
9944 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9945 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9947 return SDValue(N, 0); // Return N so it doesn't get rechecked!
// Fold an extend of a non-extending masked load into an extending masked
// load, extending the pass-through value to keep masked-off lanes correct.
9950 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
9951 const TargetLowering &TLI, EVT VT,
9952 SDNode *N, SDValue N0,
9953 ISD::LoadExtType ExtLoadType,
9954 ISD::NodeType ExtOpc) {
9955 if (!N0.hasOneUse())
9958 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
9959 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
9962 if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
9965 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
// Build the extending masked load, reusing the original's addressing,
// mask, memory operand and expanding-load property.
9969 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
9970 SDValue NewLoad = DAG.getMaskedLoad(
9971 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
9972 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
9973 ExtLoadType, Ld->isExpandingLoad());
// Forward the old load's chain result to the new load's chain.
9974 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
// Fold [s|z]ext of an i1 sign-bit test (setgt X, -1) into a shift of the
// inverted value, avoiding the compare+extend pair.
9978 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9979 bool LegalOperations) {
9980 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9981 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
// Only a one-use i1 setcc feeding the extend is handled, and only before
// legalization.
9983 SDValue SetCC = N->getOperand(0);
9984 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9985 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9988 SDValue X = SetCC.getOperand(0);
9989 SDValue Ones = SetCC.getOperand(1);
9990 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9991 EVT VT = N->getValueType(0);
9992 EVT XVT = X.getValueType();
9993 // setge X, C is canonicalized to setgt, so we do not need to match that
9994 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9995 // not require the 'not' op.
9996 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9997 // Invert and smear/shift the sign bit:
9998 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9999 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10001 unsigned ShCt = VT.getSizeInBits() - 1;
10002 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// The target may consider this shift more expensive than the compare.
10003 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10004 SDValue NotX = DAG.getNOT(DL, X, VT);
10005 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
// sext keeps smearing the (inverted) sign bit; zext isolates it.
10007 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10008 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10014 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
10015 SDValue N0 = N->getOperand(0);
10016 EVT VT = N->getValueType(0);
10019 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10022 // fold (sext (sext x)) -> (sext x)
10023 // fold (sext (aext x)) -> (sext x)
10024 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10025 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
10027 if (N0.getOpcode() == ISD::TRUNCATE) {
10028 // fold (sext (truncate (load x))) -> (sext (smaller load x))
10029 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
10030 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10031 SDNode *oye = N0.getOperand(0).getNode();
10032 if (NarrowLoad.getNode() != N0.getNode()) {
10033 CombineTo(N0.getNode(), NarrowLoad);
10034 // CombineTo deleted the truncate, if needed, but not what's under it.
10035 AddToWorklist(oye);
10037 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10040 // See if the value being truncated is already sign extended. If so, just
10041 // eliminate the trunc/sext pair.
10042 SDValue Op = N0.getOperand(0);
10043 unsigned OpBits = Op.getScalarValueSizeInBits();
10044 unsigned MidBits = N0.getScalarValueSizeInBits();
10045 unsigned DestBits = VT.getScalarSizeInBits();
10046 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
10048 if (OpBits == DestBits) {
10049 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
10050 // bits, it is already ready.
10051 if (NumSignBits > DestBits-MidBits)
10053 } else if (OpBits < DestBits) {
10054 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
10055 // bits, just sext from i32.
10056 if (NumSignBits > OpBits-MidBits)
10057 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
10059 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
10060 // bits, just truncate to i32.
10061 if (NumSignBits > OpBits-MidBits)
10062 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
10065 // fold (sext (truncate x)) -> (sextinreg x).
10066 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
10067 N0.getValueType())) {
10068 if (OpBits < DestBits)
10069 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
10070 else if (OpBits > DestBits)
10071 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
10072 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
10073 DAG.getValueType(N0.getValueType()));
10077 // Try to simplify (sext (load x)).
10078 if (SDValue foldedExt =
10079 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10080 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
10083 if (SDValue foldedExt =
10084 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
10088 // fold (sext (load x)) to multiple smaller sextloads.
10089 // Only on illegal but splittable vectors.
10090 if (SDValue ExtLoad = CombineExtLoad(N))
10093 // Try to simplify (sext (sextload x)).
10094 if (SDValue foldedExt = tryToFoldExtOfExtload(
10095 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
10098 // fold (sext (and/or/xor (load x), cst)) ->
10099 // (and/or/xor (sextload x), (sext cst))
10100 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10101 N0.getOpcode() == ISD::XOR) &&
10102 isa<LoadSDNode>(N0.getOperand(0)) &&
10103 N0.getOperand(1).getOpcode() == ISD::Constant &&
10104 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10105 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10106 EVT MemVT = LN00->getMemoryVT();
10107 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
10108 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
10109 SmallVector<SDNode*, 4> SetCCs;
10110 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10111 ISD::SIGN_EXTEND, SetCCs, TLI);
10113 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
10114 LN00->getChain(), LN00->getBasePtr(),
10115 LN00->getMemoryVT(),
10116 LN00->getMemOperand());
10117 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
10118 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10119 ExtLoad, DAG.getConstant(Mask, DL, VT));
10120 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
10121 bool NoReplaceTruncAnd = !N0.hasOneUse();
10122 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10124 // If N0 has multiple uses, change other uses as well.
10125 if (NoReplaceTruncAnd) {
10127 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10128 CombineTo(N0.getNode(), TruncAnd);
10130 if (NoReplaceTrunc) {
10131 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10133 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10134 LN00->getValueType(0), ExtLoad);
10135 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10137 return SDValue(N,0); // Return N so it doesn't get rechecked!
10142 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10145 if (N0.getOpcode() == ISD::SETCC) {
10146 SDValue N00 = N0.getOperand(0);
10147 SDValue N01 = N0.getOperand(1);
10148 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10149 EVT N00VT = N0.getOperand(0).getValueType();
10151 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
10152 // Only do this before legalize for now.
10153 if (VT.isVector() && !LegalOperations &&
10154 TLI.getBooleanContents(N00VT) ==
10155 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10156 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10157 // of the same size as the compared operands. Only optimize sext(setcc())
10158 // if this is the case.
10159 EVT SVT = getSetCCResultType(N00VT);
10161 // If we already have the desired type, don't change it.
10162 if (SVT != N0.getValueType()) {
10163 // We know that the # elements of the results is the same as the
10164 // # elements of the compare (and the # elements of the compare result
10165 // for that matter). Check to see that they are the same size. If so,
10166 // we know that the element size of the sext'd result matches the
10167 // element size of the compare operands.
10168 if (VT.getSizeInBits() == SVT.getSizeInBits())
10169 return DAG.getSetCC(DL, VT, N00, N01, CC);
10171 // If the desired elements are smaller or larger than the source
10172 // elements, we can use a matching integer vector type and then
10173 // truncate/sign extend.
10174 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10175 if (SVT == MatchingVecType) {
10176 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10177 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10182 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
10183 // Here, T can be 1 or -1, depending on the type of the setcc and
10184 // getBooleanContents().
10185 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
10187 // To determine the "true" side of the select, we need to know the high bit
10188 // of the value returned by the setcc if it evaluates to true.
10189 // If the type of the setcc is i1, then the true case of the select is just
10190 // sext(i1 1), that is, -1.
10191 // If the type of the setcc is larger (say, i8) then the value of the high
10192 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
10193 // of the appropriate width.
10194 SDValue ExtTrueVal = (SetCCWidth == 1)
10195 ? DAG.getAllOnesConstant(DL, VT)
10196 : DAG.getBoolConstant(true, DL, VT, N00VT);
10197 SDValue Zero = DAG.getConstant(0, DL, VT);
10199 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
10202 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
10203 EVT SetCCVT = getSetCCResultType(N00VT);
10204 // Don't do this transform for i1 because there's a select transform
10205 // that would reverse it.
10206 // TODO: We should not do this transform at all without a target hook
10207 // because a sext is likely cheaper than a select?
10208 if (SetCCVT.getScalarSizeInBits() != 1 &&
10209 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
10210 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
10211 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
10216 // fold (sext x) -> (zext x) if the sign bit is known zero.
10217 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
10218 DAG.SignBitIsZero(N0))
10219 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
10221 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10224 // Eliminate this sign extend by doing a negation in the destination type:
10225 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
10226 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
10227 isNullOrNullSplat(N0.getOperand(0)) &&
10228 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
10229 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
10230 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
10231 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
10233 // Eliminate this sign extend by doing a decrement in the destination type:
10234 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
10235 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
10236 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
10237 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10238 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
10239 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
10240 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10246 // isTruncateOf - If N is a truncate of some other value, return true, record
10247 // the value being truncated in Op and which of Op's bits are zero/one in Known.
10248 // This function computes KnownBits to avoid a duplicated call to
10249 // computeKnownBits in the caller.
// NOTE(review): this listing is elided (source line numbers are
// non-contiguous), so some statements of this function are not visible here.
10250 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
10251 KnownBits &Known) {
// Direct truncate: record the wide value and its known bits.
10252 if (N->getOpcode() == ISD::TRUNCATE) {
10253 Op = N->getOperand(0);
10254 Known = DAG.computeKnownBits(Op);
// Otherwise only an i1-typed (setne X, 0) pattern is treated as a conceptual
// truncate-to-i1 of X; anything else is rejected.
10258 if (N.getOpcode() != ISD::SETCC ||
10259 N.getValueType().getScalarType() != MVT::i1 ||
10260 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
10263 SDValue Op0 = N->getOperand(0);
10264 SDValue Op1 = N->getOperand(1);
10265 assert(Op0.getValueType() == Op1.getValueType());
// One side of the setne must be zero (scalar or splat); the other side is
// the value being "truncated".
10267 if (isNullOrNullSplat(Op0))
10269 else if (isNullOrNullSplat(Op1))
10274 Known = DAG.computeKnownBits(Op);
// The setne is equivalent to a truncate-to-i1 only if every bit above bit 0
// of Op is known zero.
10276 return (Known.Zero | 1).isAllOnesValue();
10279 /// Given an extending node with a pop-count operand, if the target does not
10280 /// support a pop-count in the narrow source type but does support it in the
10281 /// destination type, widen the pop-count to the destination type.
10282 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10283 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
10284 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op")
// Only fire on a single-use CTPOP directly under the extend, so the original
// narrow CTPOP becomes dead after the rewrite.
10286 SDValue CtPop = Extend->getOperand(0);
10287 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
// Profitable only when CTPOP is NOT legal/custom in the narrow source type
// but IS legal/custom in the wide destination type.
10290 EVT VT = Extend->getValueType(0);
10291 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10292 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
10293 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
10296 // zext (ctpop X) --> ctpop (zext X)
// Zero-extending first only adds zero bits, so the population count of the
// widened value equals that of the original.
10298 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
10299 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
// Try to simplify or eliminate a ZERO_EXTEND node. Returns the replacement
// value (possibly SDValue(N, 0) after in-place CombineTo updates) or an empty
// SDValue when no fold applies.
// NOTE(review): this listing is elided (non-contiguous source line numbers);
// several returns/braces of this function are not visible here.
10302 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
10303 SDValue N0 = N->getOperand(0);
10304 EVT VT = N->getValueType(0);
10306 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10309 // fold (zext (zext x)) -> (zext x)
10310 // fold (zext (aext x)) -> (zext x)
10311 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10312 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
10315 // fold (zext (truncate x)) -> (zext x) or
10316 // (zext (truncate x)) -> (truncate x)
10317 // This is valid when the truncated bits of x are already zero.
10320 if (isTruncateOf(DAG, N0, Op, Known)) {
// TruncatedBits = the bits of x that the truncate discarded (empty set when
// the truncate is a no-op width-wise).
10321 APInt TruncatedBits =
10322 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
10323 APInt(Op.getScalarValueSizeInBits(), 0) :
10324 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
10325 N0.getScalarValueSizeInBits(),
10326 std::min(Op.getScalarValueSizeInBits(),
10327 VT.getScalarSizeInBits()));
10328 if (TruncatedBits.isSubsetOf(Known.Zero))
10329 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10332 // fold (zext (truncate x)) -> (and x, mask)
10333 if (N0.getOpcode() == ISD::TRUNCATE) {
10334 // fold (zext (truncate (load x))) -> (zext (smaller load x))
10335 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
10336 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10337 SDNode *oye = N0.getOperand(0).getNode();
10338 if (NarrowLoad.getNode() != N0.getNode()) {
10339 CombineTo(N0.getNode(), NarrowLoad);
10340 // CombineTo deleted the truncate, if needed, but not what's under it.
10341 AddToWorklist(oye);
10343 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10346 EVT SrcVT = N0.getOperand(0).getValueType();
10347 EVT MinVT = N0.getValueType();
10349 // Try to mask before the extension to avoid having to generate a larger mask,
10350 // possibly over several sub-vectors.
10351 if (SrcVT.bitsLT(VT) && VT.isVector()) {
10352 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
10353 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
10354 SDValue Op = N0.getOperand(0);
10355 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
10356 AddToWorklist(Op.getNode());
10357 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10358 // Transfer the debug info; the new node is equivalent to N0.
10359 DAG.transferDbgValues(N0, ZExtOrTrunc);
10360 return ZExtOrTrunc;
// Fallback: any-extend/truncate to VT, then mask off the high bits in VT.
10364 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
10365 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10366 AddToWorklist(Op.getNode());
10367 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
10368 // We may safely transfer the debug info describing the truncate node over
10369 // to the equivalent and operation.
10370 DAG.transferDbgValues(N0, And);
10375 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
10376 // if either of the casts is not free.
10377 if (N0.getOpcode() == ISD::AND &&
10378 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10379 N0.getOperand(1).getOpcode() == ISD::Constant &&
10380 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10381 N0.getValueType()) ||
10382 !TLI.isZExtFree(N0.getValueType(), VT))) {
10383 SDValue X = N0.getOperand(0).getOperand(0);
10384 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
// The AND constant is zero-extended to VT so the mask semantics of the
// original narrow AND are preserved in the wide type.
10385 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10387 return DAG.getNode(ISD::AND, DL, VT,
10388 X, DAG.getConstant(Mask, DL, VT));
10391 // Try to simplify (zext (load x)).
10392 if (SDValue foldedExt =
10393 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10394 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
10397 if (SDValue foldedExt =
10398 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
10402 // fold (zext (load x)) to multiple smaller zextloads.
10403 // Only on illegal but splittable vectors.
10404 if (SDValue ExtLoad = CombineExtLoad(N))
10407 // fold (zext (and/or/xor (load x), cst)) ->
10408 // (and/or/xor (zextload x), (zext cst))
10409 // Unless (and (load x) cst) will match as a zextload already and has
10410 // additional users.
10411 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10412 N0.getOpcode() == ISD::XOR) &&
10413 isa<LoadSDNode>(N0.getOperand(0)) &&
10414 N0.getOperand(1).getOpcode() == ISD::Constant &&
10415 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10416 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10417 EVT MemVT = LN00->getMemoryVT();
// Don't mix extension kinds: the load must not already be a sign-extending
// load, and indexed loads are not handled.
10418 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
10419 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
10420 bool DoXform = true;
10421 SmallVector<SDNode*, 4> SetCCs;
10422 if (!N0.hasOneUse()) {
10423 if (N0.getOpcode() == ISD::AND) {
10424 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
10425 EVT LoadResultTy = AndC->getValueType(0);
10427 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
10432 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10433 ISD::ZERO_EXTEND, SetCCs, TLI);
10435 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
10436 LN00->getChain(), LN00->getBasePtr(),
10437 LN00->getMemoryVT(),
10438 LN00->getMemOperand());
10439 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10441 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10442 ExtLoad, DAG.getConstant(Mask, DL, VT));
10443 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10444 bool NoReplaceTruncAnd = !N0.hasOneUse();
10445 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10447 // If N0 has multiple uses, change other uses as well.
10448 if (NoReplaceTruncAnd) {
10450 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10451 CombineTo(N0.getNode(), TruncAnd);
// Keep the load's chain result (value #1) live by rerouting it to the new
// extending load's chain.
10453 if (NoReplaceTrunc) {
10454 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10456 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10457 LN00->getValueType(0), ExtLoad);
10458 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10460 return SDValue(N,0); // Return N so it doesn't get rechecked!
10465 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10466 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10467 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
10470 // Try to simplify (zext (zextload x)).
10471 if (SDValue foldedExt = tryToFoldExtOfExtload(
10472 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
10475 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10478 if (N0.getOpcode() == ISD::SETCC) {
10479 // Only do this before legalize for now.
10480 if (!LegalOperations && VT.isVector() &&
10481 N0.getValueType().getVectorElementType() == MVT::i1) {
10482 EVT N00VT = N0.getOperand(0).getValueType();
10483 if (getSetCCResultType(N00VT) == N0.getValueType())
10486 // We know that the # elements of the results is the same as the #
10487 // elements of the compare (and the # elements of the compare result for
10488 // that matter). Check to see that they are the same size. If so, we know
10489 // that the element size of the sext'd result matches the element size of
10490 // the compare operands.
10492 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
10493 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
10494 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
10495 N0.getOperand(1), N0.getOperand(2));
10496 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
10499 // If the desired elements are smaller or larger than the source
10500 // elements we can use a matching integer vector type and then
10501 // truncate/any extend followed by zext_in_reg.
10502 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10504 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
10505 N0.getOperand(1), N0.getOperand(2));
10506 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
10507 N0.getValueType());
10510 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10512 if (SDValue SCC = SimplifySelectCC(
10513 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10514 DAG.getConstant(0, DL, VT),
10515 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10519 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
10520 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10521 isa<ConstantSDNode>(N0.getOperand(1)) &&
10522 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10524 SDValue ShAmt = N0.getOperand(1);
10525 if (N0.getOpcode() == ISD::SHL) {
10526 SDValue InnerZExt = N0.getOperand(0);
10527 // If the original shl may be shifting out bits, do not perform this
// KnownZeroBits = number of high bits of the inner zext known to be zero;
// the shl is only safe to hoist if it shifts by no more than that.
10529 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
10530 InnerZExt.getOperand(0).getValueSizeInBits();
10531 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
10537 // Ensure that the shift amount is wide enough for the shifted value.
10538 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
10539 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
10541 return DAG.getNode(N0.getOpcode(), DL, VT,
10542 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
10546 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10549 if (SDValue NewCtPop = widenCtPop(N, DAG))
// Try to simplify or eliminate an ANY_EXTEND node. Returns the replacement
// value (possibly SDValue(N, 0) after in-place CombineTo updates) or an empty
// SDValue when no fold applies.
// NOTE(review): this listing is elided (non-contiguous source line numbers);
// some statements/braces of this function are not visible here.
10555 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
10556 SDValue N0 = N->getOperand(0);
10557 EVT VT = N->getValueType(0);
10559 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10562 // fold (aext (aext x)) -> (aext x)
10563 // fold (aext (zext x)) -> (zext x)
10564 // fold (aext (sext x)) -> (sext x)
// Note: the inner extend's opcode (not ANY_EXTEND) is reused, which is what
// preserves the zext/sext guarantee in the two non-trivial cases above.
10565 if (N0.getOpcode() == ISD::ANY_EXTEND ||
10566 N0.getOpcode() == ISD::ZERO_EXTEND ||
10567 N0.getOpcode() == ISD::SIGN_EXTEND)
10568 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10570 // fold (aext (truncate (load x))) -> (aext (smaller load x))
10571 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
10572 if (N0.getOpcode() == ISD::TRUNCATE) {
10573 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10574 SDNode *oye = N0.getOperand(0).getNode();
10575 if (NarrowLoad.getNode() != N0.getNode()) {
10576 CombineTo(N0.getNode(), NarrowLoad);
10577 // CombineTo deleted the truncate, if needed, but not what's under it.
10578 AddToWorklist(oye);
10580 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10584 // fold (aext (truncate x))
// An aext of a truncate can always be collapsed: the high bits are
// undefined either way.
10585 if (N0.getOpcode() == ISD::TRUNCATE)
10586 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10588 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
10589 // if the trunc is not free.
10590 if (N0.getOpcode() == ISD::AND &&
10591 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10592 N0.getOperand(1).getOpcode() == ISD::Constant &&
10593 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10594 N0.getValueType())) {
10596 SDValue X = N0.getOperand(0).getOperand(0);
10597 X = DAG.getAnyExtOrTrunc(X, DL, VT);
10598 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10599 return DAG.getNode(ISD::AND, DL, VT,
10600 X, DAG.getConstant(Mask, DL, VT));
10603 // fold (aext (load x)) -> (aext (truncate (extload x)))
10604 // None of the supported targets knows how to perform load and any_ext
10605 // on vectors in one instruction. We only perform this transformation on
10607 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
10608 ISD::isUNINDEXEDLoad(N0.getNode()) &&
10609 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10610 bool DoXform = true;
10611 SmallVector<SDNode*, 4> SetCCs;
// With multiple users, only transform if all users can be rewritten to use
// the extended load (SetCC users are collected for later patching).
10612 if (!N0.hasOneUse())
10613 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
10616 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10617 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10619 LN0->getBasePtr(), N0.getValueType(),
10620 LN0->getMemOperand());
10621 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
10622 // If the load value is used only by N, replace it via CombineTo N.
10623 bool NoReplaceTrunc = N0.hasOneUse();
10624 CombineTo(N, ExtLoad);
10625 if (NoReplaceTrunc) {
10626 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10627 recursivelyDeleteUnusedNodes(LN0);
10629 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10630 N0.getValueType(), ExtLoad);
10631 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10633 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10637 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10638 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10639 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
10640 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
10641 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
10642 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10643 ISD::LoadExtType ExtType = LN0->getExtensionType();
10644 EVT MemVT = LN0->getMemoryVT();
// Re-issue the same kind of extending load, but directly at the wide VT.
10645 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
10646 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10647 VT, LN0->getChain(), LN0->getBasePtr(),
10648 MemVT, LN0->getMemOperand());
10649 CombineTo(N, ExtLoad);
10650 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10651 recursivelyDeleteUnusedNodes(LN0);
10652 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10656 if (N0.getOpcode() == ISD::SETCC) {
10658 // aext(setcc) -> vsetcc
10659 // aext(setcc) -> truncate(vsetcc)
10660 // aext(setcc) -> aext(vsetcc)
10661 // Only do this before legalize for now.
10662 if (VT.isVector() && !LegalOperations) {
10663 EVT N00VT = N0.getOperand(0).getValueType();
10664 if (getSetCCResultType(N00VT) == N0.getValueType())
10667 // We know that the # elements of the results is the same as the
10668 // # elements of the compare (and the # elements of the compare result
10669 // for that matter). Check to see that they are the same size. If so,
10670 // we know that the element size of the sext'd result matches the
10671 // element size of the compare operands.
10672 if (VT.getSizeInBits() == N00VT.getSizeInBits())
10673 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10675 cast<CondCodeSDNode>(N0.getOperand(2))->get());
10677 // If the desired elements are smaller or larger than the source
10678 // elements we can use a matching integer vector type and then
10679 // truncate/any extend
10680 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10682 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10684 cast<CondCodeSDNode>(N0.getOperand(2))->get());
10685 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10688 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10690 if (SDValue SCC = SimplifySelectCC(
10691 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10692 DAG.getConstant(0, DL, VT),
10693 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10697 if (SDValue NewCtPop = widenCtPop(N, DAG))
// Combine AssertZext/AssertSext nodes: drop redundant asserts and merge an
// assert-truncate-assert sandwich into a single stronger assert.
// Returns the replacement value or an empty SDValue when no fold applies.
10703 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10704 unsigned Opcode = N->getOpcode();
10705 SDValue N0 = N->getOperand(0);
10706 SDValue N1 = N->getOperand(1);
10707 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10709 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10710 if (N0.getOpcode() == Opcode &&
10711 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10714 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10715 N0.getOperand(0).getOpcode() == Opcode) {
10716 // We have an assert, truncate, assert sandwich. Make one stronger assert
10717 // by asserting on the smallest asserted type to the larger source type.
10718 // This eliminates the later assert:
10719 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10720 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10721 SDValue BigA = N0.getOperand(0);
10722 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10723 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10724 "Asserting zero/sign-extended bits to a type larger than the "
10725 "truncated destination does not provide information");
// The narrower of the two asserted types carries all the information; build
// one assert at that type on the wide source, then truncate.
10728 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10729 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10730 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10731 BigA.getOperand(0), MinAssertVTVal);
10732 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10735 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10736 // than X. Just move the AssertZext in front of the truncate and drop the
10738 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10739 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10740 Opcode == ISD::AssertZext) {
10741 SDValue BigA = N0.getOperand(0);
10742 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10743 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10744 "Asserting zero/sign-extended bits to a type larger than the "
10745 "truncated destination does not provide information");
// Only valid when the zext assertion is strictly narrower than the sext
// assertion it subsumes.
10747 if (AssertVT.bitsLT(BigA_AssertVT)) {
10749 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10750 BigA.getOperand(0), N1);
10751 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
// Combine AssertAlign nodes: merge nested asserts and sink the alignment
// assertion into binary-operator operands so the operands become combinable.
// Returns the replacement value or an empty SDValue when no fold applies.
10758 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
10761 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
10762 SDValue N0 = N->getOperand(0);
10764 // Fold (assertalign (assertalign x, AL0), AL1) ->
10765 // (assertalign x, max(AL0, AL1))
10766 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
10767 return DAG.getAssertAlign(DL, N0.getOperand(0),
10768 std::max(AL, AAN->getAlign()));
10770 // In rare cases, there are trivial arithmetic ops in source operands. Sink
10771 // this assert down to source operands so that those arithmetic ops could be
10772 // exposed to the DAG combining.
10773 switch (N0.getOpcode()) {
// Alignment as a power of two: a value is AL-aligned iff its low
// Log2(AL) bits are zero.
10778 unsigned AlignShift = Log2(AL);
10779 SDValue LHS = N0.getOperand(0);
10780 SDValue RHS = N0.getOperand(1);
10781 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
10782 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
// If at least one operand is already known to satisfy the alignment, attach
// the assert only to the operand(s) that still need it and rebuild the op.
10783 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
10784 if (LHSAlignShift < AlignShift)
10785 LHS = DAG.getAssertAlign(DL, LHS, AL);
10786 if (RHSAlignShift < AlignShift)
10787 RHS = DAG.getAssertAlign(DL, RHS, AL);
10788 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
10797 /// If the result of a wider load is shifted to right of N bits and then
10798 /// truncated to a narrower type and where N is a multiple of number of bits of
10799 /// the narrower type, transform it to a narrower load from address + N / num of
10800 /// bits of new type. Also narrow the load if the result is masked with an AND
10801 /// to effectively produce a smaller type. If the result is to be extended, also
10802 /// fold the extension to form a extending load.
// NOTE(review): this listing is elided (non-contiguous source line numbers);
// several early-exit returns and braces of this function are not visible here.
10803 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10804 unsigned Opc = N->getOpcode();
10806 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10807 SDValue N0 = N->getOperand(0);
10808 EVT VT = N->getValueType(0);
10811 // This transformation isn't valid for vector loads.
// ShAmt: bit offset into the wide value that the narrow load should start at.
// HasShiftedOffset: set when the offset comes from a shifted AND mask; the
// result then needs to be shifted back into position at the end.
10815 unsigned ShAmt = 0;
10816 bool HasShiftedOffset = false;
10817 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10819 if (Opc == ISD::SIGN_EXTEND_INREG) {
10820 ExtType = ISD::SEXTLOAD;
10821 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10822 } else if (Opc == ISD::SRL) {
10823 // Another special-case: SRL is basically zero-extending a narrower value,
10824 // or it maybe shifting a higher subword, half or byte into the lowest
10826 ExtType = ISD::ZEXTLOAD;
10827 N0 = SDValue(N, 0);
10829 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10830 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10834 uint64_t ShiftAmt = N01->getZExtValue();
10835 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10836 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10837 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10839 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10840 VT.getSizeInBits() - ShiftAmt);
10841 } else if (Opc == ISD::AND) {
10842 // An AND with a constant mask is the same as a truncate + zero-extend.
10843 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10847 const APInt &Mask = AndC->getAPIntValue();
10848 unsigned ActiveBits = 0;
// A plain low-bit mask reads from offset 0; a shifted mask reads a field at
// bit offset ShAmt (recorded via HasShiftedOffset for the final fix-up).
10849 if (Mask.isMask()) {
10850 ActiveBits = Mask.countTrailingOnes();
10851 } else if (Mask.isShiftedMask()) {
10852 ShAmt = Mask.countTrailingZeros();
10853 APInt ShiftedMask = Mask.lshr(ShAmt);
10854 ActiveBits = ShiftedMask.countTrailingOnes();
10855 HasShiftedOffset = true;
10859 ExtType = ISD::ZEXTLOAD;
10860 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
// Look through an SRL feeding this node: it may select a higher subword of
// the loaded value.
10863 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10865 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10866 ShAmt = ConstShift->getZExtValue();
10867 unsigned EVTBits = ExtVT.getSizeInBits();
10868 // Is the shift amount a multiple of size of VT?
10869 if ((ShAmt & (EVTBits-1)) == 0) {
10870 N0 = N0.getOperand(0);
10871 // Is the load width a multiple of size of VT?
10872 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10876 // At this point, we must have a load or else we can't do the transform.
10877 auto *LN0 = dyn_cast<LoadSDNode>(N0);
10878 if (!LN0) return SDValue();
10880 // Because a SRL must be assumed to *need* to zero-extend the high bits
10881 // (as opposed to anyext the high bits), we can't combine the zextload
10882 // lowering of SRL and an sextload.
10883 if (LN0->getExtensionType() == ISD::SEXTLOAD)
10886 // If the shift amount is larger than the input type then we're not
10887 // accessing any of the loaded bytes. If the load was a zextload/extload
10888 // then the result of the shift+trunc is zero/undef (handled elsewhere).
10889 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10892 // If the SRL is only used by a masking AND, we may be able to adjust
10893 // the ExtVT to make the AND redundant.
10894 SDNode *Mask = *(SRL->use_begin());
10895 if (Mask->getOpcode() == ISD::AND &&
10896 isa<ConstantSDNode>(Mask->getOperand(1))) {
10897 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
10898 if (ShiftMask.isMask()) {
10899 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10900 ShiftMask.countTrailingOnes());
10901 // If the mask is smaller, recompute the type.
10902 if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10903 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10910 // If the load is shifted left (and the result isn't shifted back right),
10911 // we can fold the truncate through the shift.
10912 unsigned ShLeftAmt = 0;
10913 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10914 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10915 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10916 ShLeftAmt = N01->getZExtValue();
10917 N0 = N0.getOperand(0);
10921 // If we haven't found a load, we can't narrow it.
10922 if (!isa<LoadSDNode>(N0))
10925 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10926 // Reducing the width of a volatile load is illegal. For atomics, we may be
10927 // able to reduce the width provided we never widen again. (see D66309)
10928 if (!LN0->isSimple() ||
10929 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
// Big-endian: a bit offset from the LSB corresponds to bytes counted from
// the opposite end of the stored value, so flip the offset.
10932 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10933 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10934 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10935 return LVTStoreBits - EVTStoreBits - ShAmt;
10938 // For big endian targets, we need to adjust the offset to the pointer to
10939 // load the correct bytes.
10940 if (DAG.getDataLayout().isBigEndian())
10941 ShAmt = AdjustBigEndianShift(ShAmt);
10943 uint64_t PtrOff = ShAmt / 8;
10944 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10946 // The original load itself didn't wrap, so an offset within it doesn't.
10948 Flags.setNoUnsignedWrap(true);
10950 DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
10951 AddToWorklist(NewPtr.getNode());
10954 if (ExtType == ISD::NON_EXTLOAD)
10955 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
10956 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10957 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10959 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
10960 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10961 NewAlign, LN0->getMemOperand()->getFlags(),
10964 // Replace the old load's chain with the new load's chain.
10965 WorklistRemover DeadNodes(*this);
10966 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10968 // Shift the result left, if we've swallowed a left shift.
10969 SDValue Result = Load;
10970 if (ShLeftAmt != 0) {
10971 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10972 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10974 // If the shift amount is as large as the result size (but, presumably,
10975 // no larger than the source) then the useful bits of the result are
10976 // zero; we can't simply return the shortened shift, because the result
10977 // of that operation is undefined.
10978 if (ShLeftAmt >= VT.getSizeInBits())
10979 Result = DAG.getConstant(0, DL, VT);
10981 Result = DAG.getNode(ISD::SHL, DL, VT,
10982 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10985 if (HasShiftedOffset) {
10986 // Recalculate the shift amount after it has been altered to calculate
10988 if (DAG.getDataLayout().isBigEndian())
10989 ShAmt = AdjustBigEndianShift(ShAmt);
10991 // We're using a shifted mask, so the load now has an offset. This means
10992 // that data has been loaded into the lower bytes than it would have been
10993 // before, so we need to shl the loaded data into the correct position in the
10995 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10996 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10997 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
11000 // Return the new loaded value.
// Combine a SIGN_EXTEND_INREG node: try, in order, constant folding,
// dropping redundant extensions, merging with inner sext/aext/zext and
// *_EXTEND_VECTOR_INREG nodes, converting to zext_in_reg when the sign bit
// is known zero, demanded-bits simplification, narrowing loads, SRL->SRA
// conversion, folding into extending loads, and bswap-hword matching.
// NOTE(review): this excerpt is a lossy sample of the file -- the stray
// leading integers are residual source line numbers, and several lines
// (undef guard, some returns/braces, and the trailing `return SDValue();`)
// were dropped by the extraction.
11004 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11005 SDValue N0 = N->getOperand(0);
11006 SDValue N1 = N->getOperand(1);
11007 EVT VT = N->getValueType(0);
// N1 is a VTSDNode describing the type being extended *from*.
11008 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11009 unsigned VTBits = VT.getScalarSizeInBits();
11010 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11012 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
// NOTE(review): the guarding `if (N0.isUndef())` line is missing from this
// excerpt; the return below is conditional in the original.
11014 return DAG.getConstant(0, SDLoc(N), VT);
11016 // fold (sext_in_reg c1) -> c1
11017 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11018 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11020 // If the input is already sign extended, just drop the extension.
// ComputeNumSignBits >= VTBits - ExtVTBits + 1 means bit (ExtVTBits-1) is
// already replicated through the top, so the node is a no-op.
11021 if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11024 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
11025 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11026 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11027 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11030 // fold (sext_in_reg (sext x)) -> (sext x)
11031 // fold (sext_in_reg (aext x)) -> (sext x)
11032 // if x is small enough or if we know that x has more than 1 sign bit and the
11033 // sign_extend_inreg is extending from one of them.
11034 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
11035 SDValue N00 = N0.getOperand(0);
11036 unsigned N00Bits = N00.getScalarValueSizeInBits();
11037 if ((N00Bits <= ExtVTBits ||
11038 (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11039 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11040 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11043 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11044 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
11045 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
11046 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
11047 N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
11048 if (!LegalOperations ||
11049 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
11050 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
11054 // fold (sext_in_reg (zext x)) -> (sext x)
11055 // iff we are extending the source sign bit.
11056 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
11057 SDValue N00 = N0.getOperand(0);
11058 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
11059 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11060 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
11063 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
11064 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
11065 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
11067 // fold operands of sext_in_reg based on knowledge that the top bits are not
11069 if (SimplifyDemandedBits(SDValue(N, 0)))
11070 return SDValue(N, 0);
11072 // fold (sext_in_reg (load x)) -> (smaller sextload x)
11073 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
11074 if (SDValue NarrowLoad = ReduceLoadWidth(N))
11077 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
11078 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
11079 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
11080 if (N0.getOpcode() == ISD::SRL) {
11081 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
11082 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
11083 // We can turn this into an SRA iff the input to the SRL is already sign
11084 // extended enough.
11085 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
11086 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
11087 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
11092 // fold (sext_inreg (extload x)) -> (sextload x)
11093 // If sextload is not supported by target, we can only do the combine when
11094 // load has one use. Doing otherwise can block folding the extload with other
11095 // extends that the target does support.
11096 if (ISD::isEXTLoad(N0.getNode()) &&
11097 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11098 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11099 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
11101 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11102 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11103 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11105 LN0->getBasePtr(), ExtVT,
11106 LN0->getMemOperand());
// CombineTo both N and the load so the load's chain users are rewired.
11107 CombineTo(N, ExtLoad);
11108 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11109 AddToWorklist(ExtLoad.getNode());
11110 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11112 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
11113 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
11115 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11116 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
11117 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11118 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11119 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11121 LN0->getBasePtr(), ExtVT,
11122 LN0->getMemOperand());
11123 CombineTo(N, ExtLoad);
11124 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11125 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11128 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
11129 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
11130 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
11131 N0.getOperand(1), false))
11132 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
// Combine SIGN_EXTEND_VECTOR_INREG: fold undef input to zero, constant-fold
// via tryToFoldExtendOfConstant, and simplify using demanded vector elements.
// NOTE(review): lossy excerpt -- the undef guard, the `return Res;` of the
// constant fold, and the trailing `return SDValue();` are missing lines.
11138 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11139 SDValue N0 = N->getOperand(0);
11140 EVT VT = N->getValueType(0);
11142 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11144 return DAG.getConstant(0, SDLoc(N), VT);
11146 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11149 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11150 return SDValue(N, 0);
// Combine ZERO_EXTEND_VECTOR_INREG: mirrors the sign-extend variant above --
// undef folds to zero (high bits would be zero), then constant folding and
// demanded-vector-elements simplification.
// NOTE(review): lossy excerpt -- guard/return lines are missing here too.
11155 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11156 SDValue N0 = N->getOperand(0);
11157 EVT VT = N->getValueType(0);
11159 // zext_vector_inreg(undef) = 0 because the top bits will be zero.
11161 return DAG.getConstant(0, SDLoc(N), VT);
11163 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11166 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11167 return SDValue(N, 0);
// Combine a TRUNCATE node. Tries, in order: collapsing trunc-of-trunc,
// constant folding, cancelling against zext/sext/aext, narrowing vector
// extracts, pushing the truncate into select/shl/build_vector operands,
// folding bitcast+build_vector chains, demanded-bits shortening, load
// narrowing (ReduceLoadWidth), concat-of-undef handling, bitcast-to-extract,
// adde/addcarry narrowing, extract_subvector-of-ext cancellation, vselect
// matching, and finally narrowing binops with constant operands.
// NOTE(review): lossy excerpt -- the stray leading integers are residual
// source line numbers; various guards, `return` lines, `else` keywords, and
// closing braces were dropped by the extraction.
11172 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
11173 SDValue N0 = N->getOperand(0);
11174 EVT VT = N->getValueType(0);
11175 EVT SrcVT = N0.getValueType();
// Endianness matters below when picking element indices after bitcasts.
11176 bool isLE = DAG.getDataLayout().isLittleEndian();
11182 // fold (truncate (truncate x)) -> (truncate x)
11183 if (N0.getOpcode() == ISD::TRUNCATE)
11184 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11186 // fold (truncate c1) -> c1
11187 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
11188 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
// Only return if getNode actually produced a different (folded) node.
11189 if (C.getNode() != N)
11193 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
11194 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
11195 N0.getOpcode() == ISD::SIGN_EXTEND ||
11196 N0.getOpcode() == ISD::ANY_EXTEND) {
11197 // if the source is smaller than the dest, we still need an extend.
11198 if (N0.getOperand(0).getValueType().bitsLT(VT))
11199 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0))
11200 // if the source is larger than the dest, than we just need the truncate.
11201 if (N0.getOperand(0).getValueType().bitsGT(VT))
11202 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11203 // if the source and dest are the same type, we can drop both the extend
11204 // and the truncate.
11205 return N0.getOperand(0);
11208 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
11209 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
11212 // Fold extract-and-trunc into a narrow extract. For example:
11213 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
11214 // i32 y = TRUNCATE(i64 x)
11216 // v16i8 b = BITCAST (v2i64 val)
11217 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
11219 // Note: We only run this optimization after type legalization (which often
11220 // creates this pattern) and before operation legalization after which
11221 // we need to be more careful about the vector instructions that we generate.
11222 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11223 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
11224 EVT VecTy = N0.getOperand(0).getValueType();
11225 EVT ExTy = N0.getValueType();
11226 EVT TrTy = N->getValueType(0);
11228 unsigned NumElem = VecTy.getVectorNumElements();
11229 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
11231 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
11232 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
11234 SDValue EltNo = N0->getOperand(1);
11235 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
11236 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
// Big-endian layouts keep the truncated (low) part at the high sub-index.
11237 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
11240 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
11241 DAG.getBitcast(NVT, N0.getOperand(0)),
11242 DAG.getVectorIdxConstant(Index, DL));
11246 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
11247 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
11248 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
11249 TLI.isTruncateFree(SrcVT, VT)) {
11251 SDValue Cond = N0.getOperand(0);
11252 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11253 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
11254 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
11258 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
11259 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11260 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11261 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
11262 SDValue Amt = N0.getOperand(1);
11263 KnownBits Known = DAG.computeKnownBits(Amt);
11264 unsigned Size = VT.getScalarSizeInBits();
// Known non-zero bits of the amount must fit under log2(Size), i.e. the
// shift amount is provably < Size, so narrowing the shift is safe.
11265 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
11267 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
11269 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11270 if (AmtVT != Amt.getValueType()) {
11271 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
11272 AddToWorklist(Amt.getNode());
11274 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
11278 // Attempt to pre-truncate BUILD_VECTOR sources.
11279 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
11280 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
11281 // Avoid creating illegal types if running after type legalizer.
11282 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
11284 EVT SVT = VT.getScalarType();
11285 SmallVector<SDValue, 8> TruncOps;
11286 for (const SDValue &Op : N0->op_values()) {
11287 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
11288 TruncOps.push_back(TruncOp);
11290 return DAG.getBuildVector(VT, DL, TruncOps);
11293 // Fold a series of buildvector, bitcast, and truncate if possible.
11294 // For example fold
11295 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
11296 // (2xi32 (buildvector x, y)).
11297 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
11298 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
11299 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
11300 N0.getOperand(0).hasOneUse()) {
11301 SDValue BuildVect = N0.getOperand(0);
11302 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
11303 EVT TruncVecEltTy = VT.getVectorElementType();
11305 // Check that the element types match.
11306 if (BuildVectEltTy == TruncVecEltTy) {
11307 // Now we only need to compute the offset of the truncated elements.
11308 unsigned BuildVecNumElts = BuildVect.getNumOperands();
11309 unsigned TruncVecNumElts = VT.getVectorNumElements();
11310 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
11312 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
11313 "Invalid number of elements");
11315 SmallVector<SDValue, 8> Opnds;
// Keep every TruncEltOffset-th operand; the rest are the discarded
// (truncated-away) halves of the wider elements.
11316 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
11317 Opnds.push_back(BuildVect.getOperand(i));
11319 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
11323 // See if we can simplify the input to this truncate through knowledge that
11324 // only the low bits are being used.
11325 // For example "trunc (or (shl x, 8), y)" // -> trunc y
11326 // Currently we only perform this optimization on scalars because vectors
11327 // may have different active low bits.
11328 if (!VT.isVector()) {
11330 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
11331 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
11332 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
11335 // fold (truncate (load x)) -> (smaller load x)
11336 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
11337 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
11338 if (SDValue Reduced = ReduceLoadWidth(N))
11341 // Handle the case where the load remains an extending load even
11342 // after truncation.
11343 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
11344 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11345 if (LN0->isSimple() &&
11346 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
11347 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
11348 VT, LN0->getChain(), LN0->getBasePtr(),
11349 LN0->getMemoryVT(),
11350 LN0->getMemOperand());
11351 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
11357 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
11358 // where ... are all 'undef'.
11359 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
11360 SmallVector<EVT, 8> VTs;
11363 unsigned NumDefs = 0;
11365 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
11366 SDValue X = N0.getOperand(i);
11367 if (!X.isUndef()) {
11372 // Stop if more than one members are non-undef.
11376 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
11377 VT.getVectorElementType(),
11378 X.getValueType().getVectorElementCount()));
11382 return DAG.getUNDEF(VT);
11384 if (NumDefs == 1) {
// V is the single defined operand recorded in the loop above (its
// assignment line is missing from this excerpt).
11385 assert(V.getNode() && "The single defined operand is empty!");
11386 SmallVector<SDValue, 8> Opnds;
11387 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
11389 Opnds.push_back(DAG.getUNDEF(VTs[i]));
11392 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
11393 AddToWorklist(NV.getNode());
11394 Opnds.push_back(NV);
11396 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
11400 // Fold truncate of a bitcast of a vector to an extract of the low vector
11403 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
11404 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
11405 SDValue VecSrc = N0.getOperand(0);
11406 EVT VecSrcVT = VecSrc.getValueType();
11407 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
11408 (!LegalOperations ||
11409 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
// The low part lives in element 0 on LE targets, last element on BE.
11412 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
11413 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
11414 DAG.getVectorIdxConstant(Idx, SL));
11418 // Simplify the operands using demanded-bits information.
11419 if (!VT.isVector() &&
11420 SimplifyDemandedBits(SDValue(N, 0)))
11421 return SDValue(N, 0);
11423 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
11424 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
11425 // When the adde's carry is not used.
11426 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
11427 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
11428 // We only do for addcarry before legalize operation
11429 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
11430 TLI.isOperationLegal(N0.getOpcode(), VT))) {
11432 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11433 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11434 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
11435 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
11438 // fold (truncate (extract_subvector(ext x))) ->
11439 // (extract_subvector x)
11440 // TODO: This can be generalized to cover cases where the truncate and extract
11441 // do not fully cancel each other out.
11442 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
11443 SDValue N00 = N0.getOperand(0);
11444 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
11445 N00.getOpcode() == ISD::ZERO_EXTEND ||
11446 N00.getOpcode() == ISD::ANY_EXTEND) {
11447 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
11448 VT.getVectorElementType())
11449 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
11450 N00.getOperand(0), N0.getOperand(1));
11454 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11457 // Narrow a suitable binary operation with a non-opaque constant operand by
11458 // moving it ahead of the truncate. This is limited to pre-legalization
11459 // because targets may prefer a wider type during later combines and invert
11461 switch (N0.getOpcode()) {
// NOTE(review): the case labels for this switch are missing from this
// excerpt; the body below narrows a two-operand node with a constant side.
11468 if (!LegalOperations && N0.hasOneUse() &&
11469 (isConstantOrConstantVector(N0.getOperand(0), true) ||
11470 isConstantOrConstantVector(N0.getOperand(1), true))) {
11471 // TODO: We already restricted this to pre-legalization, but for vectors
11472 // we are extra cautious to not create an unsupported operation.
11473 // Target-specific changes are likely needed to avoid regressions here.
11474 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
11476 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
11477 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
11478 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
// Return operand i of a BUILD_PAIR, looking through a MERGE_VALUES wrapper
// so callers (CombineConsecutiveLoads) see the underlying node, e.g. a load.
11486 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
11487 SDValue Elt = N->getOperand(i);
11488 if (Elt.getOpcode() != ISD::MERGE_VALUES)
11489 return Elt.getNode();
// MERGE_VALUES: forward to the operand matching this value's result number.
11490 return Elt.getOperand(Elt.getResNo()).getNode();
11493 /// build_pair (load, load) -> load
11494 /// if load locations are consecutive.
// Both loads must be simple non-extending loads with one use, in the same
// address space, and the wide load must not need stricter ABI alignment.
// NOTE(review): the `return SDValue();` fall-through lines are missing from
// this excerpt.
11495 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
11496 assert(N->getOpcode() == ISD::BUILD_PAIR);
11498 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
11499 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
11501 // A BUILD_PAIR is always having the least significant part in elt 0 and the
11502 // most significant part in elt 1. So when combining into one large load, we
11503 // need to consider the endianness.
11504 if (DAG.getDataLayout().isBigEndian())
11505 std::swap(LD1, LD2);
11507 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
11508 LD1->getAddressSpace() != LD2->getAddressSpace())
11510 EVT LD1VT = LD1->getValueType(0);
11511 unsigned LD1Bytes = LD1VT.getStoreSize();
11512 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
// LD2 must sit exactly LD1Bytes after LD1 (and both be non-volatile).
11513 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
11514 Align Alignment = LD1->getAlign();
11515 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
11516 VT.getTypeForEVT(*DAG.getContext()));
11518 if (NewAlign <= Alignment &&
11519 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
11520 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
11521 LD1->getPointerInfo(), Alignment);
// Selector (0 or 1) of the EXTRACT_ELEMENT index holding the Hi double of a
// ppcf128 once bitcast to i128; depends on the target's endianness.
11527 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
11528 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
11529 // and Lo parts; on big-endian machines it doesn't.
11530 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
// Recognize integer sign-bit logic wrapped in bitcasts and turn it into the
// equivalent FP operation: AND with ~signmask -> FABS, XOR with signmask ->
// FNEG, OR with signmask -> FNEG(FABS). Requires the target to declare
// bit-preserving FP logic for VT and matching scalar bit widths.
// NOTE(review): lossy excerpt -- the early `return SDValue()`s, the switch
// case labels, and some closing braces are missing lines.
11533 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
11534 const TargetLowering &TLI) {
11535 // If this is not a bitcast to an FP type or if the target doesn't have
11536 // IEEE754-compliant FP logic, we're done.
11537 EVT VT = N->getValueType(0);
11538 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
11541 // TODO: Handle cases where the integer constant is a different scalar
11542 // bitwidth to the FP.
11543 SDValue N0 = N->getOperand(0);
11544 EVT SourceVT = N0.getValueType();
11545 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
// Map the integer logic opcode to an FP opcode plus the constant mask that
// must match (case labels for AND/XOR/OR are missing from this excerpt).
11550 switch (N0.getOpcode()) {
11552 FPOpcode = ISD::FABS;
11553 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
11556 FPOpcode = ISD::FNEG;
11557 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11560 FPOpcode = ISD::FABS;
11561 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11567 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
11568 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
11569 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
11571 SDValue LogicOp0 = N0.getOperand(0);
11572 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
11573 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
11574 LogicOp0.getOpcode() == ISD::BITCAST &&
11575 LogicOp0.getOperand(0).getValueType() == VT) {
11576 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
// Statistic counter tracking successful int-logic -> FP-logic conversions.
11577 NumFPLogicOpsConv++;
11578 if (N0.getOpcode() == ISD::OR)
11579 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
// Combine a BITCAST node. Tries, in order: constant BUILD_VECTOR folding,
// scalar constant folding, collapsing bitcast-of-bitcast, retyping loads,
// FP sign-bit logic recognition (foldBitcastedFPLogic), lowering
// fneg/fabs/fcopysign to integer bit operations (with a special ppcf128
// path), merging consecutive loads from BUILD_PAIR, and removing double
// bitcasts around vector shuffles.
// NOTE(review): lossy excerpt -- the stray leading integers are residual
// source line numbers; several guards, `return` lines, declarations and
// closing braces were dropped by the extraction.
11586 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
11587 SDValue N0 = N->getOperand(0);
11588 EVT VT = N->getValueType(0);
// NOTE(review): the `if (N0.isUndef())` guard for this return is missing.
11591 return DAG.getUNDEF(VT);
11593 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
11594 // Only do this before legalize types, unless both types are integer and the
11595 // scalar type is legal. Only do this before legalize ops, since the target
11596 // maybe depending on the bitcast.
11597 // First check to see if this is all constant.
11598 // TODO: Support FP bitcasts after legalize types.
11599 if (VT.isVector() &&
11601 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
11602 TLI.isTypeLegal(VT.getVectorElementType()))) &&
11603 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
11604 cast<BuildVectorSDNode>(N0)->isConstant())
11605 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
11606 VT.getVectorElementType());
11608 // If the input is a constant, let getNode fold it.
11609 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
11610 // If we can't allow illegal operations, we need to check that this is just
11611 // a fp -> int or int -> conversion and that the resulting operation will
11613 if (!LegalOperations ||
11614 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
11615 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
11616 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
11617 TLI.isOperationLegal(ISD::Constant, VT))) {
11618 SDValue C = DAG.getBitcast(VT, N0);
11619 if (C.getNode() != N)
11624 // (conv (conv x, t1), t2) -> (conv x, t2)
11625 if (N0.getOpcode() == ISD::BITCAST)
11626 return DAG.getBitcast(VT, N0.getOperand(0));
11628 // fold (conv (load x)) -> (load (conv*)x)
11629 // If the resultant load doesn't need a higher alignment than the original!
11630 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11631 // Do not remove the cast if the types differ in endian layout.
11632 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
11633 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
11634 // If the load is volatile, we only want to change the load type if the
11635 // resulting load is legal. Otherwise we might increase the number of
11636 // memory accesses. We don't care if the original type was legal or not
11637 // as we assume software couldn't rely on the number of accesses of an
11639 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
11640 TLI.isOperationLegal(ISD::LOAD, VT))) {
11641 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11643 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
11644 *LN0->getMemOperand())) {
11646 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
11647 LN0->getPointerInfo(), LN0->getAlignment(),
11648 LN0->getMemOperand()->getFlags(), LN0->getAAInfo())
11649 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11654 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
11657 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
11658 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
11661 // fold (bitcast (fneg x)) ->
11662 // flipbit = signbit
11663 // (xor (bitcast x) (build_pair flipbit, flipbit))
11665 // fold (bitcast (fabs x)) ->
11666 // flipbit = (and (extract_element (bitcast x), 0), signbit)
11667 // (xor (bitcast x) (build_pair flipbit, flipbit))
11668 // This often reduces constant pool loads.
11669 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
11670 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
11671 N0.getNode()->hasOneUse() && VT.isInteger() &&
11672 !VT.isVector() && !N0.getValueType().isVector()) {
11673 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
11674 AddToWorklist(NewConv.getNode());
// ppcf128 is two doubles; flip/mask the sign bit of each 64-bit half via
// BUILD_PAIR instead of a single 128-bit constant.
11677 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11678 assert(VT.getSizeInBits() == 128);
11679 SDValue SignBit = DAG.getConstant(
11680 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
11682 if (N0.getOpcode() == ISD::FNEG) {
11684 AddToWorklist(FlipBit.getNode());
11686 assert(N0.getOpcode() == ISD::FABS);
11688 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
11689 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11691 AddToWorklist(Hi.getNode());
11692 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
11693 AddToWorklist(FlipBit.getNode());
11696 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11697 AddToWorklist(FlipBits.getNode());
11698 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
11700 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11701 if (N0.getOpcode() == ISD::FNEG)
11702 return DAG.getNode(ISD::XOR, DL, VT,
11703 NewConv, DAG.getConstant(SignBit, DL, VT));
11704 assert(N0.getOpcode() == ISD::FABS);
11705 return DAG.getNode(ISD::AND, DL, VT,
11706 NewConv, DAG.getConstant(~SignBit, DL, VT));
11709 // fold (bitconvert (fcopysign cst, x)) ->
11710 // (or (and (bitconvert x), sign), (and cst, (not sign)))
11711 // Note that we don't handle (copysign x, cst) because this can always be
11712 // folded to an fneg or fabs.
11715 // fold (bitcast (fcopysign cst, x)) ->
11716 // flipbit = (and (extract_element
11717 // (xor (bitcast cst), (bitcast x)), 0),
11719 // (xor (bitcast cst) (build_pair flipbit, flipbit))
11720 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
11721 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
11722 VT.isInteger() && !VT.isVector()) {
11723 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
11724 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
11725 if (isTypeLegal(IntXVT)) {
11726 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
11727 AddToWorklist(X.getNode());
11729 // If X has a different width than the result/lhs, sext it or truncate it.
11730 unsigned VTWidth = VT.getSizeInBits();
11731 if (OrigXWidth < VTWidth) {
11732 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
11733 AddToWorklist(X.getNode());
11734 } else if (OrigXWidth > VTWidth) {
11735 // To get the sign bit in the right place, we have to shift it right
11736 // before truncating.
11738 X = DAG.getNode(ISD::SRL, DL,
11739 X.getValueType(), X,
11740 DAG.getConstant(OrigXWidth-VTWidth, DL,
11741 X.getValueType()));
11742 AddToWorklist(X.getNode());
11743 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
11744 AddToWorklist(X.getNode());
// ppcf128 again: derive the flip bit from the Hi half of (cst XOR x).
11747 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11748 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
11749 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11750 AddToWorklist(Cst.getNode());
11751 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
11752 AddToWorklist(X.getNode());
11753 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
11754 AddToWorklist(XorResult.getNode());
11755 SDValue XorResult64 = DAG.getNode(
11756 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
11757 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11758 SDLoc(XorResult)));
11759 AddToWorklist(XorResult64.getNode());
11761 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
11762 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
11763 AddToWorklist(FlipBit.getNode());
11765 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11766 AddToWorklist(FlipBits.getNode());
11767 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
11769 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
// Keep only x's sign bit, and everything *but* the sign bit of cst.
11770 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
11771 X, DAG.getConstant(SignBit, SDLoc(X), VT));
11772 AddToWorklist(X.getNode());
11774 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11775 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
11776 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
11777 AddToWorklist(Cst.getNode());
11779 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11783 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
11784 if (N0.getOpcode() == ISD::BUILD_PAIR)
11785 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11788 // Remove double bitcasts from shuffles - this is often a legacy of
11789 // XformToShuffleWithZero being used to combine bitmaskings (of
11790 // float vectors bitcast to integer vectors) into shuffles.
11791 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
11792 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11793 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11794 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
11795 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
11796 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11798 // If operands are a bitcast, peek through if it casts the original VT.
11799 // If operands are a constant, just bitcast back to original VT.
11800 auto PeekThroughBitcast = [&](SDValue Op) {
11801 if (Op.getOpcode() == ISD::BITCAST &&
11802 Op.getOperand(0).getValueType() == VT)
11803 return SDValue(Op.getOperand(0));
11804 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11805 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
11806 return DAG.getBitcast(VT, Op);
11810 // FIXME: If either input vector is bitcast, try to convert the shuffle to
11811 // the result type of this bitcast. This would eliminate at least one
11812 // bitcast. See the transform in InstCombine.
11813 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11814 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
// Expand each original mask index into MaskScale consecutive narrow-element
// indices (the MaskScale declaration line is missing from this excerpt).
11819 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
11820 SmallVector<int, 8> NewMask;
11821 for (int M : SVN->getMask())
11822 for (int i = 0; i != MaskScale; ++i)
11823 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11825 SDValue LegalShuffle =
11826 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
11828 return LegalShuffle;
// BUILD_PAIR combine: the only fold attempted is merging two consecutive
// component loads into one wide load (see CombineConsecutiveLoads).
11834 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11835 EVT VT = N->getValueType(0);
11836 return CombineConsecutiveLoads(N, VT);
// Simplify FREEZE nodes: a freeze of a freeze is redundant, and freezing a
// scalar integer/FP constant is a no-op (a constant has no undef/poison bits).
// NOTE(review): this numbered listing elides the return statements taken when
// either test below matches, plus the function's fall-through tail.
11839 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
11840   SDValue N0 = N->getOperand(0);
11842   // (freeze (freeze x)) -> (freeze x)
11843   if (N0.getOpcode() == ISD::FREEZE)
11846   // If the input is a constant, return it.
11847   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
// NOTE(review): this numbered listing elides several original lines (the
// SDLoc/DL declaration, closing braces, and the early-return guards after the
// recursive calls); the comments below describe only the visible code.
11853 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11854 /// operands. DstEltVT indicates the destination element value type.
11855 SDValue DAGCombiner::
11856 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11857 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11859 // If this is already the right type, we're done.
11860 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11862 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11863 unsigned DstBitSize = DstEltVT.getSizeInBits();
// Case 1: same element width -> a per-element bitcast (e.g. f32 <-> i32).
11865 // If this is a conversion of N elements of one type to N elements of another
11866 // type, convert each element. This handles FP<->INT cases.
11867 if (SrcBitSize == DstBitSize) {
11868 SmallVector<SDValue, 8> Ops;
11869 for (SDValue Op : BV->op_values()) {
11870 // If the vector element type is not legal, the BUILD_VECTOR operands
11871 // are promoted and implicitly truncated. Make that explicit here.
11872 if (Op.getValueType() != SrcEltVT)
11873 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11874 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11875 AddToWorklist(Ops.back().getNode());
11877 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11878 BV->getValueType(0).getVectorNumElements());
11879 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
// Case 2: width changes. FP elements are first round-tripped through
// same-width integers (via recursive calls) so only integer resizing remains.
11882 // Otherwise, we're growing or shrinking the elements. To avoid having to
11883 // handle annoying details of growing/shrinking FP values, we convert them to
11885 if (SrcEltVT.isFloatingPoint()) {
11886 // Convert the input float vector to a int vector where the elements are the
11888 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11889 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11893 // Now we know the input is an integer vector. If the output is a FP type,
11894 // convert to integer first, then to FP of the right size.
11895 if (DstEltVT.isFloatingPoint()) {
11896 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11897 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11899 // Next, convert to FP elements of the same size.
11900 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11905 // Okay, we know the src/dst types are both integers of differing types.
11906 // Handling growing first.
11907 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
// Growing: pack NumInputsPerOutput narrow source constants into each wide
// destination element, choosing the pack order by target endianness.
11908 if (SrcBitSize < DstBitSize) {
11909 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
11911 SmallVector<SDValue, 8> Ops;
11912 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11913 i += NumInputsPerOutput) {
11914 bool isLE = DAG.getDataLayout().isLittleEndian();
11915 APInt NewBits = APInt(DstBitSize, 0);
11916 bool EltIsUndef = true;
11917 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11918 // Shift the previously computed bits over.
11919 NewBits <<= SrcBitSize;
11920 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
// An undef source lane contributes zero bits; the whole output element is
// undef only if every packed lane was undef (EltIsUndef stays true).
11921 if (Op.isUndef()) continue;
11922 EltIsUndef = false;
11924 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11925 zextOrTrunc(SrcBitSize).zext(DstBitSize);
11929 Ops.push_back(DAG.getUNDEF(DstEltVT));
11931 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT))
11934 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11935 return DAG.getBuildVector(VT, DL, Ops);
// Shrinking: split each wide source constant into NumOutputsPerInput narrow
// pieces, emitting low bits first and reversing per element on big-endian.
11938 // Finally, this must be the case where we are shrinking elements: each input
11939 // turns into multiple outputs.
11940 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11941 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11942 NumOutputsPerInput*BV->getNumOperands());
11943 SmallVector<SDValue, 8> Ops;
11945 for (const SDValue &Op : BV->op_values()) {
11946 if (Op.isUndef()) {
11947 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11951 APInt OpVal = cast<ConstantSDNode>(Op)->
11952 getAPIntValue().zextOrTrunc(SrcBitSize);
11954 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11955 APInt ThisVal = OpVal.trunc(DstBitSize);
11956 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11957 OpVal.lshrInPlace(DstBitSize);
11960 // For big endian targets, swap the order of the pieces of each element.
11961 if (DAG.getDataLayout().isBigEndian())
11962 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11965 return DAG.getBuildVector(VT, DL, Ops);
// Returns true if this node's fast-math flags permit contraction, i.e. fusing
// an fmul into a neighboring fadd/fsub to form an fma (either the 'contract'
// or the 'reassoc' flag is sufficient).
// NOTE(review): the listing elides the closing brace.
11968 static bool isContractable(SDNode *N) {
11969   SDNodeFlags F = N->getFlags();
11970   return F.hasAllowContract() || F.hasAllowReassociation();
// NOTE(review): this numbered listing elides many original lines (SDLoc SL,
// the HasFMA initializer's first line, early `return SDValue()` statements,
// closing braces, and the tails of the fold lambdas). Comments below describe
// only what the visible lines establish.
11973 /// Try to perform FMA combining on a given FADD node.
11974 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11975 SDValue N0 = N->getOperand(0);
11976 SDValue N1 = N->getOperand(1);
11977 EVT VT = N->getValueType(0);
11980 const TargetOptions &Options = DAG.getTarget().Options;
11982 // Floating-point multiply-add with intermediate rounding.
11983 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
11985 // Floating-point multiply-add without intermediate rounding.
11987 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
11988 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11990 // No valid opcode, do not combine.
11991 if (!HasFMAD && !HasFMA)
// Fusion is allowed either globally (-fp-contract=fast / unsafe-fp-math /
// FMAD available) or per-node via the contract/reassoc fast-math flags.
11994 SDNodeFlags Flags = N->getFlags();
11995 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11996 bool CanReassociate =
11997 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
11998 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11999 CanFuse || HasFMAD);
12000 // If the addition is not contractable, do not combine.
12001 if (!AllowFusionGlobally && !isContractable(N))
// Defer to the machine combiner if the target prefers forming FMAs there.
12004 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12007 // Always prefer FMAD to FMA for precision.
12008 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12009 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12011 // Is the node an FMUL and contractable either due to global flags or
12013 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12014 if (N.getOpcode() != ISD::FMUL)
12016 return AllowFusionGlobally || isContractable(N.getNode());
12018 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12019 // prefer to fold the multiply with fewer uses.
12020 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12021 if (N0.getNode()->use_size() > N1.getNode()->use_size())
12025 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
12026 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
12027 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12028 N0.getOperand(0), N0.getOperand(1), N1, Flags);
12031 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
12032 // Note: Commutes FADD operands.
12033 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
12034 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12035 N1.getOperand(0), N1.getOperand(1), N0, Flags);
12038 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
12039 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
12040 // This requires reassociation because it changes the order of operations.
// The two branches pick which operand carries the inner fma (FMA) and which
// is the extra addend (E); the elided lines presumably assign FMA/E here.
12042 if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
12043 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
12044 N0.getOperand(2).hasOneUse()) {
12047 } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
12048 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
12049 N1.getOperand(2).hasOneUse()) {
12054 SDValue A = FMA.getOperand(0);
12055 SDValue B = FMA.getOperand(1);
12056 SDValue C = FMA.getOperand(2).getOperand(0);
12057 SDValue D = FMA.getOperand(2).getOperand(1);
12058 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
12059 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
12062 // Look through FP_EXTEND nodes to do more combining.
12064 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
12065 if (N0.getOpcode() == ISD::FP_EXTEND) {
12066 SDValue N00 = N0.getOperand(0);
12067 if (isContractableFMUL(N00) &&
12068 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12069 N00.getValueType())) {
12070 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12071 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12072 N00.getOperand(0)),
12073 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12074 N00.getOperand(1)), N1, Flags);
12078 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
12079 // Note: Commutes FADD operands.
12080 if (N1.getOpcode() == ISD::FP_EXTEND) {
12081 SDValue N10 = N1.getOperand(0);
12082 if (isContractableFMUL(N10) &&
12083 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12084 N10.getValueType())) {
12085 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12086 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12087 N10.getOperand(0)),
12088 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12089 N10.getOperand(1)), N0, Flags);
// The folds below are gated on Aggressive in the original (guard elided in
// this listing) — they trade extra fpext/fma nodes for deeper fusion.
12093 // More folding opportunities when target permits.
12095 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
12096 // -> (fma x, y, (fma (fpext u), (fpext v), z))
12097 auto FoldFAddFMAFPExtFMul = [&] (
12098 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
12099 SDNodeFlags Flags) {
12100 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
12101 DAG.getNode(PreferredFusedOpcode, SL, VT,
12102 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12103 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12106 if (N0.getOpcode() == PreferredFusedOpcode) {
12107 SDValue N02 = N0.getOperand(2);
12108 if (N02.getOpcode() == ISD::FP_EXTEND) {
12109 SDValue N020 = N02.getOperand(0);
12110 if (isContractableFMUL(N020) &&
12111 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12112 N020.getValueType())) {
12113 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
12114 N020.getOperand(0), N020.getOperand(1),
12120 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
12121 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
12122 // FIXME: This turns two single-precision and one double-precision
12123 // operation into two double-precision operations, which might not be
12124 // interesting for all targets, especially GPUs.
12125 auto FoldFAddFPExtFMAFMul = [&] (
12126 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
12127 SDNodeFlags Flags) {
12128 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12129 DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
12130 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
12131 DAG.getNode(PreferredFusedOpcode, SL, VT,
12132 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12133 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12136 if (N0.getOpcode() == ISD::FP_EXTEND) {
12137 SDValue N00 = N0.getOperand(0);
12138 if (N00.getOpcode() == PreferredFusedOpcode) {
12139 SDValue N002 = N00.getOperand(2);
12140 if (isContractableFMUL(N002) &&
12141 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12142 N00.getValueType())) {
12143 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
12144 N002.getOperand(0), N002.getOperand(1),
12150 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
12151 // -> (fma y, z, (fma (fpext u), (fpext v), x))
12152 if (N1.getOpcode() == PreferredFusedOpcode) {
12153 SDValue N12 = N1.getOperand(2);
12154 if (N12.getOpcode() == ISD::FP_EXTEND) {
12155 SDValue N120 = N12.getOperand(0);
12156 if (isContractableFMUL(N120) &&
12157 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12158 N120.getValueType())) {
12159 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
12160 N120.getOperand(0), N120.getOperand(1),
12166 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
12167 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
12168 // FIXME: This turns two single-precision and one double-precision
12169 // operation into two double-precision operations, which might not be
12170 // interesting for all targets, especially GPUs.
12171 if (N1.getOpcode() == ISD::FP_EXTEND) {
12172 SDValue N10 = N1.getOperand(0);
12173 if (N10.getOpcode() == PreferredFusedOpcode) {
12174 SDValue N102 = N10.getOperand(2);
12175 if (isContractableFMUL(N102) &&
12176 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12177 N10.getValueType())) {
12178 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
12179 N102.getOperand(0), N102.getOperand(1),
// NOTE(review): this numbered listing elides many original lines (SDLoc SL,
// the HasFMA initializer's first line, early returns, closing braces, some
// operand arguments in the deeply-nested getNode calls, and the Aggressive
// guard around the later folds). Comments describe only the visible lines.
12189 /// Try to perform FMA combining on a given FSUB node.
12190 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
12191 SDValue N0 = N->getOperand(0);
12192 SDValue N1 = N->getOperand(1);
12193 EVT VT = N->getValueType(0);
12196 const TargetOptions &Options = DAG.getTarget().Options;
12197 // Floating-point multiply-add with intermediate rounding.
12198 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12200 // Floating-point multiply-add without intermediate rounding.
12202 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12203 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12205 // No valid opcode, do not combine.
12206 if (!HasFMAD && !HasFMA)
12209 const SDNodeFlags Flags = N->getFlags();
12210 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12211 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12212 CanFuse || HasFMAD);
12214 // If the subtraction is not contractable, do not combine.
12215 if (!AllowFusionGlobally && !isContractable(N))
// Defer to the machine combiner if the target prefers forming FMAs there.
12218 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12221 // Always prefer FMAD to FMA for precision.
12222 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12223 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12224 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
12226 // Is the node an FMUL and contractable either due to global flags or
12228 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12229 if (N.getOpcode() != ISD::FMUL)
12231 return AllowFusionGlobally || isContractable(N.getNode());
// Two helper lambdas for the basic fsub folds; both require the fmul to be
// contractable and either aggressive fusion or a single use of the multiply.
12234 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12235 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
12236 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
12237 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
12238 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
12244 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12245 // Note: Commutes FSUB operands.
12246 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
12247 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
12248 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12249 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
12250 YZ.getOperand(1), X, Flags);
12255 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
12256 // prefer to fold the multiply with fewer uses.
12257 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
12258 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
12259 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
12260 if (SDValue V = tryToFoldXSubYZ(N0, N1))
12262 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
12263 if (SDValue V = tryToFoldXYSubZ(N0, N1))
12266 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12267 if (SDValue V = tryToFoldXYSubZ(N0, N1))
12269 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12270 if (SDValue V = tryToFoldXSubYZ(N0, N1))
12274 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
12275 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
12276 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
12277 SDValue N00 = N0.getOperand(0).getOperand(0);
12278 SDValue N01 = N0.getOperand(0).getOperand(1);
12279 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12280 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
12281 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
12284 // Look through FP_EXTEND nodes to do more combining.
12286 // fold (fsub (fpext (fmul x, y)), z)
12287 // -> (fma (fpext x), (fpext y), (fneg z))
12288 if (N0.getOpcode() == ISD::FP_EXTEND) {
12289 SDValue N00 = N0.getOperand(0);
12290 if (isContractableFMUL(N00) &&
12291 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12292 N00.getValueType())) {
12293 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12294 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12295 N00.getOperand(0)),
12296 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12297 N00.getOperand(1)),
12298 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
12302 // fold (fsub x, (fpext (fmul y, z)))
12303 // -> (fma (fneg (fpext y)), (fpext z), x)
12304 // Note: Commutes FSUB operands.
12305 if (N1.getOpcode() == ISD::FP_EXTEND) {
12306 SDValue N10 = N1.getOperand(0);
12307 if (isContractableFMUL(N10) &&
12308 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12309 N10.getValueType())) {
12310 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12311 DAG.getNode(ISD::FNEG, SL, VT,
12312 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12313 N10.getOperand(0))),
12314 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12315 N10.getOperand(1)),
12320 // fold (fsub (fpext (fneg (fmul, x, y))), z)
12321 // -> (fneg (fma (fpext x), (fpext y), z))
12322 // Note: This could be removed with appropriate canonicalization of the
12323 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
12324 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
12325 // from implementing the canonicalization in visitFSUB.
12326 if (N0.getOpcode() == ISD::FP_EXTEND) {
12327 SDValue N00 = N0.getOperand(0);
12328 if (N00.getOpcode() == ISD::FNEG) {
12329 SDValue N000 = N00.getOperand(0);
12330 if (isContractableFMUL(N000) &&
12331 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12332 N00.getValueType())) {
12333 return DAG.getNode(ISD::FNEG, SL, VT,
12334 DAG.getNode(PreferredFusedOpcode, SL, VT,
12335 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12336 N000.getOperand(0)),
12337 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12338 N000.getOperand(1)),
// Same pattern with fneg and fpext in the opposite nesting order.
12344 // fold (fsub (fneg (fpext (fmul, x, y))), z)
12345 // -> (fneg (fma (fpext x)), (fpext y), z)
12346 // Note: This could be removed with appropriate canonicalization of the
12347 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
12348 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
12349 // from implementing the canonicalization in visitFSUB.
12350 if (N0.getOpcode() == ISD::FNEG) {
12351 SDValue N00 = N0.getOperand(0);
12352 if (N00.getOpcode() == ISD::FP_EXTEND) {
12353 SDValue N000 = N00.getOperand(0);
12354 if (isContractableFMUL(N000) &&
12355 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12356 N000.getValueType())) {
12357 return DAG.getNode(ISD::FNEG, SL, VT,
12358 DAG.getNode(PreferredFusedOpcode, SL, VT,
12359 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12360 N000.getOperand(0)),
12361 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12362 N000.getOperand(1)),
12368 // More folding opportunities when target permits.
12370 // fold (fsub (fma x, y, (fmul u, v)), z)
12371 // -> (fma x, y (fma u, v, (fneg z)))
12372 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
12373 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
12374 N0.getOperand(2)->hasOneUse()) {
12375 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12376 N0.getOperand(0), N0.getOperand(1),
12377 DAG.getNode(PreferredFusedOpcode, SL, VT,
12378 N0.getOperand(2).getOperand(0),
12379 N0.getOperand(2).getOperand(1),
12380 DAG.getNode(ISD::FNEG, SL, VT,
12381 N1), Flags), Flags);
12384 // fold (fsub x, (fma y, z, (fmul u, v)))
12385 // -> (fma (fneg y), z, (fma (fneg u), v, x))
// Requires no-signed-zeros: distributing the negation over the fma can flip
// the sign of a zero result otherwise.
12386 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
12387 isContractableFMUL(N1.getOperand(2)) &&
12388 N1->hasOneUse() && NoSignedZero) {
12389 SDValue N20 = N1.getOperand(2).getOperand(0);
12390 SDValue N21 = N1.getOperand(2).getOperand(1);
12391 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12392 DAG.getNode(ISD::FNEG, SL, VT,
12395 DAG.getNode(PreferredFusedOpcode, SL, VT,
12396 DAG.getNode(ISD::FNEG, SL, VT, N20),
12397 N21, N0, Flags), Flags);
12401 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
12402 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
12403 if (N0.getOpcode() == PreferredFusedOpcode &&
12405 SDValue N02 = N0.getOperand(2);
12406 if (N02.getOpcode() == ISD::FP_EXTEND) {
12407 SDValue N020 = N02.getOperand(0);
12408 if (isContractableFMUL(N020) &&
12409 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12410 N020.getValueType())) {
12411 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12412 N0.getOperand(0), N0.getOperand(1),
12413 DAG.getNode(PreferredFusedOpcode, SL, VT,
12414 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12415 N020.getOperand(0)),
12416 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12417 N020.getOperand(1)),
12418 DAG.getNode(ISD::FNEG, SL, VT,
12419 N1), Flags), Flags);
12424 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
12425 // -> (fma (fpext x), (fpext y),
12426 // (fma (fpext u), (fpext v), (fneg z)))
12427 // FIXME: This turns two single-precision and one double-precision
12428 // operation into two double-precision operations, which might not be
12429 // interesting for all targets, especially GPUs.
12430 if (N0.getOpcode() == ISD::FP_EXTEND) {
12431 SDValue N00 = N0.getOperand(0);
12432 if (N00.getOpcode() == PreferredFusedOpcode) {
12433 SDValue N002 = N00.getOperand(2);
12434 if (isContractableFMUL(N002) &&
12435 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12436 N00.getValueType())) {
12437 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12438 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12439 N00.getOperand(0)),
12440 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12441 N00.getOperand(1)),
12442 DAG.getNode(PreferredFusedOpcode, SL, VT,
12443 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12444 N002.getOperand(0)),
12445 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12446 N002.getOperand(1)),
12447 DAG.getNode(ISD::FNEG, SL, VT,
12448 N1), Flags), Flags);
12453 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
12454 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
12455 if (N1.getOpcode() == PreferredFusedOpcode &&
12456 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
12458 SDValue N120 = N1.getOperand(2).getOperand(0);
12459 if (isContractableFMUL(N120) &&
12460 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12461 N120.getValueType())) {
12462 SDValue N1200 = N120.getOperand(0);
12463 SDValue N1201 = N120.getOperand(1);
12464 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12465 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
12467 DAG.getNode(PreferredFusedOpcode, SL, VT,
12468 DAG.getNode(ISD::FNEG, SL, VT,
12469 DAG.getNode(ISD::FP_EXTEND, SL,
12471 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12473 N0, Flags), Flags);
12477 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
12478 // -> (fma (fneg (fpext y)), (fpext z),
12479 // (fma (fneg (fpext u)), (fpext v), x))
12480 // FIXME: This turns two single-precision and one double-precision
12481 // operation into two double-precision operations, which might not be
12482 // interesting for all targets, especially GPUs.
12483 if (N1.getOpcode() == ISD::FP_EXTEND &&
12484 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
12485 SDValue CvtSrc = N1.getOperand(0);
12486 SDValue N100 = CvtSrc.getOperand(0);
12487 SDValue N101 = CvtSrc.getOperand(1);
12488 SDValue N102 = CvtSrc.getOperand(2);
12489 if (isContractableFMUL(N102) &&
12490 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12491 CvtSrc.getValueType())) {
12492 SDValue N1020 = N102.getOperand(0);
12493 SDValue N1021 = N102.getOperand(1);
12494 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12495 DAG.getNode(ISD::FNEG, SL, VT,
12496 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12498 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
12499 DAG.getNode(PreferredFusedOpcode, SL, VT,
12500 DAG.getNode(ISD::FNEG, SL, VT,
12501 DAG.getNode(ISD::FP_EXTEND, SL,
12503 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12505 N0, Flags), Flags);
// NOTE(review): this numbered listing elides some original lines (SDLoc SL,
// the HasFMA initializer's first line, early returns, the fall-through
// `return SDValue()` inside the lambdas, and closing braces). Comments below
// describe only the visible lines.
12513 /// Try to perform FMA combining on a given FMUL node based on the distributive
12514 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
12515 /// subtraction instead of addition).
12516 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
12517 SDValue N0 = N->getOperand(0);
12518 SDValue N1 = N->getOperand(1);
12519 EVT VT = N->getValueType(0);
12521 const SDNodeFlags Flags = N->getFlags();
12523 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
12525 const TargetOptions &Options = DAG.getTarget().Options;
12527 // The transforms below are incorrect when x == 0 and y == inf, because the
12528 // intermediate multiplication produces a nan.
12529 if (!Options.NoInfsFPMath)
12532 // Floating-point multiply-add without intermediate rounding.
12534 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
12535 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12536 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12538 // Floating-point multiply-add with intermediate rounding. This can result
12539 // in a less precise result due to the changed rounding order.
12540 bool HasFMAD = Options.UnsafeFPMath &&
12541 (LegalOperations && TLI.isFMADLegal(DAG, N));
12543 // No valid opcode, do not combine.
12544 if (!HasFMAD && !HasFMA)
12547 // Always prefer FMAD to FMA for precision.
12548 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12549 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
// Helper: X is a candidate (fadd x0, +/-1.0); Y is the other fmul operand.
// Only an exact +/-1.0 splat constant triggers the fold.
12551 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
12552 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
12553 auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12554 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
12555 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
12556 if (C->isExactlyValue(+1.0))
12557 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12559 if (C->isExactlyValue(-1.0))
12560 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12561 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
// Try both operand orders since fmul commutes.
12567 if (SDValue FMA = FuseFADD(N0, N1, Flags))
12569 if (SDValue FMA = FuseFADD(N1, N0, Flags))
// Helper for the fsub variants; the +/-1.0 may appear on either side of the
// fsub, giving the four patterns listed below.
12572 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
12573 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
12574 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
12575 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
12576 auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12577 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
12578 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
12579 if (C0->isExactlyValue(+1.0))
12580 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12581 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12583 if (C0->isExactlyValue(-1.0))
12584 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12585 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12586 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12588 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
12589 if (C1->isExactlyValue(+1.0))
12590 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12591 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12592 if (C1->isExactlyValue(-1.0))
12593 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12600 if (SDValue FMA = FuseFSUB(N0, N1, Flags))
12602 if (SDValue FMA = FuseFSUB(N1, N0, Flags))
12608 SDValue DAGCombiner::visitFADD(SDNode *N) {
12609 SDValue N0 = N->getOperand(0);
12610 SDValue N1 = N->getOperand(1);
12611 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12612 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12613 EVT VT = N->getValueType(0);
12615 const TargetOptions &Options = DAG.getTarget().Options;
12616 const SDNodeFlags Flags = N->getFlags();
12618 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12623 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12626 // fold (fadd c1, c2) -> c1 + c2
12627 if (N0CFP && N1CFP)
12628 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
12630 // canonicalize constant to RHS
12631 if (N0CFP && !N1CFP)
12632 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
12634 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
12635 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
12636 if (N1C && N1C->isZero())
12637 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
12640 if (SDValue NewSel = foldBinOpIntoSelect(N))
12643 // fold (fadd A, (fneg B)) -> (fsub A, B)
12644 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
12645 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
12646 N1, DAG, LegalOperations, ForCodeSize))
12647 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);
12649 // fold (fadd (fneg A), B) -> (fsub B, A)
12650 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
12651 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
12652 N0, DAG, LegalOperations, ForCodeSize))
12653 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);
12655 auto isFMulNegTwo = [](SDValue FMul) {
12656 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
12658 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
12659 return C && C->isExactlyValue(-2.0);
12662 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
12663 if (isFMulNegTwo(N0)) {
12664 SDValue B = N0.getOperand(0);
12665 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12666 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
12668 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
12669 if (isFMulNegTwo(N1)) {
12670 SDValue B = N1.getOperand(0);
12671 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12672 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
12675 // No FP constant should be created after legalization as Instruction
12676 // Selection pass has a hard time dealing with FP constants.
12677 bool AllowNewConst = (Level < AfterLegalizeDAG);
12679 // If nnan is enabled, fold lots of things.
12680 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
12681 // If allowed, fold (fadd (fneg x), x) -> 0.0
12682 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
12683 return DAG.getConstantFP(0.0, DL, VT);
12685 // If allowed, fold (fadd x, (fneg x)) -> 0.0
12686 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
12687 return DAG.getConstantFP(0.0, DL, VT);
12690 // If 'unsafe math' or reassoc and nsz, fold lots of things.
12691 // TODO: break out portions of the transformations below for which Unsafe is
12692 // considered and which do not require both nsz and reassoc
12693 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12694 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12696 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
12697 if (N1CFP && N0.getOpcode() == ISD::FADD &&
12698 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12699 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
12700 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
12703 // We can fold chains of FADD's of the same value into multiplications.
12704 // This transform is not safe in general because we are reducing the number
12705 // of rounding steps.
12706 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
12707 if (N0.getOpcode() == ISD::FMUL) {
12708 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12709 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
12711 // (fadd (fmul x, c), x) -> (fmul x, c+1)
12712 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
12713 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12714 DAG.getConstantFP(1.0, DL, VT), Flags);
12715 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
12718 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
12719 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
12720 N1.getOperand(0) == N1.getOperand(1) &&
12721 N0.getOperand(0) == N1.getOperand(0)) {
12722 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12723 DAG.getConstantFP(2.0, DL, VT), Flags);
12724 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
12728 if (N1.getOpcode() == ISD::FMUL) {
12729 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12730 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
12732 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
12733 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
12734 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12735 DAG.getConstantFP(1.0, DL, VT), Flags);
12736 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
12739 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
12740 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
12741 N0.getOperand(0) == N0.getOperand(1) &&
12742 N1.getOperand(0) == N0.getOperand(0)) {
12743 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12744 DAG.getConstantFP(2.0, DL, VT), Flags);
12745 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
12749 if (N0.getOpcode() == ISD::FADD) {
12750 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12751 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
12752 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
12753 (N0.getOperand(0) == N1)) {
12754 return DAG.getNode(ISD::FMUL, DL, VT,
12755 N1, DAG.getConstantFP(3.0, DL, VT), Flags);
12759 if (N1.getOpcode() == ISD::FADD) {
12760 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12761 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
12762 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
12763 N1.getOperand(0) == N0) {
12764 return DAG.getNode(ISD::FMUL, DL, VT,
12765 N0, DAG.getConstantFP(3.0, DL, VT), Flags);
12769 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
12770 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
12771 N0.getOperand(0) == N0.getOperand(1) &&
12772 N1.getOperand(0) == N1.getOperand(1) &&
12773 N0.getOperand(0) == N1.getOperand(0)) {
12774 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
12775 DAG.getConstantFP(4.0, DL, VT), Flags);
12778 } // enable-unsafe-fp-math
12780 // FADD -> FMA combines:
12781 if (SDValue Fused = visitFADDForFMACombine(N)) {
12782 AddToWorklist(Fused.getNode());
// Combine/simplify an ISD::FSUB node. In order: generic FP-binop
// simplification, vector binop folding, constant folding, fsub-of-zero,
// x - x (under no-NaNs), (-0.0 - X) -> fneg, X - (X + Y) patterns (under
// reassoc + nsz), fsub of a negatable RHS -> fadd, and FSUB -> FMA fusion.
// NOTE(review): this excerpt omits several original lines (e.g. the SDLoc
// declaration and some early-return bodies), so the flow below is partial.
12788 SDValue DAGCombiner::visitFSUB(SDNode *N) {
12789 SDValue N0 = N->getOperand(0);
12790 SDValue N1 = N->getOperand(1);
// Splat-aware constant matchers: second argument (true) allows undef lanes.
12791 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12792 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12793 EVT VT = N->getValueType(0);
12795 const TargetOptions &Options = DAG.getTarget().Options;
12796 const SDNodeFlags Flags = N->getFlags();
12798 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12803 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12806 // fold (fsub c1, c2) -> c1-c2
12807 if (N0CFP && N1CFP)
12808 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12810 if (SDValue NewSel = foldBinOpIntoSelect(N))
12813 // (fsub A, 0) -> A
// Dropping a -0.0 subtrahend is only an identity when signed zeros may be
// ignored (target option or per-node nsz flag).
12814 if (N1CFP && N1CFP->isZero()) {
12815 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
12816 Flags.hasNoSignedZeros()) {
12822 // (fsub x, x) -> 0.0
// Requires no-NaNs: NaN - NaN is NaN, not 0.0.
12823 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12824 return DAG.getConstantFP(0.0f, DL, VT);
12827 // (fsub -0.0, N1) -> -N1
12828 // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12829 // FSUB does not specify the sign bit of a NaN. Also note that for
12830 // the same reason, the inverse transform is not safe, unless fast math
12831 // flags are in play.
12832 if (N0CFP && N0CFP->isZero()) {
12833 if (N0CFP->isNegative() ||
12834 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
// Prefer a cheaply-negated form of N1; otherwise fall back to a plain FNEG
// if that operation is legal (or we are pre-legalization).
12835 if (SDValue NegN1 =
12836 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
12838 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12839 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
// Reassociation-dependent folds: need either global unsafe math + nsz, or
// both per-node reassoc and nsz flags.
12843 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12844 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12845 N1.getOpcode() == ISD::FADD) {
12846 // X - (X + Y) -> -Y
12847 if (N0 == N1->getOperand(0))
12848 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12849 // X - (Y + X) -> -Y
12850 if (N0 == N1->getOperand(1))
12851 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12854 // fold (fsub A, (fneg B)) -> (fadd A, B)
12855 if (SDValue NegN1 =
12856 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
12857 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
12859 // FSUB -> FMA combines:
12860 if (SDValue Fused = visitFSUBForFMACombine(N)) {
12861 AddToWorklist(Fused.getNode());
// Combine/simplify an ISD::FMUL node: constant folding, canonicalizing the
// constant operand to the RHS, multiply-by-zero (under nnan+nsz),
// reassociation folds, strength reductions (x*2 -> x+x, x*-1 -> -x),
// double-negation removal, select-based fabs/fneg recognition, and finally
// FMUL -> FMA distribution.
// NOTE(review): several original lines are missing from this excerpt.
12868 SDValue DAGCombiner::visitFMUL(SDNode *N) {
12869 SDValue N0 = N->getOperand(0);
12870 SDValue N1 = N->getOperand(1);
// Splat-aware constant matchers (true = allow undef lanes).
12871 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12872 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12873 EVT VT = N->getValueType(0);
12875 const TargetOptions &Options = DAG.getTarget().Options;
12876 const SDNodeFlags Flags = N->getFlags();
12878 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12882 if (VT.isVector()) {
12883 // This just handles C1 * C2 for vectors. Other vector folds are below.
12884 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12888 // fold (fmul c1, c2) -> c1*c2
12889 if (N0CFP && N1CFP)
12890 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12892 // canonicalize constant to RHS
12893 if (isConstantFPBuildVectorOrConstantFP(N0) &&
12894 !isConstantFPBuildVectorOrConstantFP(N1))
12895 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12897 if (SDValue NewSel = foldBinOpIntoSelect(N))
// fmul x, 0 -> 0 is only valid with both no-NaNs (NaN * 0 = NaN) and
// no-signed-zeros (-x * 0 = -0.0).
12900 if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12901 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12902 // fold (fmul A, 0) -> 0
12903 if (N1CFP && N1CFP->isZero())
// Reassociation-dependent folds below change rounding behavior.
12907 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12908 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
12909 if (isConstantFPBuildVectorOrConstantFP(N1) &&
12910 N0.getOpcode() == ISD::FMUL) {
12911 SDValue N00 = N0.getOperand(0);
12912 SDValue N01 = N0.getOperand(1);
12913 // Avoid an infinite loop by making sure that N00 is not a constant
12914 // (the inner multiply has not been constant folded yet).
12915 if (isConstantFPBuildVectorOrConstantFP(N01) &&
12916 !isConstantFPBuildVectorOrConstantFP(N00)) {
12917 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12918 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12922 // Match a special-case: we convert X * 2.0 into fadd.
12923 // fmul (fadd X, X), C -> fmul X, 2.0 * C
12924 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12925 N0.getOperand(0) == N0.getOperand(1)) {
12926 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12927 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12928 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12932 // fold (fmul X, 2.0) -> (fadd X, X)
12933 if (N1CFP && N1CFP->isExactlyValue(+2.0))
12934 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12936 // fold (fmul X, -1.0) -> (fneg X)
12937 if (N1CFP && N1CFP->isExactlyValue(-1.0))
12938 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12939 return DAG.getNode(ISD::FNEG, DL, VT, N0);
12941 // -N0 * -N1 --> N0 * N1
// Only profitable when at least one negation is strictly cheaper to remove.
12942 TargetLowering::NegatibleCost CostN0 =
12943 TargetLowering::NegatibleCost::Expensive;
12944 TargetLowering::NegatibleCost CostN1 =
12945 TargetLowering::NegatibleCost::Expensive;
12947 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
12949 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
12950 if (NegN0 && NegN1 &&
12951 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
12952 CostN1 == TargetLowering::NegatibleCost::Cheaper))
12953 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
12955 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12956 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
12957 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12958 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12959 TLI.isOperationLegal(ISD::FABS, VT)) {
// Normalize so that Select holds the SELECT node and X the other operand.
12960 SDValue Select = N0, X = N1;
12961 if (Select.getOpcode() != ISD::SELECT)
12962 std::swap(Select, X);
12964 SDValue Cond = Select.getOperand(0);
12965 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12966 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
// The condition must be a comparison of X against 0.0.
12968 if (TrueOpnd && FalseOpnd &&
12969 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12970 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12971 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12972 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12981 std::swap(TrueOpnd, FalseOpnd);
12989 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12990 TLI.isOperationLegal(ISD::FNEG, VT))
12991 return DAG.getNode(ISD::FNEG, DL, VT,
12992 DAG.getNode(ISD::FABS, DL, VT, X));
12993 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12994 return DAG.getNode(ISD::FABS, DL, VT, X);
13001 // FMUL -> FMA combines:
13002 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
13003 AddToWorklist(Fused.getNode());
// Combine/simplify an ISD::FMA node: constant folding, double-negation of
// the multiplicands, zero/one multiplicand simplifications (unsafe math
// only), canonicalizing the constant to operand 1, merging with adjacent
// FMULs, +/-1.0 multiplicand -> fadd, negated-operand rebalancing, and
// finally negating the whole FMA when that is cheaper.
// NOTE(review): several original lines are missing from this excerpt.
13010 SDValue DAGCombiner::visitFMA(SDNode *N) {
13011 SDValue N0 = N->getOperand(0);
13012 SDValue N1 = N->getOperand(1);
13013 SDValue N2 = N->getOperand(2);
13014 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13015 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13016 EVT VT = N->getValueType(0);
13018 const TargetOptions &Options = DAG.getTarget().Options;
13020 // FMA nodes have flags that propagate to the created nodes.
13021 const SDNodeFlags Flags = N->getFlags();
// "Unsafe" here means either the global flag or this node being contractable.
13022 bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
13024 // Constant fold FMA.
13025 if (isa<ConstantFPSDNode>(N0) &&
13026 isa<ConstantFPSDNode>(N1) &&
13027 isa<ConstantFPSDNode>(N2)) {
13028 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
13031 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
13032 TargetLowering::NegatibleCost CostN0 =
13033 TargetLowering::NegatibleCost::Expensive;
13034 TargetLowering::NegatibleCost CostN1 =
13035 TargetLowering::NegatibleCost::Expensive;
13037 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13039 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13040 if (NegN0 && NegN1 &&
13041 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13042 CostN1 == TargetLowering::NegatibleCost::Cheaper))
13043 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
// Multiplicand of 0.0/1.0 simplifications change NaN/rounding behavior, so
// they are gated on unsafe math.
13045 if (UnsafeFPMath) {
13046 if (N0CFP && N0CFP->isZero())
13048 if (N1CFP && N1CFP->isZero())
13051 // TODO: The FMA node should have flags that propagate to these nodes.
13052 if (N0CFP && N0CFP->isExactlyValue(1.0))
13053 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
13054 if (N1CFP && N1CFP->isExactlyValue(1.0))
13055 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
13057 // Canonicalize (fma c, x, y) -> (fma x, c, y)
13058 if (isConstantFPBuildVectorOrConstantFP(N0) &&
13059 !isConstantFPBuildVectorOrConstantFP(N1))
13060 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
13062 if (UnsafeFPMath) {
13063 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
13064 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
13065 isConstantFPBuildVectorOrConstantFP(N1) &&
13066 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
13067 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13068 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
13072 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
13073 if (N0.getOpcode() == ISD::FMUL &&
13074 isConstantFPBuildVectorOrConstantFP(N1) &&
13075 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13076 return DAG.getNode(ISD::FMA, DL, VT,
13078 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
13084 // (fma x, 1, y) -> (fadd x, y)
13085 // (fma x, -1, y) -> (fadd (fneg x), y)
13087 if (N1CFP->isExactlyValue(1.0))
13088 // TODO: The FMA node should have flags that propagate to this node.
13089 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
13091 if (N1CFP->isExactlyValue(-1.0) &&
13092 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
13093 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
13094 AddToWorklist(RHSNeg.getNode());
13095 // TODO: The FMA node should have flags that propagate to this node.
13096 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
13099 // fma (fneg x), K, y -> fma x -K, y
// Only done when the negated constant is representable cheaply: either
// ConstantFP is legal, or the old immediate was illegal anyway and has a
// single use.
13100 if (N0.getOpcode() == ISD::FNEG &&
13101 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13102 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
13104 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13105 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
13109 if (UnsafeFPMath) {
13110 // (fma x, c, x) -> (fmul x, (c+1))
13111 if (N1CFP && N0 == N2) {
13112 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13113 DAG.getNode(ISD::FADD, DL, VT, N1,
13114 DAG.getConstantFP(1.0, DL, VT), Flags),
13118 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
13119 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
13120 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13121 DAG.getNode(ISD::FADD, DL, VT, N1,
13122 DAG.getConstantFP(-1.0, DL, VT), Flags),
13127 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
13128 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
// Pointless if the target can negate for free; otherwise ask the TLI
// whether negating the whole expression is strictly cheaper.
13129 if (!TLI.isFNegFree(VT))
13130 if (SDValue Neg = TLI.getCheaperNegatedExpression(
13131 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
13132 return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
13136 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal of that divisor:
13138 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
13139 // Notice that this is not always beneficial. One reason is different targets
13140 // may have different costs for FDIV and FMUL, so sometimes the cost of two
13141 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
13142 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
// Replace several FDIVs that share one divisor with a single reciprocal
// (1.0 / D) followed by one FMUL per original division. Gated on unsafe
// math or the arcp (allow-reciprocal) flag, and on a target-specified
// minimum number of uses so the extra FDIV pays for itself.
13143 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
13144 // TODO: Limit this transform based on optsize/minsize - it always creates at
13145 // least 1 extra instruction. But the perf win may be substantial enough
13146 // that only minsize should restrict this.
13147 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
13148 const SDNodeFlags Flags = N->getFlags();
// Bail out after legalization, or when neither global unsafe math nor the
// per-node arcp flag permits reciprocal formation.
13149 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
13152 // Skip if current node is a reciprocal/fneg-reciprocal.
13153 SDValue N0 = N->getOperand(0);
13154 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
13155 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
13158 // Exit early if the target does not want this transform or if there can't
13159 // possibly be enough uses of the divisor to make the transform worthwhile.
13160 SDValue N1 = N->getOperand(1);
13161 unsigned MinUses = TLI.combineRepeatedFPDivisors();
13163 // For splat vectors, scale the number of uses by the splat factor. If we can
13164 // convert the division into a scalar op, that will likely be much faster.
13165 unsigned NumElts = 1;
13166 EVT VT = N->getValueType(0);
13167 if (VT.isVector() && DAG.isSplatValue(N1))
13168 NumElts = VT.getVectorNumElements();
13170 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
13173 // Find all FDIV users of the same divisor.
13174 // Use a set because duplicates may be present in the user list.
13175 SetVector<SDNode *> Users;
13176 for (auto *U : N1->uses()) {
13177 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
13178 // This division is eligible for optimization only if global unsafe math
13179 // is enabled or if this division allows reciprocal formation.
13180 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
13185 // Now that we have the actual number of divisor uses, make sure it meets
13186 // the minimum threshold specified by the target.
13187 if ((Users.size() * NumElts) < MinUses)
13191 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
13192 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
13194 // Dividend / Divisor -> Dividend * Reciprocal
13195 for (auto *U : Users) {
13196 SDValue Dividend = U->getOperand(0);
13197 if (Dividend != FPOne) {
13198 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
13199 Reciprocal, Flags);
13200 CombineTo(U, NewNode);
13201 } else if (U != Reciprocal.getNode()) {
13202 // In the absence of fast-math-flags, this user node is always the
13203 // same node as Reciprocal, but with FMF they may be different nodes.
13204 CombineTo(U, Reciprocal);
13207 return SDValue(N, 0); // N was replaced.
// Combine/simplify an ISD::FDIV node: constant folding, repeated-divisor
// CSE, divide-by-constant -> multiply-by-reciprocal, folding a divide by
// (possibly wrapped) sqrt into an rsqrt estimate, a general reciprocal
// estimate (under no-infs), and (-X)/(-Y) -> X/Y.
// NOTE(review): several original lines are missing from this excerpt.
13210 SDValue DAGCombiner::visitFDIV(SDNode *N) {
13211 SDValue N0 = N->getOperand(0);
13212 SDValue N1 = N->getOperand(1);
13213 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13214 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13215 EVT VT = N->getValueType(0);
13217 const TargetOptions &Options = DAG.getTarget().Options;
13218 SDNodeFlags Flags = N->getFlags();
13220 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13225 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13228 // fold (fdiv c1, c2) -> c1/c2
13229 if (N0CFP && N1CFP)
13230 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
13232 if (SDValue NewSel = foldBinOpIntoSelect(N))
13235 if (SDValue V = combineRepeatedFPDivisors(N))
// Reciprocal-based folds: need global unsafe math or the arcp flag.
13238 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
13239 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
13241 // Compute the reciprocal 1.0 / c2.
13242 const APFloat &N1APF = N1CFP->getValueAPF();
13243 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
13244 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
13245 // Only do the transform if the reciprocal is a legal fp immediate that
13246 // isn't too nasty (eg NaN, denormal, ...).
13247 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
13248 (!LegalOperations ||
13249 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
13250 // backend)... we should handle this gracefully after Legalize.
13251 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
13252 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13253 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
13254 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13255 DAG.getConstantFP(Recip, DL, VT), Flags);
13258 // If this FDIV is part of a reciprocal square root, it may be folded
13259 // into a target-specific square root estimate instruction.
13260 if (N1.getOpcode() == ISD::FSQRT) {
13261 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
13262 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
// Also look through an fp_extend/fp_round wrapping the sqrt, re-applying
// the conversion on top of the estimate.
13263 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
13264 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13265 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
13267 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
13268 AddToWorklist(RV.getNode());
13269 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
13271 } else if (N1.getOpcode() == ISD::FP_ROUND &&
13272 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13273 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
13275 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
13276 AddToWorklist(RV.getNode());
13277 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
13279 } else if (N1.getOpcode() == ISD::FMUL) {
13280 // Look through an FMUL. Even though this won't remove the FDIV directly,
13281 // it's still worthwhile to get rid of the FSQRT if possible.
13283 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13284 Sqrt = N1.getOperand(0);
13285 Y = N1.getOperand(1);
13286 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
13287 Sqrt = N1.getOperand(1);
13288 Y = N1.getOperand(0);
13290 if (Sqrt.getNode()) {
13291 // If the other multiply operand is known positive, pull it into the
13292 // sqrt. That will eliminate the division if we convert to an estimate:
13293 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
13294 // TODO: Also fold the case where A == Z (fabs is missing).
13295 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
13296 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
13297 Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
13298 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
13299 Y.getOperand(0), Flags);
13301 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
13302 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
13303 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);
13305 // Estimate creation failed. Clean up speculatively created nodes.
13306 recursivelyDeleteUnusedNodes(AAZ.getNode());
13309 // We found a FSQRT, so try to make this fold:
13310 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
13311 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
13312 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
13313 AddToWorklist(Div.getNode());
13314 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
13319 // Fold into a reciprocal estimate and multiply instead of a real divide.
// Requires no-infs: the Newton-Raphson refinement misbehaves on infinity.
13320 if (Options.NoInfsFPMath || Flags.hasNoInfs())
13321 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
13325 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
13326 TargetLowering::NegatibleCost CostN0 =
13327 TargetLowering::NegatibleCost::Expensive;
13328 TargetLowering::NegatibleCost CostN1 =
13329 TargetLowering::NegatibleCost::Expensive;
13331 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13333 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13334 if (NegN0 && NegN1 &&
13335 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13336 CostN1 == TargetLowering::NegatibleCost::Cheaper))
13337 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);
// Combine/simplify an ISD::FREM node: generic FP-binop simplification,
// folding two constant operands, and merging the op into a select.
13342 SDValue DAGCombiner::visitFREM(SDNode *N) {
13343 SDValue N0 = N->getOperand(0);
13344 SDValue N1 = N->getOperand(1);
13345 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13346 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13347 EVT VT = N->getValueType(0);
13348 SDNodeFlags Flags = N->getFlags();
13350 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13353 // fold (frem c1, c2) -> fmod(c1,c2)
// Rebuilding the node with two constants lets constant folding finish it.
13354 if (N0CFP && N1CFP)
13355 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
13357 if (SDValue NewSel = foldBinOpIntoSelect(N))
// Try to replace an ISD::FSQRT with a target-provided estimate sequence.
// Requires approximate-functions (afn or global unsafe math) AND no-infs,
// and skips targets where a real sqrt is already cheap.
13363 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
13364 SDNodeFlags Flags = N->getFlags();
13365 const TargetOptions &Options = DAG.getTarget().Options;
13367 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
13368 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
13369 if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
13370 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
13373 SDValue N0 = N->getOperand(0);
13374 if (TLI.isFsqrtCheap(N0, DAG))
13377 // FSQRT nodes have flags that propagate to the created nodes.
13378 return buildSqrtEstimate(N0, Flags);
13381 /// copysign(x, fp_extend(y)) -> copysign(x, y)
13382 /// copysign(x, fp_round(y)) -> copysign(x, y)
// Return true if the sign operand of an FCOPYSIGN is an FP_EXTEND/FP_ROUND
// whose conversion may be looked through (only the sign bit matters).
13383 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
13384 SDValue N1 = N->getOperand(1);
13385 if ((N1.getOpcode() == ISD::FP_EXTEND ||
13386 N1.getOpcode() == ISD::FP_ROUND)) {
13387 // Do not optimize out type conversion of f128 type yet.
13388 // For some targets like x86_64, configuration is changed to keep one f128
13389 // value in one SSE register, but instruction selection cannot handle
13390 // FCOPYSIGN on SSE registers yet.
13391 EVT N1VT = N1->getValueType(0);
13392 EVT N1Op0VT = N1->getOperand(0).getValueType();
// Allow unless the conversion's source is f128 and the types differ.
13393 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
// Combine/simplify an ISD::FCOPYSIGN node: constant folding, constant sign
// operand -> fabs/fneg(fabs), stripping sign-irrelevant ops from either
// operand, and looking through fp conversions of the sign operand.
13398 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
13399 SDValue N0 = N->getOperand(0);
13400 SDValue N1 = N->getOperand(1);
13401 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
13402 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
13403 EVT VT = N->getValueType(0);
13405 if (N0CFP && N1CFP) // Constant fold
13406 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
13408 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
13409 const APFloat &V = N1C->getValueAPF();
13410 // copysign(x, c1) -> fabs(x) iff ispos(c1)
13411 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
13412 if (!V.isNegative()) {
13413 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
13414 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13416 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13417 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
13418 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
// The magnitude operand's own sign is irrelevant, so strip ops that only
// affect the sign bit.
13422 // copysign(fabs(x), y) -> copysign(x, y)
13423 // copysign(fneg(x), y) -> copysign(x, y)
13424 // copysign(copysign(x,z), y) -> copysign(x, y)
13425 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
13426 N0.getOpcode() == ISD::FCOPYSIGN)
13427 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
13429 // copysign(x, abs(y)) -> abs(x)
13430 if (N1.getOpcode() == ISD::FABS)
13431 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13433 // copysign(x, copysign(y,z)) -> copysign(x, z)
13434 if (N1.getOpcode() == ISD::FCOPYSIGN)
13435 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
13437 // copysign(x, fp_extend(y)) -> copysign(x, y)
13438 // copysign(x, fp_round(y)) -> copysign(x, y)
13439 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
13440 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
// Combine an ISD::FPOW with a constant exponent into cheaper forms:
// x ** (1/3) -> cbrt (when fast-math flags and libcall availability allow),
// and x ** 0.25 / x ** 0.75 -> sqrt combinations.
// NOTE(review): several original lines are missing from this excerpt.
13445 SDValue DAGCombiner::visitFPOW(SDNode *N) {
13446 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
13450 // Try to convert x ** (1/3) into cube root.
13451 // TODO: Handle the various flavors of long double.
13452 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
13453 // Some range near 1/3 should be fine.
13454 EVT VT = N->getValueType(0);
13455 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
13456 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
13457 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
13458 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
13459 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
13460 // For regular numbers, rounding may cause the results to differ.
13461 // Therefore, we require { nsz ninf nnan afn } for this transform.
13462 // TODO: We could select out the special cases if we don't have nsz/ninf.
13463 SDNodeFlags Flags = N->getFlags();
13464 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
13465 !Flags.hasApproximateFuncs())
13468 // Do not create a cbrt() libcall if the target does not have it, and do not
13469 // turn a pow that has lowering support into a cbrt() libcall.
13470 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
13471 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
13472 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
13475 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
13478 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
13479 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
13480 // TODO: This could be extended (using a target hook) to handle smaller
13481 // power-of-2 fractional exponents.
13482 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
13483 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
13484 if (ExponentIs025 || ExponentIs075) {
13485 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
13486 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
13487 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
13488 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
13489 // For regular numbers, rounding may cause the results to differ.
13490 // Therefore, we require { nsz ninf afn } for this transform.
13491 // TODO: We could select out the special cases if we don't have nsz/ninf.
13492 SDNodeFlags Flags = N->getFlags();
13494 // We only need no signed zeros for the 0.25 case.
13495 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
13496 !Flags.hasApproximateFuncs())
13499 // Don't double the number of libcalls. We are trying to inline fast code.
13500 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
13503 // Assume that libcalls are the smallest code.
13504 // TODO: This restriction should probably be lifted for vectors.
13508 // pow(X, 0.25) --> sqrt(sqrt(X))
13510 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
13511 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
13514 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
13515 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
// Fold [us]itofp(fpto[us]i X) --> ftrunc X when the integer round-trip can
// be assumed not to overflow (function attribute) and FTRUNC is legal.
// Returns the replacement node or a null SDValue.
13521 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
13522 const TargetLowering &TLI) {
13523 // This optimization is guarded by a function attribute because it may produce
13524 // unexpected results. Ie, programs may be relying on the platform-specific
13525 // undefined behavior when the float-to-int conversion overflows.
13526 const Function &F = DAG.getMachineFunction().getFunction();
13527 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
13528 if (StrictOverflow.getValueAsString().equals("false"))
13531 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
13532 // replacing casts with a libcall. We also must be allowed to ignore -0.0
13533 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
13534 // conversions would return +0.0.
13535 // FIXME: We should be able to use node-level FMF here.
13536 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
13537 EVT VT = N->getValueType(0);
13538 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
13539 !DAG.getTarget().Options.NoSignedZerosFPMath)
13542 // fptosi/fptoui round towards zero, so converting from FP to integer and
13543 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
13544 SDValue N0 = N->getOperand(0);
// Signedness of the two conversions must match, and the inner conversion
// must start from the same FP type we are producing.
13545 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
13546 N0.getOperand(0).getValueType() == VT)
13547 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13549 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
13550 N0.getOperand(0).getValueType() == VT)
13551 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
// Combine/simplify an ISD::SINT_TO_FP node: undef -> 0.0, constant folding,
// switching to UINT_TO_FP when the sign bit is known zero and that op is
// preferable, converting setcc-fed casts into FP selects, and the
// int-round-trip -> ftrunc fold.
13556 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
13557 SDValue N0 = N->getOperand(0);
13558 EVT VT = N->getValueType(0);
13559 EVT OpVT = N0.getValueType();
13561 // [us]itofp(undef) = 0, because the result value is bounded.
13563 return DAG.getConstantFP(0.0, SDLoc(N), VT);
13565 // fold (sint_to_fp c1) -> c1fp
13566 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13567 // ...but only if the target supports immediate floating-point values
13568 (!LegalOperations ||
13569 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13570 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13572 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
13573 // but UINT_TO_FP is legal on this target, try to convert.
13574 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
13575 hasOperation(ISD::UINT_TO_FP, OpVT)) {
13576 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
13577 if (DAG.SignBitIsZero(N0))
13578 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13581 // The next optimizations are desirable only if SELECT_CC can be lowered.
13582 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
// A signed i1 true value is -1, hence the -1.0 arm of the select.
13583 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
13585 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13587 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
13588 DAG.getConstantFP(0.0, DL, VT));
13591 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
13592 // (select (setcc x, y, cc), 1.0, 0.0)
// After zero-extension the true value is +1, hence 1.0 here.
13593 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
13594 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
13595 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13597 return DAG.getSelect(DL, VT, N0.getOperand(0),
13598 DAG.getConstantFP(1.0, DL, VT),
13599 DAG.getConstantFP(0.0, DL, VT));
13602 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
// Combine for ISD::UINT_TO_FP. Mirrors visitSINT_TO_FP: fold undef to +0.0,
// constant-fold, switch to SINT_TO_FP when the sign bit is known zero, turn
// a setcc input into a select of 1.0/0.0, then try the ftrunc fold.
13608 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
13609 SDValue N0 = N->getOperand(0);
13610 EVT VT = N->getValueType(0);
13611 EVT OpVT = N0.getValueType();
13613 // [us]itofp(undef) = 0, because the result value is bounded.
13615 return DAG.getConstantFP(0.0, SDLoc(N), VT);
13617 // fold (uint_to_fp c1) -> c1fp
13618 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13619 // ...but only if the target supports immediate floating-point values
13620 (!LegalOperations ||
13621 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13622 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13624 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
13625 // but SINT_TO_FP is legal on this target, try to convert.
13626 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
13627 hasOperation(ISD::SINT_TO_FP, OpVT)) {
13628 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
13629 if (DAG.SignBitIsZero(N0))
13630 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13633 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
13634 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
13635 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13637 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
13638 DAG.getConstantFP(0.0, DL, VT));
13641 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13647 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
// Eliminates a round-trip through floating point when the integer value is
// guaranteed to be represented exactly in the intermediate FP type.
13648 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
13649 SDValue N0 = N->getOperand(0);
13650 EVT VT = N->getValueType(0);
13652 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
13655 SDValue Src = N0.getOperand(0);
13656 EVT SrcVT = Src.getValueType();
13657 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
13658 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
13660 // We can safely assume the conversion won't overflow the output range,
13661 // because (for example) (uint8_t)18293.f is undefined behavior.
13663 // Since we can assume the conversion won't overflow, our decision as to
13664 // whether the input will fit in the float should depend on the minimum
13665 // of the input range and output range.
13667 // This means this is also safe for a signed input and unsigned output, since
13668 // a negative input would lead to undefined behavior.
// A signed type spends one bit on the sign, so its magnitude range is one
// bit narrower than its bit width.
13669 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
13670 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
13671 unsigned ActualSize = std::min(InputSize, OutputSize);
13672 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
13674 // We can only fold away the float conversion if the input range can be
13675 // represented exactly in the float range.
13676 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
13677 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
// Extension kind follows signedness: only sign-extend when both sides
// are signed; any unsigned side means the value is non-negative.
13678 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
13679 : ISD::ZERO_EXTEND;
13680 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
13682 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
13683 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
// Same bit width: the source value can be reused directly.
13684 return DAG.getBitcast(VT, Src);
// Combine for ISD::FP_TO_SINT: fold undef, constant-fold FP constants, then
// try to erase an int->fp->int round trip via FoldIntToFPToInt.
13689 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13690 SDValue N0 = N->getOperand(0);
13691 EVT VT = N->getValueType(0);
13693 // fold (fp_to_sint undef) -> undef
13695 return DAG.getUNDEF(VT);
13697 // fold (fp_to_sint c1fp) -> c1
13698 if (isConstantFPBuildVectorOrConstantFP(N0))
13699 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13701 return FoldIntToFPToInt(N, DAG);
// Combine for ISD::FP_TO_UINT: same structure as visitFP_TO_SINT.
13704 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13705 SDValue N0 = N->getOperand(0);
13706 EVT VT = N->getValueType(0);
13708 // fold (fp_to_uint undef) -> undef
13710 return DAG.getUNDEF(VT);
13712 // fold (fp_to_uint c1fp) -> c1
13713 if (isConstantFPBuildVectorOrConstantFP(N0))
13714 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13716 return FoldIntToFPToInt(N, DAG);
// Combine for ISD::FP_ROUND. Operand 1 is the "trunc" flag: 1 means the
// round is known to be value-preserving. Folds round(extend x) -> x,
// collapses double rounds, and sinks the round through FCOPYSIGN.
13719 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
13720 SDValue N0 = N->getOperand(0);
13721 SDValue N1 = N->getOperand(1);
13722 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13723 EVT VT = N->getValueType(0);
13725 // fold (fp_round c1fp) -> c1fp
13727 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
13729 // fold (fp_round (fp_extend x)) -> x
13730 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
13731 return N0.getOperand(0);
13733 // fold (fp_round (fp_round x)) -> (fp_round x)
13734 if (N0.getOpcode() == ISD::FP_ROUND) {
13735 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
13736 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
13738 // Skip this folding if it results in an fp_round from f80 to f16.
13740 // f80 to f16 always generates an expensive (and as yet, unimplemented)
13741 // libcall to __truncxfhf2 instead of selecting native f16 conversion
13742 // instructions from f32 or f64. Moreover, the first (value-preserving)
13743 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
13745 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
13748 // If the first fp_round isn't a value preserving truncation, it might
13749 // introduce a tie in the second fp_round, that wouldn't occur in the
13750 // single-step fp_round we want to fold to.
13751 // In other words, double rounding isn't the same as rounding.
13752 // Also, this is a value preserving truncation iff both fp_round's are.
13753 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
// The merged round's trunc flag is set only if both originals had it.
13755 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
13756 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
13760 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
13761 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
13762 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
13763 N0.getOperand(0), N1);
13764 AddToWorklist(Tmp.getNode());
13765 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
13766 Tmp, N0.getOperand(1));
13769 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
// Combine for ISD::FP_EXTEND: constant-fold, look through FP16_TO_FP and
// value-preserving fp_rounds, and turn fpext(load) into an extending load.
13775 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
13776 SDValue N0 = N->getOperand(0);
13777 EVT VT = N->getValueType(0);
13779 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
13780 if (N->hasOneUse() &&
13781 N->use_begin()->getOpcode() == ISD::FP_ROUND)
13784 // fold (fp_extend c1fp) -> c1fp
13785 if (isConstantFPBuildVectorOrConstantFP(N0))
13786 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
13788 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
13789 if (N0.getOpcode() == ISD::FP16_TO_FP &&
13790 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
13791 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
13793 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// (trunc flag == 1 means the inner round was value-preserving)
13795 if (N0.getOpcode() == ISD::FP_ROUND
13796 && N0.getConstantOperandVal(1) == 1) {
13797 SDValue In = N0.getOperand(0);
13798 if (In.getValueType() == VT) return In;
13799 if (VT.bitsLT(In.getValueType()))
13800 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
13801 In, N0.getOperand(1));
13802 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
13805 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
13806 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13807 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13808 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13809 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13811 LN0->getBasePtr(), N0.getValueType(),
13812 LN0->getMemOperand());
13813 CombineTo(N, ExtLoad);
// Other users of the original (narrow) load get a value-preserving
// fp_round of the extended load, chained off the new load's chain result.
13814 CombineTo(N0.getNode(),
13815 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
13816 N0.getValueType(), ExtLoad,
13817 DAG.getIntPtrConstant(1, SDLoc(N0))),
13818 ExtLoad.getValue(1));
13819 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13822 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
// Combine for ISD::FCEIL: constant-fold only.
13828 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13829 SDValue N0 = N->getOperand(0);
13830 EVT VT = N->getValueType(0);
13832 // fold (fceil c1) -> fceil(c1)
13833 if (isConstantFPBuildVectorOrConstantFP(N0))
13834 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
// Combine for ISD::FTRUNC: constant-fold, and drop the ftrunc entirely when
// the operand is already known to be an integral FP value.
13839 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13840 SDValue N0 = N->getOperand(0);
13841 EVT VT = N->getValueType(0);
13843 // fold (ftrunc c1) -> ftrunc(c1)
13844 if (isConstantFPBuildVectorOrConstantFP(N0))
13845 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13847 // fold ftrunc (known rounded int x) -> x
13848 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13849 // likely to be generated to extract integer from a rounded floating value.
13850 switch (N0.getOpcode()) {
13854 case ISD::FNEARBYINT:
// Combine for ISD::FFLOOR: constant-fold only.
13863 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13864 SDValue N0 = N->getOperand(0);
13865 EVT VT = N->getValueType(0);
13867 // fold (ffloor c1) -> ffloor(c1)
13868 if (isConstantFPBuildVectorOrConstantFP(N0))
13869 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13874 // FIXME: FNEG and FABS have a lot in common; refactor.
// Combine for ISD::FNEG: constant-fold, use TLI's negated-expression
// machinery, fold -(x-y) -> (y-x) under no-signed-zeros, turn
// fneg(bitcast int) into an integer sign-bit XOR, and push the negation
// into an FMUL's constant operand.
13875 SDValue DAGCombiner::visitFNEG(SDNode *N) {
13876 SDValue N0 = N->getOperand(0);
13877 EVT VT = N->getValueType(0);
13879 // Constant fold FNEG.
13880 if (isConstantFPBuildVectorOrConstantFP(N0))
13881 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13883 if (SDValue NegN0 =
13884 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
13887 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
13888 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
13889 // know it was called from a context with a nsz flag if the input fsub does
13891 if (N0.getOpcode() == ISD::FSUB &&
13892 (DAG.getTarget().Options.NoSignedZerosFPMath ||
13893 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
13894 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
13895 N0.getOperand(0), N->getFlags());
13898 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13899 // constant pool values.
13900 if (!TLI.isFNegFree(VT) &&
13901 N0.getOpcode() == ISD::BITCAST &&
13902 N0.getNode()->hasOneUse()) {
13903 SDValue Int = N0.getOperand(0);
13904 EVT IntVT = Int.getValueType();
13905 if (IntVT.isInteger() && !IntVT.isVector()) {
13907 if (N0.getValueType().isVector()) {
13908 // For a vector, get a mask such as 0x80... per scalar element
13910 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
13911 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13913 // For a scalar, just generate 0x80...
13914 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13917 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13918 DAG.getConstant(SignMask, DL0, IntVT));
13919 AddToWorklist(Int.getNode());
13920 return DAG.getBitcast(VT, Int);
13924 // (fneg (fmul c, x)) -> (fmul -c, x)
13925 if (N0.getOpcode() == ISD::FMUL &&
13926 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
13927 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13929 APFloat CVal = CFP1->getValueAPF();
// Only profitable post-legalization when the negated constant is still
// cheap to materialize (legal FP immediate or legal ConstantFP).
13931 if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
13932 TLI.isOperationLegal(ISD::ConstantFP, VT)))
13933 return DAG.getNode(
13934 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13935 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
// Shared combine for the FP min/max family (FMINNUM/FMAXNUM/FMINIMUM/
// FMAXIMUM). \p Op is the APFloat folding function for the particular
// flavor; constant operands are folded through it, and a constant LHS is
// canonicalized to the RHS.
13943 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13944 APFloat (*Op)(const APFloat &, const APFloat &)) {
13945 SDValue N0 = N->getOperand(0);
13946 SDValue N1 = N->getOperand(1);
13947 EVT VT = N->getValueType(0);
13948 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13949 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13951 if (N0CFP && N1CFP) {
13952 const APFloat &C0 = N0CFP->getValueAPF();
13953 const APFloat &C1 = N1CFP->getValueAPF();
13954 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13957 // Canonicalize to constant on RHS.
13958 if (isConstantFPBuildVectorOrConstantFP(N0) &&
13959 !isConstantFPBuildVectorOrConstantFP(N1))
13960 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
// FMINNUM: delegate to the shared min/max combine with APFloat minnum.
13965 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
13966 return visitFMinMax(DAG, N, minnum);
// FMAXNUM: delegate to the shared min/max combine with APFloat maxnum.
13969 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
13970 return visitFMinMax(DAG, N, maxnum);
// FMINIMUM: delegate to the shared min/max combine with APFloat minimum.
13973 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
13974 return visitFMinMax(DAG, N, minimum);
// FMAXIMUM: delegate to the shared min/max combine with APFloat maximum.
13977 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
13978 return visitFMinMax(DAG, N, maximum);
// Combine for ISD::FABS: constant-fold, collapse fabs(fabs/fneg/fcopysign),
// and turn fabs(bitcast int) into an integer AND that clears the sign bit.
13981 SDValue DAGCombiner::visitFABS(SDNode *N) {
13982 SDValue N0 = N->getOperand(0);
13983 EVT VT = N->getValueType(0);
13985 // fold (fabs c1) -> fabs(c1)
13986 if (isConstantFPBuildVectorOrConstantFP(N0))
13987 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13989 // fold (fabs (fabs x)) -> (fabs x)
13990 if (N0.getOpcode() == ISD::FABS)
13991 return N->getOperand(0);
13993 // fold (fabs (fneg x)) -> (fabs x)
13994 // fold (fabs (fcopysign x, y)) -> (fabs x)
13995 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13996 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13998 // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13999 if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
14000 SDValue Int = N0.getOperand(0);
14001 EVT IntVT = Int.getValueType();
14002 if (IntVT.isInteger() && !IntVT.isVector()) {
14004 if (N0.getValueType().isVector()) {
14005 // For a vector, get a mask such as 0x7f... per scalar element
14007 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
14008 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
14010 // For a scalar, just generate 0x7f...
14011 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
14014 Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
14015 DAG.getConstant(SignMask, DL, IntVT));
14016 AddToWorklist(Int.getNode());
14017 return DAG.getBitcast(N->getValueType(0), Int);
// Combine for ISD::BRCOND (operands: chain, condition, destination block).
// Folds a setcc condition into BR_CC when legal, otherwise tries to rebuild
// the condition as a SETCC via rebuildSetCC.
14024 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
14025 SDValue Chain = N->getOperand(0);
14026 SDValue N1 = N->getOperand(1);
14027 SDValue N2 = N->getOperand(2);
14029 // If N is a constant we could fold this into a fallthrough or unconditional
14030 // branch. However that doesn't happen very often in normal code, because
14031 // Instcombine/SimplifyCFG should have handled the available opportunities.
14032 // If we did this folding here, it would be necessary to update the
14033 // MachineBasicBlock CFG, which is awkward.
14035 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
14037 if (N1.getOpcode() == ISD::SETCC &&
14038 TLI.isOperationLegalOrCustom(ISD::BR_CC,
14039 N1.getOperand(0).getValueType())) {
14040 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14041 Chain, N1.getOperand(2),
14042 N1.getOperand(0), N1.getOperand(1), N2);
14045 if (N1.hasOneUse()) {
14046 // rebuildSetCC calls visitXor which may change the Chain when there is a
14047 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
14048 HandleSDNode ChainHandle(Chain);
14049 if (SDValue NewN1 = rebuildSetCC(N1))
14050 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
14051 ChainHandle.getValue(), NewN1, N2);
// Try to rewrite a branch condition N into an equivalent SETCC so the
// branch can later become a BR_CC / TEST+JMP. Handles the (srl (and x, 2^k),
// k) single-bit-test pattern and xor-based (in)equality patterns.
14057 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
14058 if (N.getOpcode() == ISD::SRL ||
14059 (N.getOpcode() == ISD::TRUNCATE &&
14060 (N.getOperand(0).hasOneUse() &&
14061 N.getOperand(0).getOpcode() == ISD::SRL))) {
14062 // Look pass the truncate.
14063 if (N.getOpcode() == ISD::TRUNCATE)
14064 N = N.getOperand(0);
14066 // Match this pattern so that we can generate simpler code:
14069 // %b = and i32 %a, 2
14070 // %c = srl i32 %b, 1
14071 // brcond i32 %c ...
14076 // %b = and i32 %a, 2
14077 // %c = setcc eq %b, 0
14080 // This applies only when the AND constant value has one bit set and the
14081 // SRL constant is equal to the log2 of the AND constant. The back-end is
14082 // smart enough to convert the result into a TEST/JMP sequence.
14083 SDValue Op0 = N.getOperand(0);
14084 SDValue Op1 = N.getOperand(1);
14086 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
14087 SDValue AndOp1 = Op0.getOperand(1);
14089 if (AndOp1.getOpcode() == ISD::Constant) {
14090 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
14092 if (AndConst.isPowerOf2() &&
14093 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
14095 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
14096 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
14103 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
14104 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
14105 if (N.getOpcode() == ISD::XOR) {
14106 // Because we may call this on a speculatively constructed
14107 // SimplifiedSetCC Node, we need to simplify this node first.
14108 // Ideally this should be folded into SimplifySetCC and not
14109 // here. For now, grab a handle to N so we don't lose it from
14110 // replacements interal to the visit.
14111 HandleSDNode XORHandle(N);
14112 while (N.getOpcode() == ISD::XOR) {
14113 SDValue Tmp = visitXOR(N.getNode());
14114 // No simplification done.
14115 if (!Tmp.getNode())
14117 // Returning N is form in-visit replacement that may invalidated
14118 // N. Grab value from Handle.
14119 if (Tmp.getNode() == N.getNode())
14120 N = XORHandle.getValue();
14121 else // Node simplified. Try simplifying again.
14125 if (N.getOpcode() != ISD::XOR)
14128 SDValue Op0 = N->getOperand(0);
14129 SDValue Op1 = N->getOperand(1);
14131 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
14132 bool Equal = false;
14133 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
14134 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
14135 Op0.getValueType() == MVT::i1) {
14137 Op0 = N->getOperand(0);
14138 Op1 = N->getOperand(1);
14142 EVT SetCCVT = N.getValueType();
14144 SetCCVT = getSetCCResultType(SetCCVT);
14145 // Replace the uses of XOR with SETCC
14146 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
14147 Equal ? ISD::SETEQ : ISD::SETNE);
14154 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
// Combine for ISD::BR_CC: run the comparison through SimplifySetCC and, if
// it simplifies to another setcc, rebuild the BR_CC from its pieces.
14156 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
14157 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
14158 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
14160 // If N is a constant we could fold this into a fallthrough or unconditional
14161 // branch. However that doesn't happen very often in normal code, because
14162 // Instcombine/SimplifyCFG should have handled the available opportunities.
14163 // If we did this folding here, it would be necessary to update the
14164 // MachineBasicBlock CFG, which is awkward.
14166 // Use SimplifySetCC to simplify SETCC's.
14167 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
14168 CondLHS, CondRHS, CC->get(), SDLoc(N),
14170 if (Simp.getNode()) AddToWorklist(Simp.getNode());
14172 // fold to a simpler setcc
14173 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
14174 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14175 N->getOperand(0), Simp.getOperand(2),
14176 Simp.getOperand(0), Simp.getOperand(1),
14182 /// Return true if 'Use' is a load or a store that uses N as its base pointer
14183 /// and that N may be folded in the load / store addressing mode.
// Handles plain and masked loads/stores; queries the target's
// isLegalAddressingMode with the base-reg + constant-offset AddrMode
// derived from the ADD/SUB feeding the pointer.
14184 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
14186 const TargetLowering &TLI) {
14190 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
14191 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14193 VT = LD->getMemoryVT();
14194 AS = LD->getAddressSpace();
14195 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
14196 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14198 VT = ST->getMemoryVT();
14199 AS = ST->getAddressSpace();
14200 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
14201 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14203 VT = LD->getMemoryVT();
14204 AS = LD->getAddressSpace();
14205 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
14206 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14208 VT = ST->getMemoryVT();
14209 AS = ST->getAddressSpace();
14213 TargetLowering::AddrMode AM;
14214 if (N->getOpcode() == ISD::ADD) {
14215 AM.HasBaseReg = true;
14216 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14219 AM.BaseOffs = Offset->getSExtValue();
14223 } else if (N->getOpcode() == ISD::SUB) {
14224 AM.HasBaseReg = true;
14225 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
// SUB's constant becomes a negative byte offset in the addressing mode.
14228 AM.BaseOffs = -Offset->getSExtValue();
14235 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
14236 VT.getTypeForEVT(*DAG.getContext()), AS);
// Extract the pieces needed for an indexed load/store combine from N.
// Returns true (with Ptr set) when N is a non-indexed (masked) load/store
// whose memory VT supports the given Inc or Dec indexed mode on this target.
14239 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
14240 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
14241 const TargetLowering &TLI) {
14242 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
14243 if (LD->isIndexed())
14245 EVT VT = LD->getMemoryVT();
14246 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
14248 Ptr = LD->getBasePtr();
14249 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
14250 if (ST->isIndexed())
14252 EVT VT = ST->getMemoryVT();
14253 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
14255 Ptr = ST->getBasePtr();
14257 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
14258 if (LD->isIndexed())
14260 EVT VT = LD->getMemoryVT();
14261 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
14262 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
14264 Ptr = LD->getBasePtr();
14266 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
14267 if (ST->isIndexed())
14269 EVT VT = ST->getMemoryVT();
14270 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
14271 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
14273 Ptr = ST->getBasePtr();
14282 /// Try turning a load/store into a pre-indexed load/store when the base
14283 /// pointer is an add or subtract and it has other uses besides the load/store.
14284 /// After the transformation, the new indexed load/store has effectively folded
14285 /// the add/subtract in and all of its other uses are redirected to the
14286 /// new load/store.
14287 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
14288 if (Level < AfterLegalizeDAG)
14291 bool IsLoad = true;
14292 bool IsMasked = false;
14294 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
14298 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
14299 // out. There is no reason to make this a preinc/predec.
14300 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
14301 Ptr.getNode()->hasOneUse())
14304 // Ask the target to do addressing mode selection.
14307 ISD::MemIndexedMode AM = ISD::UNINDEXED;
14308 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
14311 // Backends without true r+i pre-indexed forms may need to pass a
14312 // constant base with a variable offset so that constant coercion
14313 // will work with the patterns in canonical form.
14314 bool Swapped = false;
14315 if (isa<ConstantSDNode>(BasePtr)) {
14316 std::swap(BasePtr, Offset);
14320 // Don't create a indexed load / store with zero offset.
14321 if (isNullConstant(Offset))
14324 // Try turning it into a pre-indexed load / store except when:
14325 // 1) The new base ptr is a frame index.
14326 // 2) If N is a store and the new base ptr is either the same as or is a
14327 // predecessor of the value being stored.
14328 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
14329 // that would create a cycle.
14330 // 4) All uses are load / store ops that use it as old base ptr.
14332 // Check #1. Preinc'ing a frame index would require copying the stack pointer
14333 // (plus the implicit offset) to a register to preinc anyway.
14334 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
// Check #2 (stores only): the stored value must not be (or reach) the
// new base pointer, or the indexed node would be cyclic / need a copy.
14339 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
14340 : cast<StoreSDNode>(N)->getValue();
14342 // Would require a copy.
14343 if (Val == BasePtr)
14346 // Would create a cycle.
14347 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
14351 // Caches for hasPredecessorHelper.
14352 SmallPtrSet<const SDNode *, 32> Visited;
14353 SmallVector<const SDNode *, 16> Worklist;
14354 Worklist.push_back(N);
14356 // If the offset is a constant, there may be other adds of constants that
14357 // can be folded with this one. We should do this to avoid having to keep
14358 // a copy of the original base pointer.
14359 SmallVector<SDNode *, 16> OtherUses;
14360 if (isa<ConstantSDNode>(Offset))
14361 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
14362 UE = BasePtr.getNode()->use_end();
14364 SDUse &Use = UI.getUse();
14365 // Skip the use that is Ptr and uses of other results from BasePtr's
14366 // node (important for nodes that return multiple results).
14367 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
// Rewriting a user that reaches N would create a cycle; skip it.
14370 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
14373 if (Use.getUser()->getOpcode() != ISD::ADD &&
14374 Use.getUser()->getOpcode() != ISD::SUB) {
// The other operand of the user's add/sub must also be a constant of
// the same type for the offset arithmetic below to be rewritable.
14379 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
14380 if (!isa<ConstantSDNode>(Op1)) {
14385 // FIXME: In some cases, we can be smarter about this.
14386 if (Op1.getValueType() != Offset.getValueType()) {
14391 OtherUses.push_back(Use.getUser());
14395 std::swap(BasePtr, Offset);
14397 // Now check for #3 and #4.
14398 bool RealUse = false;
14400 for (SDNode *Use : Ptr.getNode()->uses()) {
14403 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
14406 // If Ptr may be folded in addressing mode of other use, then it's
14407 // not profitable to do this transformation.
14408 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
// Build the replacement indexed node for the matching load/store kind.
14418 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14421 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14424 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14427 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
14432 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
14433 Result.getNode()->dump(&DAG); dbgs() << '\n');
14434 WorklistRemover DeadNodes(*this);
// For loads: value is result 0, chain is result 2 of the indexed node.
// For stores: chain is result 1.
14436 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14437 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14439 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14442 // Finally, since the node is now dead, remove it from the graph.
14443 deleteAndRecombine(N);
14446 std::swap(BasePtr, Offset);
14448 // Replace other uses of BasePtr that can be updated to use Ptr
14449 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
14450 unsigned OffsetIdx = 1;
14451 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
14453 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
14454 BasePtr.getNode() && "Expected BasePtr operand");
14456 // We need to replace ptr0 in the following expression:
14457 // x0 * offset0 + y0 * ptr0 = t0
14459 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
14461 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
14462 // indexed load/store and the expression that needs to be re-written.
14464 // Therefore, we have:
14465 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
14467 ConstantSDNode *CN =
14468 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
14469 int X0, X1, Y0, Y1;
14470 const APInt &Offset0 = CN->getAPIntValue();
14471 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
14473 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
14474 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
14475 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
14476 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
14478 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
14480 APInt CNV = Offset0;
14481 if (X0 < 0) CNV = -CNV;
14482 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
14483 else CNV = CNV - Offset1;
14485 SDLoc DL(OtherUses[i]);
14487 // We can now generate the new expression.
14488 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
14489 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
14491 SDValue NewUse = DAG.getNode(Opcode,
14493 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
14494 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
14495 deleteAndRecombine(OtherUses[i]);
14498 // Replace the uses of Ptr with uses of the updated base value.
// (The updated base pointer is result 1 of an indexed load, result 0 of
// an indexed store.)
14499 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
14500 deleteAndRecombine(Ptr.getNode());
14501 AddToWorklist(Result.getNode());
// Decide whether PtrUse (an ADD/SUB of the load/store's pointer) should be
// folded into N as a post-indexed access. Queries the target for the
// base/offset split, rejects zero offsets and frame-index/register bases,
// and checks that no other use of the base is a better candidate or would
// create a cycle.
14506 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
14507 SDValue &BasePtr, SDValue &Offset,
14508 ISD::MemIndexedMode &AM,
14510 const TargetLowering &TLI) {
14512 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
14515 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
14518 // Don't create a indexed load / store with zero offset.
14519 if (isNullConstant(Offset))
14522 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
14525 SmallPtrSet<const SDNode *, 32> Visited;
14526 for (SDNode *Use : BasePtr.getNode()->uses()) {
14527 if (Use == Ptr.getNode())
14530 // No if there's a later user which could perform the index instead.
14531 if (isa<MemSDNode>(Use)) {
14532 bool IsLoad = true;
14533 bool IsMasked = false;
14535 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
14536 IsMasked, OtherPtr, TLI)) {
14537 SmallVector<const SDNode *, 2> Worklist;
14538 Worklist.push_back(Use);
14539 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
14544 // If all the uses are load / store addresses, then don't do the
14546 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
14547 for (SDNode *UseUse : Use->uses())
14548 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
// Find an ADD/SUB user of N's pointer that can be folded into N as a
// post-indexed access. Returns that user node (Op) on success, with
// BasePtr/Offset/AM filled in; the caller performs the actual rewrite.
14555 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
14556 bool &IsMasked, SDValue &Ptr,
14557 SDValue &BasePtr, SDValue &Offset,
14558 ISD::MemIndexedMode &AM,
14560 const TargetLowering &TLI) {
14561 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
14562 IsMasked, Ptr, TLI) ||
14563 Ptr.getNode()->hasOneUse())
14566 // Try turning it into a post-indexed load / store except when
14567 // 1) All uses are load / store ops that use it as base ptr (and
14568 // it may be folded as addressing mmode).
14569 // 2) Op must be independent of N, i.e. Op is neither a predecessor
14570 // nor a successor of N. Otherwise, if Op is folded that would
14572 for (SDNode *Op : Ptr->uses()) {
14574 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
// Check #2 both directions: N must not reach Op and Op must not reach N.
14578 SmallPtrSet<const SDNode *, 32> Visited;
14579 SmallVector<const SDNode *, 8> Worklist;
14580 // Ptr is predecessor to both N and Op.
14581 Visited.insert(Ptr.getNode());
14582 Worklist.push_back(N);
14583 Worklist.push_back(Op);
14584 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
14585 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
14591 /// Try to combine a load/store with an add/sub of its base pointer node into
14592 /// a post-indexed load/store. The transformation folds the add/subtract into
14593 /// the new indexed load/store, and all uses of the arithmetic node are
14594 /// redirected to the new load/store.
14595 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// Post-indexed forms are only created after DAG legalization.
14596 if (Level < AfterLegalizeDAG)
14599 bool IsLoad = true;
14600 bool IsMasked = false;
14604 ISD::MemIndexedMode AM = ISD::UNINDEXED;
14605 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
14606 Offset, AM, DAG, TLI);
// Build the replacement indexed node (masked or unmasked variant).
14612 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14614 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
14615 BasePtr, Offset, AM);
14617 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
14618 BasePtr, Offset, AM)
14619 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
14620 BasePtr, Offset, AM);
14621 ++PostIndexedNodes;
14623 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
14624 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
// Rewire the old node's values to the new indexed node. For a load, the
// loaded value and chain are redirected; for a store, the chain.
14626 WorklistRemover DeadNodes(*this);
14628 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14629 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14631 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14634 // Finally, since the node is now dead, remove it from the graph.
14635 deleteAndRecombine(N);
14637 // Replace the uses of Op with uses of the updated base value.
14638 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
14639 Result.getValue(IsLoad ? 1 : 0));
14640 deleteAndRecombine(Op);
14644 /// Return the base-pointer arithmetic from an indexed \p LD.
// Reconstructs the explicit ADD/SUB node (base pointer +/- increment) that
// the indexed load performs implicitly, so the pointer arithmetic can
// survive when the indexed load itself is replaced.
14645 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
14646 ISD::MemIndexedMode AM = LD->getAddressingMode();
14647 assert(AM != ISD::UNINDEXED);
// Operand 1 is the base pointer, operand 2 the increment of the indexed load.
14648 SDValue BP = LD->getOperand(1);
14649 SDValue Inc = LD->getOperand(2);
14651 // Some backends use TargetConstants for load offsets, but don't expect
14652 // TargetConstants in general ADD nodes. We can convert these constants into
14653 // regular Constants (if the constant is not opaque).
14654 assert((Inc.getOpcode() != ISD::TargetConstant ||
14655 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
14656 "Cannot split out indexing using opaque target constants");
14657 if (Inc.getOpcode() == ISD::TargetConstant) {
14658 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
14659 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
14660 ConstInc->getValueType(0));
// Increment modes add the offset; decrement modes subtract it.
14664 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
14665 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
14668 static inline int numVectorEltsOrZero(EVT T) {
14669 return T.isVector() ? T.getVectorNumElements() : 0;
// Convert the value stored by \p ST into the store's memory type, returning
// it in \p Val so it can be forwarded to a load of that type. Returns true
// on success. Conversion is attempted via FTRUNC (floating point), TRUNCATE
// (integer with matching element counts), or a same-size bitcast.
14672 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
14673 Val = ST->getValue();
14674 EVT STType = Val.getValueType();
14675 EVT STMemType = ST->getMemoryVT();
// Nothing to do if the stored value already has the memory type.
14676 if (STType == STMemType)
14678 if (isTypeLegal(STMemType))
14679 return false; // fail.
14680 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
14681 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
14682 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
14685 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
14686 STType.isInteger() && STMemType.isInteger()) {
14687 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
// Same bit width, different type: a plain bitcast suffices.
14690 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
14691 Val = DAG.getBitcast(STMemType, Val);
14694 return false; // fail.
// Extend \p Val (which must have the load's memory type) to the load's
// result type, matching the load's extension kind (any/sign/zero extend, or
// bitcast for a non-extending load). Returns true on success; only integer
// types are handled.
14697 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
14698 EVT LDMemType = LD->getMemoryVT();
14699 EVT LDType = LD->getValueType(0);
14700 assert(Val.getValueType() == LDMemType &&
14701 "Attempting to extend value of non-matching type");
// Already the right type: nothing to do.
14702 if (LDType == LDMemType)
14704 if (LDMemType.isInteger() && LDType.isInteger()) {
// Pick the extension that preserves the semantics of the original load.
14705 switch (LD->getExtensionType()) {
14706 case ISD::NON_EXTLOAD:
14707 Val = DAG.getBitcast(LDType, Val);
14710 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
14712 case ISD::SEXTLOAD:
14713 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
14715 case ISD::ZEXTLOAD:
14716 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
// If \p LD's chain is a simple store to the same (base, index, offset)
// address, forward the stored value directly to the load, eliminating the
// round trip through memory. Returns the replacement value on success, or an
// empty SDValue on failure.
14723 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
14724 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
14726 SDValue Chain = LD->getOperand(0);
14727 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
14728 // TODO: Relax this restriction for unordered atomics (see D66309)
14729 if (!ST || !ST->isSimple())
14732 EVT LDType = LD->getValueType(0);
14733 EVT LDMemType = LD->getMemoryVT();
14734 EVT STMemType = ST->getMemoryVT();
14735 EVT STType = ST->getValue().getValueType();
// Require the load and store addresses to share the same base/index so the
// byte offset between them is a known constant.
14737 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
14738 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
14740 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
14743 // Normalize for Endianness. After this Offset=0 will denote that the least
14744 // significant bit in the loaded value maps to the least significant bit in
14745 // the stored value). With Offset=n (for n > 0) the loaded value starts at the
14746 // n:th least significant byte of the stored value.
14747 if (DAG.getDataLayout().isBigEndian())
14748 Offset = ((int64_t)STMemType.getStoreSizeInBits() -
14749 (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
14751 // Check that the stored value cover all bits that are loaded.
14754 (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
// Helper that installs Val/Chain as the load's replacement. For an indexed
// load, the implicit pointer arithmetic must be split out first.
14756 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
14757 if (LD->isIndexed()) {
14758 // Cannot handle opaque target constants and we must respect the user's
14759 // request not to split indexes from loads.
14760 if (!canSplitIdx(LD))
14762 SDValue Idx = SplitIndexingFromLoad(LD);
14763 SDValue Ops[] = {Val, Idx, Chain};
14764 return CombineTo(LD, Ops, 3);
14766 return CombineTo(LD, Val, Chain);
14772 // Memory as copy space (potentially masked).
14773 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
14774 // Simple case: Direct non-truncating forwarding
14775 if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
14776 return ReplaceLd(LD, ST->getValue(), Chain);
14777 // Can we model the truncate and extension with an and mask?
14778 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
14779 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
14780 // Mask to size of LDMemType
14782 DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
14783 STMemType.getSizeInBits()),
14784 SDLoc(ST), STType);
14785 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
14786 return ReplaceLd(LD, Val, Chain);
14790 // TODO: Deal with nonzero offset.
14791 if (LD->getBasePtr().isUndef() || Offset != 0)
14793 // Model necessary truncations / extensions.
14795 // Truncate Value To Stored Memory Size.
14797 if (!getTruncatedStoreValue(ST, Val))
14799 if (!isTypeLegal(LDMemType))
14801 if (STMemType != LDMemType) {
14802 // TODO: Support vectors? This requires extract_subvector/bitcast.
14803 if (!STMemType.isVector() && !LDMemType.isVector() &&
14804 STMemType.isInteger() && LDMemType.isInteger())
14805 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
// Finally re-apply the load's extension semantics to the forwarded value.
14809 if (!extendLoadedValueToExtension(LD, Val))
14811 return ReplaceLd(LD, Val, Chain);
14814 // On failure, cleanup dead nodes we may have created.
14815 if (Val->use_empty())
14816 deleteAndRecombine(Val.getNode());
// Main combine entry point for LOAD nodes: deletes dead loads, forwards
// values from an adjacent store, refines alignment, re-chains the load past
// non-aliasing memory operations, forms pre/post-indexed loads, and finally
// attempts to slice the load into smaller independent loads.
14820 SDValue DAGCombiner::visitLOAD(SDNode *N) {
14821 LoadSDNode *LD = cast<LoadSDNode>(N);
14822 SDValue Chain = LD->getChain();
14823 SDValue Ptr = LD->getBasePtr();
14825 // If load is not volatile and there are no uses of the loaded value (and
14826 // the updated indexed value in case of indexed loads), change uses of the
14827 // chain value into uses of the chain input (i.e. delete the dead load).
14828 // TODO: Allow this for unordered atomics (see D66309)
14829 if (LD->isSimple()) {
14830 if (N->getValueType(1) == MVT::Other) {
14831 // Unindexed loads.
14832 if (!N->hasAnyUseOfValue(0)) {
14833 // It's not safe to use the two value CombineTo variant here. e.g.
14834 // v1, chain2 = load chain1, loc
14835 // v2, chain3 = load chain2, loc
14837 // Now we replace use of chain2 with chain1. This makes the second load
14838 // isomorphic to the one we are deleting, and thus makes this load live.
14839 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
14840 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
14842 WorklistRemover DeadNodes(*this);
14843 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14844 AddUsersToWorklist(Chain.getNode());
14845 if (N->use_empty())
14846 deleteAndRecombine(N);
14848 return SDValue(N, 0); // Return N so it doesn't get rechecked!
// Indexed loads: values are (loaded value, updated pointer, chain).
14852 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
14854 // If this load has an opaque TargetConstant offset, then we cannot split
14855 // the indexing into an add/sub directly (that TargetConstant may not be
14856 // valid for a different type of node, and we cannot convert an opaque
14857 // target constant into a regular constant).
14858 bool CanSplitIdx = canSplitIdx(LD);
14860 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
14861 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
14863 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
14864 Index = SplitIndexingFromLoad(LD);
14865 // Try to fold the base pointer arithmetic into subsequent loads and
14867 AddUsersToWorklist(N);
14869 Index = DAG.getUNDEF(N->getValueType(1));
14870 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
14871 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
14872 dbgs() << " and 2 other values\n");
14873 WorklistRemover DeadNodes(*this);
14874 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
14875 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
14876 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
14877 deleteAndRecombine(N);
14878 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14883 // If this load is directly stored, replace the load value with the stored
14885 if (auto V = ForwardStoreValueToDirectLoad(LD))
14888 // Try to infer better alignment information than the load already has.
14889 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
14890 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
14891 if (*Alignment > LD->getAlign() &&
14892 isAligned(*Alignment, LD->getSrcValueOffset())) {
14893 SDValue NewLoad = DAG.getExtLoad(
14894 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
14895 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
14896 LD->getMemOperand()->getFlags(), LD->getAAInfo());
14897 // NewLoad will always be N as we are only refining the alignment
14898 assert(NewLoad.getNode() == N);
14904 if (LD->isUnindexed()) {
14905 // Walk up chain skipping non-aliasing memory nodes.
14906 SDValue BetterChain = FindBetterChain(LD, Chain);
14908 // If there is a better chain.
14909 if (Chain != BetterChain) {
14912 // Replace the chain to void dependency.
14913 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
14914 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
14915 BetterChain, Ptr, LD->getMemOperand());
14917 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
14918 LD->getValueType(0),
14919 BetterChain, Ptr, LD->getMemoryVT(),
14920 LD->getMemOperand());
14923 // Create token factor to keep old chain connected.
14924 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
14925 MVT::Other, Chain, ReplLoad.getValue(1));
14927 // Replace uses with load result and token factor
14928 return CombineTo(N, ReplLoad.getValue(0), Token);
14932 // Try transforming N to an indexed load.
14933 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14934 return SDValue(N, 0);
14936 // Try to slice up N to more direct loads if the slices are mapped to
14937 // different register banks or pairing can take place.
14938 if (SliceUpLoad(N))
14939 return SDValue(N, 0);
14946 /// Helper structure used to slice a load in smaller loads.
14947 /// Basically a slice is obtained from the following sequence:
14948 /// Origin = load Ty1, Base
14949 /// Shift = srl Ty1 Origin, CstTy Amount
14950 /// Inst = trunc Shift to Ty2
14952 /// Then, it will be rewritten into:
14953 /// Slice = load SliceTy, Base + SliceOffset
14954 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14956 /// SliceTy is deduced from the number of bits that are actually used to
14958 struct LoadedSlice {
14959 /// Helper structure used to compute the cost of a slice.
14961 /// Are we optimizing for code size.
14962 bool ForCodeSize = false;
// Counts of the operations a configuration of slices would require.
14965 unsigned Loads = 0;
14966 unsigned Truncates = 0;
14967 unsigned CrossRegisterBanksCopies = 0;
14968 unsigned ZExts = 0;
14969 unsigned Shift = 0;
14971 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
14973 /// Get the cost of one isolated slice.
14974 Cost(const LoadedSlice &LS, bool ForCodeSize)
14975 : ForCodeSize(ForCodeSize), Loads(1) {
14976 EVT TruncType = LS.Inst->getValueType(0);
14977 EVT LoadedType = LS.getLoadedType();
// A zext is only counted when it is not free for the target.
14978 if (TruncType != LoadedType &&
14979 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14983 /// Account for slicing gain in the current cost.
14984 /// Slicing provides a few gains like removing a shift or a
14985 /// truncate. This method allows to grow the cost of the original
14986 /// load with the gain from this slice.
14987 void addSliceGain(const LoadedSlice &LS) {
14988 // Each slice saves a truncate.
14989 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14990 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14991 LS.Inst->getValueType(0)))
14993 // If there is a shift amount, this slice gets rid of it.
14996 // If this slice can merge a cross register bank copy, account for it.
14997 if (LS.canMergeExpensiveCrossRegisterBankCopy())
14998 ++CrossRegisterBanksCopies;
15001 Cost &operator+=(const Cost &RHS) {
15002 Loads += RHS.Loads;
15003 Truncates += RHS.Truncates;
15004 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
15005 ZExts += RHS.ZExts;
15006 Shift += RHS.Shift;
15010 bool operator==(const Cost &RHS) const {
15011 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
15012 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
15013 ZExts == RHS.ZExts && Shift == RHS.Shift;
15016 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
15018 bool operator<(const Cost &RHS) const {
15019 // Assume cross register banks copies are as expensive as loads.
15020 // FIXME: Do we want some more target hooks?
15021 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
15022 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
15023 // Unless we are optimizing for code size, consider the
15024 // expensive operation first.
15025 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
15026 return ExpensiveOpsLHS < ExpensiveOpsRHS;
15027 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
15028 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
15031 bool operator>(const Cost &RHS) const { return RHS < *this; }
15033 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
15035 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
15038 // The last instruction that represent the slice. This should be a
15039 // truncate instruction.
15042 // The original load instruction.
15043 LoadSDNode *Origin;
15045 // The right shift amount in bits from the original load.
15048 // The DAG from which Origin came from.
15049 // This is used to get some contextual information about legal types, etc.
15052 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
15053 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
15054 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15056 /// Get the bits used in a chunk of bits \p BitWidth large.
15057 /// \return Result is \p BitWidth and has used bits set to 1 and
15058 /// not used bits set to 0.
15059 APInt getUsedBits() const {
15060 // Reproduce the trunc(lshr) sequence:
15061 // - Start from the truncated value.
15062 // - Zero extend to the desired bit width.
15064 assert(Origin && "No original load to compare against.");
15065 unsigned BitWidth = Origin->getValueSizeInBits(0);
15066 assert(Inst && "This slice is not bound to an instruction");
15067 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
15068 "Extracted slice is bigger than the whole type!");
15069 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15070 UsedBits.setAllBits();
15071 UsedBits = UsedBits.zext(BitWidth);
15072 UsedBits <<= Shift;
15076 /// Get the size of the slice to be loaded in bytes.
15077 unsigned getLoadedSize() const {
15078 unsigned SliceSize = getUsedBits().countPopulation();
15079 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
15080 return SliceSize / 8;
15083 /// Get the type that will be loaded for this slice.
15084 /// Note: This may not be the final type for the slice.
15085 EVT getLoadedType() const {
15086 assert(DAG && "Missing context");
15087 LLVMContext &Ctxt = *DAG->getContext();
15088 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15091 /// Get the alignment of the load used for this slice.
15092 Align getAlign() const {
15093 Align Alignment = Origin->getAlign();
15094 uint64_t Offset = getOffsetFromBase();
// Derive the alignment implied by loading at (aligned base + Offset).
15096 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15100 /// Check if this slice can be rewritten with legal operations.
15101 bool isLegal() const {
15102 // An invalid slice is not legal.
15103 if (!Origin || !Inst || !DAG)
15106 // Offsets are for indexed load only, we do not handle that.
15107 if (!Origin->getOffset().isUndef())
15110 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15112 // Check that the type is legal.
15113 EVT SliceType = getLoadedType();
15114 if (!TLI.isTypeLegal(SliceType))
15117 // Check that the load is legal for this type.
15118 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15121 // Check that the offset can be computed.
15122 // 1. Check its type.
15123 EVT PtrType = Origin->getBasePtr().getValueType();
15124 if (PtrType == MVT::Untyped || PtrType.isExtended())
15127 // 2. Check that it fits in the immediate.
15128 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15131 // 3. Check that the computation is legal.
15132 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15135 // Check that the zext is legal if it needs one.
15136 EVT TruncateType = Inst->getValueType(0);
15137 if (TruncateType != SliceType &&
15138 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15144 /// Get the offset in bytes of this slice in the original chunk of
15146 /// \pre DAG != nullptr.
15147 uint64_t getOffsetFromBase() const {
15148 assert(DAG && "Missing context.");
15149 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
15150 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
15151 uint64_t Offset = Shift / 8;
15152 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15153 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
15154 "The size of the original loaded type is not a multiple of a"
15156 // If Offset is bigger than TySizeInBytes, it means we are loading all
15157 // zeros. This should have been optimized before in the process.
15158 assert(TySizeInBytes > Offset &&
15159 "Invalid shift amount for given loaded size");
// Big-endian layouts count the offset from the opposite end of the value.
15161 Offset = TySizeInBytes - Offset - getLoadedSize();
15165 /// Generate the sequence of instructions to load the slice
15166 /// represented by this object and redirect the uses of this slice to
15167 /// this new sequence of instructions.
15168 /// \pre this->Inst && this->Origin are valid Instructions and this
15169 /// object passed the legal check: LoadedSlice::isLegal returned true.
15170 /// \return The last instruction of the sequence used to load the slice.
15171 SDValue loadSlice() const {
15172 assert(Inst && Origin && "Unable to replace a non-existing slice.");
15173 const SDValue &OldBaseAddr = Origin->getBasePtr();
15174 SDValue BaseAddr = OldBaseAddr;
15175 // Get the offset in that chunk of bytes w.r.t. the endianness.
15176 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
15177 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
15179 // BaseAddr = BaseAddr + Offset.
15180 EVT ArithType = BaseAddr.getValueType();
15182 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
15183 DAG->getConstant(Offset, DL, ArithType));
15186 // Create the type of the loaded slice according to its size.
15187 EVT SliceType = getLoadedType();
15189 // Create the load for the slice.
15191 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
15192 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
15193 Origin->getMemOperand()->getFlags());
15194 // If the final type is not the same as the loaded type, this means that
15195 // we have to pad with zero. Create a zero extend for that.
15196 EVT FinalType = Inst->getValueType(0);
15197 if (SliceType != FinalType)
15199 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
15203 /// Check if this slice can be merged with an expensive cross register
15204 /// bank copy. E.g.,
15206 /// f = bitcast i32 i to float
15207 bool canMergeExpensiveCrossRegisterBankCopy() const {
// The slice must feed exactly one user, and that user must be a bitcast.
15208 if (!Inst || !Inst->hasOneUse())
15210 SDNode *Use = *Inst->use_begin();
15211 if (Use->getOpcode() != ISD::BITCAST)
15213 assert(DAG && "Missing context");
15214 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15215 EVT ResVT = Use->getValueType(0);
15216 const TargetRegisterClass *ResRC =
15217 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
15218 const TargetRegisterClass *ArgRC =
15219 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
15220 Use->getOperand(0)->isDivergent());
15221 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
15224 // At this point, we know that we perform a cross-register-bank copy.
15225 // Check if it is expensive.
15226 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
15227 // Assume bitcasts are cheap, unless both register classes do not
15228 // explicitly share a common sub class.
15229 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
15232 // Check if it will be merged with the load.
15233 // 1. Check the alignment constraint.
15234 Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
15235 ResVT.getTypeForEVT(*DAG->getContext()));
15237 if (RequiredAlignment > getAlign())
15240 // 2. Check that the load is a legal operation for that type.
15241 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
15244 // 3. Check that we do not have a zext in the way.
15245 if (Inst->getValueType(0) != getLoadedType())
15252 } // end anonymous namespace
15254 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
15255 /// \p UsedBits looks like 0..0 1..1 0..0.
15256 static bool areUsedBitsDense(const APInt &UsedBits) {
15257 // If all the bits are one, this is dense!
15258 if (UsedBits.isAllOnesValue())
15261 // Get rid of the unused bits on the right.
15262 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
15263 // Get rid of the unused bits on the left.
15264 if (NarrowedUsedBits.countLeadingZeros())
15265 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
15266 // Check that the chunk of bits is completely used.
// After shifting out trailing zeros and truncating leading zeros, a dense
// mask is left with every remaining bit set.
15267 return NarrowedUsedBits.isAllOnesValue();
15270 /// Check whether or not \p First and \p Second are next to each other
15271 /// in memory. This means that there is no hole between the bits loaded
15272 /// by \p First and the bits loaded by \p Second.
15273 static bool areSlicesNextToEachOther(const LoadedSlice &First,
15274 const LoadedSlice &Second) {
15275 assert(First.Origin == Second.Origin && First.Origin &&
15276 "Unable to match different memory origins.");
15277 APInt UsedBits = First.getUsedBits();
15278 assert((UsedBits & Second.getUsedBits()) == 0 &&
15279 "Slices are not supposed to overlap.");
15280 UsedBits |= Second.getUsedBits();
15281 return areUsedBitsDense(UsedBits);
15284 /// Adjust the \p GlobalLSCost according to the target
15285 /// pairing capabilities and the layout of the slices.
15286 /// \pre \p GlobalLSCost should account for at least as many loads as
15287 /// there is in the slices in \p LoadedSlices.
15288 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15289 LoadedSlice::Cost &GlobalLSCost) {
15290 unsigned NumberOfSlices = LoadedSlices.size();
15291 // If there is less than 2 elements, no pairing is possible.
15292 if (NumberOfSlices < 2)
15295 // Sort the slices so that elements that are likely to be next to each
15296 // other in memory are next to each other in the list.
15297 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
15298 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
15299 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
15301 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
15302 // First (resp. Second) is the first (resp. Second) potentially candidate
15303 // to be placed in a paired load.
15304 const LoadedSlice *First = nullptr;
15305 const LoadedSlice *Second = nullptr;
15306 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
15307 // Set the beginning of the pair.
15309 Second = &LoadedSlices[CurrSlice];
15311 // If First is NULL, it means we start a new pair.
15312 // Get to the next slice.
15316 EVT LoadedType = First->getLoadedType();
15318 // If the types of the slices are different, we cannot pair them.
15319 if (LoadedType != Second->getLoadedType())
15322 // Check if the target supplies paired loads for this type.
15323 Align RequiredAlignment;
15324 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
15325 // move to the next pair, this type is hopeless.
15329 // Check if we meet the alignment requirement.
15330 if (First->getAlign() < RequiredAlignment)
15333 // Check that both loads are next to each other in memory.
15334 if (!areSlicesNextToEachOther(*First, *Second))
// A paired load replaces two individual loads, so one load is saved.
15337 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
15338 --GlobalLSCost.Loads;
15339 // Move to the next pair.
15344 /// Check the profitability of all involved LoadedSlice.
15345 /// Currently, it is considered profitable if there is exactly two
15346 /// involved slices (1) which are (2) next to each other in memory, and
15347 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
15349 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
15350 /// the elements themselves.
15352 /// FIXME: When the cost model will be mature enough, we can relax
15353 /// constraints (1) and (2).
15354 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15355 const APInt &UsedBits, bool ForCodeSize) {
15356 unsigned NumberOfSlices = LoadedSlices.size();
// Stress mode: slice whenever more than one slice exists, ignoring cost.
15357 if (StressLoadSlicing)
15358 return NumberOfSlices > 1;
15361 if (NumberOfSlices != 2)
15365 if (!areUsedBitsDense(UsedBits))
// Compare the cost of the original single load against the combined cost of
// all the slices.
15369 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
15370 // The original code has one big load.
15371 OrigCost.Loads = 1;
15372 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
15373 const LoadedSlice &LS = LoadedSlices[CurrSlice];
15374 // Accumulate the cost of all the slices.
15375 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
15376 GlobalSlicingCost += SliceCost;
15378 // Account as cost in the original configuration the gain obtained
15379 // with the current slices.
15380 OrigCost.addSliceGain(LS);
15383 // If the target supports paired load, adjust the cost accordingly.
15384 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
15385 return OrigCost > GlobalSlicingCost;
15388 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
15389 /// operations, split it in the various pieces being extracted.
15391 /// This sort of thing is introduced by SROA.
15392 /// This slicing takes care not to insert overlapping loads.
15393 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
15394 bool DAGCombiner::SliceUpLoad(SDNode *N) {
// Slicing only runs after full DAG legalization.
15395 if (Level < AfterLegalizeDAG)
15398 LoadSDNode *LD = cast<LoadSDNode>(N);
15399 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
15400 !LD->getValueType(0).isInteger())
15403 // The algorithm to split up a load of a scalable vector into individual
15404 // elements currently requires knowing the length of the loaded type,
15405 // so will need adjusting to work on scalable vectors.
15406 if (LD->getValueType(0).isScalableVector())
15409 // Keep track of already used bits to detect overlapping values.
15410 // In that case, we will just abort the transformation.
15411 APInt UsedBits(LD->getValueSizeInBits(0), 0);
15413 SmallVector<LoadedSlice, 4> LoadedSlices;
15415 // Check if this load is used as several smaller chunks of bits.
15416 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
15417 // of computation for each trunc.
15418 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
15419 UI != UIEnd; ++UI) {
15420 // Skip the uses of the chain.
15421 if (UI.getUse().getResNo() != 0)
15424 SDNode *User = *UI;
15425 unsigned Shift = 0;
15427 // Check if this is a trunc(lshr).
15428 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
15429 isa<ConstantSDNode>(User->getOperand(1))) {
15430 Shift = User->getConstantOperandVal(1);
15431 User = *User->use_begin();
15434 // At this point, User is a Truncate, iff we encountered, trunc or
15436 if (User->getOpcode() != ISD::TRUNCATE)
15439 // The width of the type must be a power of 2 and greater than 8-bits.
15440 // Otherwise the load cannot be represented in LLVM IR.
15441 // Moreover, if we shifted with a non-8-bits multiple, the slice
15442 // will be across several bytes. We do not support that.
15443 unsigned Width = User->getValueSizeInBits(0);
15444 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
15447 // Build the slice for this chain of computations.
15448 LoadedSlice LS(User, LD, Shift, &DAG);
15449 APInt CurrentUsedBits = LS.getUsedBits();
15451 // Check if this slice overlaps with another.
15452 if ((CurrentUsedBits & UsedBits) != 0)
15454 // Update the bits used globally.
15455 UsedBits |= CurrentUsedBits;
15457 // Check if the new slice would be legal.
15461 // Record the slice.
15462 LoadedSlices.push_back(LS);
15465 // Abort slicing if it does not seem to be profitable.
15466 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
15471 // Rewrite each chain to use an independent load.
15472 // By construction, each chain can be represented by a unique load.
15474 // Prepare the argument for the new token factor for all the slices.
15475 SmallVector<SDValue, 8> ArgChains;
15476 for (SmallVectorImpl<LoadedSlice>::const_iterator
15477 LSIt = LoadedSlices.begin(),
15478 LSItEnd = LoadedSlices.end();
15479 LSIt != LSItEnd; ++LSIt) {
15480 SDValue SliceInst = LSIt->loadSlice();
15481 CombineTo(LSIt->Inst, SliceInst, true);
// loadSlice() may return a zext wrapping the load; step down to the load
// itself to collect its chain.
15482 if (SliceInst.getOpcode() != ISD::LOAD)
15483 SliceInst = SliceInst.getOperand(0);
15484 assert(SliceInst->getOpcode() == ISD::LOAD &&
15485 "It takes more than a zext to get to the loaded slice!!");
15486 ArgChains.push_back(SliceInst.getValue(1));
// Tie the chains of all the slice loads together and replace the original
// load's chain with the token factor.
15489 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
15491 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15492 AddToWorklist(Chain.getNode());
15496 /// Check to see if V is (and load (ptr), imm), where the load is having
15497 /// specific bytes cleared out. If so, return the byte size being masked out
15498 /// and the shift amount.
/// A returned pair of (0, 0) signals failure (no masked load recognized).
/// NOTE(review): this listing is elided -- several original lines (early
/// `return Result;` statements and closing braces) are not shown below.
15499 static std::pair<unsigned, unsigned>
15500 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
// Default "failure" result.
15501 std::pair<unsigned, unsigned> Result(0, 0);
15503 // Check for the structure we're looking for.
15504 if (V->getOpcode() != ISD::AND ||
15505 !isa<ConstantSDNode>(V->getOperand(1)) ||
15506 !ISD::isNormalLoad(V->getOperand(0).getNode()))
15509 // Check the chain and pointer.
15510 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
15511 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
15513 // This only handles simple types.
15514 if (V.getValueType() != MVT::i16 &&
15515 V.getValueType() != MVT::i32 &&
15516 V.getValueType() != MVT::i64)
15519 // Check the constant mask. Invert it so that the bits being masked out are
15520 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
15521 // follow the sign bit for uniformity.
15522 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
15523 unsigned NotMaskLZ = countLeadingZeros(NotMask);
15524 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
15525 unsigned NotMaskTZ = countTrailingZeros(NotMask);
15526 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
15527 if (NotMaskLZ == 64) return Result; // All zero mask.
15529 // See if we have a continuous run of bits. If so, we have 0*1+0*
15530 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
15533 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
15534 if (V.getValueType() != MVT::i64 && NotMaskLZ)
15535 NotMaskLZ -= 64-V.getValueSizeInBits();
// Number of whole bytes covered by the contiguous cleared-bit run.
15537 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
15538 switch (MaskedBytes) {
15542 default: return Result; // All one mask, or 5-byte mask.
15545 // Verify that the first bit starts at a multiple of mask so that the access
15546 // is aligned the same as the access width.
15547 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
15549 // For narrowing to be valid, it must be the case that the load is the
15550 // immediately preceding memory operation before the store.
15551 if (LD == Chain.getNode())
15553 else if (Chain->getOpcode() == ISD::TokenFactor &&
15554 SDValue(LD, 1).hasOneUse()) {
15555 // LD has only 1 chain use so there are no indirect dependencies.
15556 if (!LD->isOperandOf(Chain.getNode()))
15559 return Result; // Fail.
// Success: report the masked byte count and the byte offset of the run.
15561 Result.first = MaskedBytes;
15562 Result.second = NotMaskTZ/8;
15566 /// Check to see if IVal is something that provides a value as specified by
15567 /// MaskInfo. If so, replace the specified store with a narrower store of
15568 /// truncated IVal.
/// MaskInfo is the (NumBytes, ByteShift) pair produced by CheckForMaskedLoad.
/// Returns the new (narrower) store, or SDValue() if the transform is not
/// possible or not legal on the target.
/// NOTE(review): elided listing -- the `static SDValue` return-type line, the
/// `DAGCombiner *DC` parameter line, and several `return SDValue();` lines are
/// not shown here.
15570 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
15571 SDValue IVal, StoreSDNode *St,
15573 unsigned NumBytes = MaskInfo.first;
15574 unsigned ByteShift = MaskInfo.second;
15575 SelectionDAG &DAG = DC->getDAG();
15577 // Check to see if IVal is all zeros in the part being masked in by the 'or'
15578 // that uses this. If not, this is not a replacement.
15579 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
15580 ByteShift*8, (ByteShift+NumBytes)*8);
15581 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
15583 // Check that it is legal on the target to do this. It is legal if the new
15584 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
15585 // legalization (and the target doesn't explicitly think this is a bad idea).
15586 MVT VT = MVT::getIntegerVT(NumBytes * 8);
15587 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15588 if (!DC->isTypeLegal(VT))
15590 if (St->getMemOperand() &&
15591 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15592 *St->getMemOperand()))
15595 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
15596 // shifted by ByteShift and truncated down to NumBytes.
// Shift the relevant bytes down to bit 0 (only when ByteShift != 0 in the
// original; the guarding `if` line is elided in this listing).
15599 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
15600 DAG.getConstant(ByteShift*8, DL,
15601 DC->getShiftAmountTy(IVal.getValueType())));
15604 // Figure out the offset for the store and the alignment of the access.
15606 unsigned NewAlign = St->getAlignment();
// Little-endian: byte offset equals the shift; big-endian: count from the
// other end of the stored value.
15608 if (DAG.getDataLayout().isLittleEndian())
15609 StOffset = ByteShift;
15611 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
15613 SDValue Ptr = St->getBasePtr();
15616 Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
15617 NewAlign = MinAlign(NewAlign, StOffset);
15620 // Truncate down to the new size.
15621 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
// Build and return the replacement (narrower) store at the adjusted offset.
15625 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
15626 St->getPointerInfo().getWithOffset(StOffset), NewAlign);
15629 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
15630 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
15631 /// narrowing the load and store if it would end up being a win for performance
/// or code size. N is the store node; returns the new store or SDValue().
/// NOTE(review): elided listing -- early `return SDValue();` lines and some
/// closing braces are not shown below.
15633 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
15634 StoreSDNode *ST = cast<StoreSDNode>(N);
// Bail on volatile/atomic stores.
15635 if (!ST->isSimple())
15638 SDValue Chain = ST->getChain();
15639 SDValue Value = ST->getValue();
15640 SDValue Ptr = ST->getBasePtr();
15641 EVT VT = Value.getValueType();
15643 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
15646 unsigned Opc = Value.getOpcode();
15648 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
15649 // is a byte mask indicating a consecutive number of bytes, check to see if
15650 // Y is known to provide just those bytes. If so, we try to replace the
15651 // load + replace + store sequence with a single (narrower) store, which makes
// the load dead.
15653 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
15654 std::pair<unsigned, unsigned> MaskedLoad;
15655 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
15656 if (MaskedLoad.first)
15657 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15658 Value.getOperand(1), ST,this))
15661 // Or is commutative, so try swapping X and Y.
15662 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
15663 if (MaskedLoad.first)
15664 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15665 Value.getOperand(0), ST,this))
15669 if (!EnableReduceLoadOpStoreWidth)
// Only or/xor/and with a constant RHS are handled below.
15672 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
15673 Value.getOperand(1).getOpcode() != ISD::Constant)
15676 SDValue N0 = Value.getOperand(0);
// The op's LHS must be a single-use normal load feeding this store's chain.
15677 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15678 Chain == SDValue(N0.getNode(), 1)) {
15679 LoadSDNode *LD = cast<LoadSDNode>(N0);
15680 if (LD->getBasePtr() != Ptr ||
15681 LD->getPointerInfo().getAddrSpace() !=
15682 ST->getPointerInfo().getAddrSpace())
15685 // Find the type to narrow the load / op / store to.
15686 SDValue N1 = Value.getOperand(1);
15687 unsigned BitWidth = N1.getValueSizeInBits();
15688 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
// For AND, invert so that Imm's set bits are the bits being *changed*,
// matching the or/xor cases.
15689 if (Opc == ISD::AND)
15690 Imm ^= APInt::getAllOnesValue(BitWidth);
15691 if (Imm == 0 || Imm.isAllOnesValue())
// [ShAmt, MSB] is the bit range the op actually modifies.
15693 unsigned ShAmt = Imm.countTrailingZeros();
15694 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
15695 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
15696 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15697 // The narrowing should be profitable, the load/store operation should be
15698 // legal (or custom) and the store size should be equal to the NewVT width.
15699 while (NewBW < BitWidth &&
15700 (NewVT.getStoreSizeInBits() != NewBW ||
15701 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
15702 !TLI.isNarrowingProfitable(VT, NewVT))) {
15703 NewBW = NextPowerOf2(NewBW);
15704 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15706 if (NewBW >= BitWidth)
15709 // If the lsb changed does not start at the type bitwidth boundary,
15710 // start at the previous one.
15712 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
15713 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
15714 std::min(BitWidth, ShAmt + NewBW));
// Only safe if the modified bits fit entirely in the narrowed window.
15715 if ((Imm & Mask) == Imm) {
15716 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
// Undo the earlier inversion for AND.
15717 if (Opc == ISD::AND)
15718 NewImm ^= APInt::getAllOnesValue(NewBW);
15719 uint64_t PtrOff = ShAmt / 8;
15720 // For big endian targets, we need to adjust the offset to the pointer to
15721 // load the correct bytes.
15722 if (DAG.getDataLayout().isBigEndian())
15723 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
// Reject if the narrowed access would be under-aligned for the new type.
15725 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
15726 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
15727 if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
// Build narrowed load, op, and store at the byte offset.
15730 SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
15732 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
15733 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
15734 LD->getMemOperand()->getFlags(), LD->getAAInfo());
15735 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
15736 DAG.getConstant(NewImm, SDLoc(Value),
15739 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
15740 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
15742 AddToWorklist(NewPtr.getNode());
15743 AddToWorklist(NewLD.getNode());
15744 AddToWorklist(NewVal.getNode());
15745 WorklistRemover DeadNodes(*this);
// Route the old load's chain users to the new load's chain.
15746 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
15755 /// For a given floating point load / store pair, if the load value isn't used
15756 /// by any other operations, then consider transforming the pair to integer
15757 /// load / store operations if the target deems the transformation profitable.
/// N is the store node; returns the new integer store, or SDValue() when the
/// transform does not apply. NOTE(review): elided listing -- early
/// `return SDValue();` lines and the NewLD/NewST declaration lines are not
/// shown below.
15758 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15759 StoreSDNode *ST = cast<StoreSDNode>(N);
15760 SDValue Value = ST->getValue();
// Only a plain load feeding a plain store, with the load used nowhere else.
15761 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15762 Value.hasOneUse()) {
15763 LoadSDNode *LD = cast<LoadSDNode>(Value);
15764 EVT VT = LD->getMemoryVT();
// FP type, matching memory VTs, no non-temporal hints, default addr space.
15765 if (!VT.isFloatingPoint() ||
15766 VT != ST->getMemoryVT() ||
15767 LD->isNonTemporal() ||
15768 ST->isNonTemporal() ||
15769 LD->getPointerInfo().getAddrSpace() != 0 ||
15770 ST->getPointerInfo().getAddrSpace() != 0)
15773 TypeSize VTSize = VT.getSizeInBits();
15775 // We don't know the size of scalable types at compile time so we cannot
15776 // create an integer of the equivalent size.
15777 if (VTSize.isScalable())
15780 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
// Target must support integer load/store of this width and want the change.
15781 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15782 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15783 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15784 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
// Both accesses must meet the integer type's ABI alignment.
15787 Align LDAlign = LD->getAlign();
15788 Align STAlign = ST->getAlign();
15789 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15790 Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
15791 if (LDAlign < ABIAlign || STAlign < ABIAlign)
// Build the replacement integer load and store at the same addresses.
15795 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15796 LD->getPointerInfo(), LDAlign);
15799 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15800 ST->getPointerInfo(), STAlign);
15802 AddToWorklist(NewLD.getNode());
15803 AddToWorklist(NewST.getNode());
15804 WorklistRemover DeadNodes(*this);
// Re-route the old FP load's chain users to the new integer load's chain.
15805 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15813 // This is a helper function for visitMUL to check the profitability
15814 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15815 // MulNode is the original multiply, AddNode is (add x, c1),
15816 // and ConstNode is c2.
15818 // If the (add x, c1) has multiple uses, we could increase
15819 // the number of adds if we make this transformation.
15820 // It would only be worth doing this if we can remove a
15821 // multiply in the process. Check for that here.
15825 // We're checking for cases where we have common "c3 * A" expressions.
// Returns true when the fold is profitable. NOTE(review): elided listing --
// the `SDValue &AddNode` parameter line, `return true/false` lines, and some
// braces are not shown below.
15826 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15828 SDValue &ConstNode) {
15831 // If the add only has one use, this would be OK to do.
15832 if (AddNode.getNode()->hasOneUse())
15835 // Walk all the users of the constant with which we're multiplying.
15836 for (SDNode *Use : ConstNode->uses()) {
15837 if (Use == MulNode) // This use is the one we're on right now. Skip it.
15840 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
// MulVar is "A" in (A + c1).
15842 SDNode *MulVar = AddNode.getOperand(0).getNode();
15844 // OtherOp is what we're multiplying against the constant.
15845 if (Use->getOperand(0) == ConstNode)
15846 OtherOp = Use->getOperand(1).getNode();
15848 OtherOp = Use->getOperand(0).getNode();
15850 // Check to see if multiply is with the same operand of our "add".
15852 // ConstNode = CONST
15853 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
15855 // AddNode = (A + c1) <-- MulVar is A.
15856 // = AddNode * ConstNode <-- current visiting instruction.
15858 // If we make this transformation, we will have a common
15859 // multiply (ConstNode * A) that we can save.
15860 if (OtherOp == MulVar)
15863 // Now check to see if a future expansion will give us a common
// multiply:
15866 // ConstNode = CONST
15867 // AddNode = (A + c1)
15868 // ... = AddNode * ConstNode <-- current visiting instruction.
15870 // OtherOp = (A + c2)
15871 // Use = OtherOp * ConstNode <-- visiting Use.
15873 // If we make this transformation, we will have a common
15874 // multiply (CONST * A) after we also do the same transformation
15875 // to the "t2" instruction.
15876 if (OtherOp->getOpcode() == ISD::ADD &&
15877 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15878 OtherOp->getOperand(0).getNode() == MulVar)
15883 // Didn't find a case where this would be profitable.
// Build a TokenFactor over the input chains of the first NumStores candidate
// stores, deduplicated and excluding chains that are themselves candidate
// stores (to avoid self-referential chains in the merged store).
15887 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15888 unsigned NumStores) {
15889 SmallVector<SDValue, 8> Chains;
15890 SmallPtrSet<const SDNode *, 8> Visited;
15891 SDLoc StoreDL(StoreNodes[0].MemNode);
// Seed Visited with the candidate stores themselves so their chains are
// filtered out in the second pass.
15893 for (unsigned i = 0; i < NumStores; ++i) {
15894 Visited.insert(StoreNodes[i].MemNode);
15897 // don't include nodes that are children or repeated nodes.
15898 for (unsigned i = 0; i < NumStores; ++i) {
15899 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15900 Chains.push_back(StoreNodes[i].MemNode->getChain());
15903 assert(Chains.size() > 0 && "Chain should have generated a chain");
15904 return DAG.getTokenFactor(StoreDL, Chains);
// Merge NumStores adjacent stores of constants or extracted vector elements
// into a single wide store (vector, integer, or truncating integer store).
// IsConstantSrc: values are constants; UseVector: build a vector store;
// UseTrunc: the wide integer store must be emitted as a truncating store.
// Returns true on success. NOTE(review): elided listing -- several lines
// (declarations of StoreTy/StoredVal/NewStore, `continue`/`return` lines, and
// braces) are not shown below.
15907 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
15908 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
15909 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
15910 // Make sure we have something to merge.
15914 // The latest Node in the DAG.
15915 SDLoc DL(StoreNodes[0].MemNode);
15917 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
15918 unsigned SizeInBits = NumStores * ElementSizeBits;
15919 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
// Vector path: merged type is a vector of all the scalar elements.
15923 unsigned Elts = NumStores * NumMemElts;
15924 // Get the type for the merged vector store.
15925 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
// Integer path: merged type is one integer covering all stored bits.
15927 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
15931 if (IsConstantSrc) {
15932 SmallVector<SDValue, 8> BuildVector;
15933 for (unsigned I = 0; I != NumStores; ++I) {
15934 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
15935 SDValue Val = St->getValue();
15936 // If constant is of the wrong type, convert it now.
15937 if (MemVT != Val.getValueType()) {
15938 Val = peekThroughBitcasts(Val);
15939 // Deal with constants of wrong size.
15940 if (ElementSizeBits != Val.getValueSizeInBits()) {
15942 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
15943 if (isa<ConstantFPSDNode>(Val)) {
15944 // Not clear how to truncate FP values.
15946 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15947 Val = DAG.getConstant(C->getAPIntValue()
15948 .zextOrTrunc(Val.getValueSizeInBits())
15949 .zextOrTrunc(ElementSizeBits),
15950 SDLoc(C), IntMemVT);
15952 // Make sure the correctly-sized value has the correct type.
15953 Val = DAG.getBitcast(MemVT, Val);
15955 BuildVector.push_back(Val);
15957 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15958 : ISD::BUILD_VECTOR,
15959 DL, StoreTy, BuildVector);
// Non-constant source: values are vector-element/subvector extracts.
15961 SmallVector<SDValue, 8> Ops;
15962 for (unsigned i = 0; i < NumStores; ++i) {
15963 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15964 SDValue Val = peekThroughBitcasts(St->getValue());
15965 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15966 // type MemVT. If the underlying value is not the correct
15967 // type, but it is an extraction of an appropriate vector we
15968 // can recast Val to be of the correct type. This may require
15969 // converting between EXTRACT_VECTOR_ELT and
15970 // EXTRACT_SUBVECTOR.
15971 if ((MemVT != Val.getValueType()) &&
15972 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15973 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
15974 EVT MemVTScalarTy = MemVT.getScalarType();
15975 // We may need to add a bitcast here to get types to line up.
15976 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15977 Val = DAG.getBitcast(MemVT, Val);
// Re-extract with the opcode matching MemVT's shape.
15979 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15980 : ISD::EXTRACT_VECTOR_ELT;
15981 SDValue Vec = Val.getOperand(0);
15982 SDValue Idx = Val.getOperand(1);
15983 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15986 Ops.push_back(Val);
15989 // Build the extracted vector elements back into a vector.
15990 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15991 : ISD::BUILD_VECTOR,
15995 // We should always use a vector store when merging extracted vector
15996 // elements, so this path implies a store of constants.
15997 assert(IsConstantSrc && "Merged vector elements should use vector store");
15999 APInt StoreInt(SizeInBits, 0);
16001 // Construct a single integer constant which is made of the smaller
16002 // constant inputs.
16003 bool IsLE = DAG.getDataLayout().isLittleEndian();
16004 for (unsigned i = 0; i < NumStores; ++i) {
// On little-endian, walk the stores in reverse so the first store's
// bytes end up in the low bits of the combined integer.
16005 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
16006 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
16008 SDValue Val = St->getValue();
16009 Val = peekThroughBitcasts(Val);
16010 StoreInt <<= ElementSizeBits;
16011 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
16012 StoreInt |= C->getAPIntValue()
16013 .zextOrTrunc(ElementSizeBits)
16014 .zextOrTrunc(SizeInBits);
16015 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
16016 StoreInt |= C->getValueAPF()
16018 .zextOrTrunc(ElementSizeBits)
16019 .zextOrTrunc(SizeInBits);
16020 // If fp truncation is necessary give up for now.
16021 if (MemVT.getSizeInBits() != ElementSizeBits)
16024 llvm_unreachable("Invalid constant element type");
16028 // Create the new Load and Store operations.
16029 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
16032 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16033 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
16035 // make sure we use trunc store if it's necessary to be legal.
16038 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
16039 FirstInChain->getPointerInfo(),
16040 FirstInChain->getAlignment());
16041 } else { // Must be realized as a trunc store
// Extend the constant to the legalized (promoted) type, then emit a
// truncating store back down to the merged memory type.
16042 EVT LegalizedStoredValTy =
16043 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
16044 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
16045 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
16046 SDValue ExtendedStoreVal =
16047 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
16048 LegalizedStoredValTy);
16049 NewStore = DAG.getTruncStore(
16050 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
16051 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
16052 FirstInChain->getAlignment(),
16053 FirstInChain->getMemOperand()->getFlags());
16056 // Replace all merged stores with the new store.
16057 for (unsigned i = 0; i < NumStores; ++i)
16058 CombineTo(StoreNodes[i].MemNode, NewStore);
16060 AddToWorklist(NewChain.getNode());
// Collect into StoreNodes all stores that could be merged with St: same base
// pointer (with constant offsets), same source kind (load/constant/extract),
// compatible memory VTs, and reachable from a common chain root (set in
// RootNode). NOTE(review): elided listing -- early `return` lines,
// `return true/false` lines inside the lambdas, and some braces are not
// shown below.
16064 void DAGCombiner::getStoreMergeCandidates(
16065 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
16066 SDNode *&RootNode) {
16067 // This holds the base pointer, index, and the offset in bytes from the base
// pointer.
16069 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
16070 EVT MemVT = St->getMemoryVT();
16072 SDValue Val = peekThroughBitcasts(St->getValue());
16073 // We must have a base and an offset.
16074 if (!BasePtr.getBase().getNode())
16077 // Do not handle stores to undef base pointers.
16078 if (BasePtr.getBase().isUndef())
16081 StoreSource StoreSrc = getStoreSource(Val);
16082 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
16083 BaseIndexOffset LBasePtr;
16084 // Match on loadbaseptr if relevant.
16086 if (StoreSrc == StoreSource::Load) {
16087 auto *Ld = cast<LoadSDNode>(Val);
16088 LBasePtr = BaseIndexOffset::match(Ld, DAG);
16089 LoadVT = Ld->getMemoryVT();
16090 // Load and store should be the same type.
16091 if (MemVT != LoadVT)
16093 // Loads must only have one use.
16094 if (!Ld->hasNUsesOfValue(1, 0))
16096 // The memory operands must not be volatile/indexed/atomic.
16097 // TODO: May be able to relax for unordered atomics (see D66309)
16098 if (!Ld->isSimple() || Ld->isIndexed())
// Predicate: is `Other` a mergeable partner for St? On success, fills Ptr
// and the byte Offset of Other relative to St's base.
16101 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
16102 int64_t &Offset) -> bool {
16103 // The memory operands must not be volatile/indexed/atomic.
16104 // TODO: May be able to relax for unordered atomics (see D66309)
16105 if (!Other->isSimple() || Other->isIndexed())
16107 // Don't mix temporal stores with non-temporal stores.
16108 if (St->isNonTemporal() != Other->isNonTemporal())
16110 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
16111 // Allow merging constants of different types as integers.
16112 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
16113 : Other->getMemoryVT() != MemVT;
16114 if (StoreSrc == StoreSource::Load) {
16117 // The Load's Base Ptr must also match
16118 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
16119 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
16120 if (LoadVT != OtherLd->getMemoryVT())
16122 // Loads must only have one use.
16123 if (!OtherLd->hasNUsesOfValue(1, 0))
16125 // The memory operands must not be volatile/indexed/atomic.
16126 // TODO: May be able to relax for unordered atomics (see D66309)
16127 if (!OtherLd->isSimple() ||
16128 OtherLd->isIndexed())
16130 // Don't mix temporal loads with non-temporal loads.
16131 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
16133 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
16138 if (StoreSrc == StoreSource::Constant) {
16141 if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
16144 if (StoreSrc == StoreSource::Extract) {
16145 // Do not merge truncated stores here.
16146 if (Other->isTruncatingStore())
16148 if (!MemVT.bitsEq(OtherBC.getValueType()))
16150 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
16151 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
// Finally, the store bases must agree (Offset gets the delta).
16154 Ptr = BaseIndexOffset::match(Other, DAG);
16155 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
16158 // Check if the pair of StoreNode and the RootNode already bail out many
16159 // times which is over the limit in dependence check.
16160 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
16161 SDNode *RootNode) -> bool {
16162 auto RootCount = StoreRootCountMap.find(StoreNode);
16163 if (RootCount != StoreRootCountMap.end() &&
16164 RootCount->second.first == RootNode &&
16165 RootCount->second.second > StoreMergeDependenceLimit)
16170 // We are looking for a root node which is an ancestor to all mergable
16171 // stores. We search up through a load, to our root and then down
16172 // through all children. For instance we will find Store{1,2,3} if
16173 // St is Store1, Store2. or Store3 where the root is not a load
16174 // which is always true for nonvolatile ops. TODO: Expand
16175 // the search to find all valid candidates through multiple layers of loads.
16178 // |-------|-------|
16179 // Load Load Store3
16183 // FIXME: We should be able to climb and
16184 // descend TokenFactors to find candidates as well.
16186 RootNode = St->getChain().getNode();
// Cap the walk at 1024 explored users to bound compile time.
16188 unsigned NumNodesExplored = 0;
16189 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
// Chain root is a load: climb above it, then descend through sibling
// loads to find stores fed by them.
16190 RootNode = Ldn->getChain().getNode();
16191 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16192 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
16193 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
16194 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
16195 if (I2.getOperandNo() == 0)
16196 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
16197 BaseIndexOffset Ptr;
16199 if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
16200 !OverLimitInDependenceCheck(OtherST, RootNode))
16201 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
// Otherwise scan the root's direct chain users for matching stores.
16204 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16205 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
16206 if (I.getOperandNo() == 0)
16207 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
16208 BaseIndexOffset Ptr;
16210 if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
16211 !OverLimitInDependenceCheck(OtherST, RootNode))
16212 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
16216 // We need to check that merging these stores does not cause a loop in
16217 // the DAG. Any store candidate may depend on another candidate
16218 // indirectly through its operand (we already consider dependencies
16219 // through the chain). Check in parallel by searching up from
16220 // non-chain operands of candidates.
// Returns true when the merge is cycle-free. NOTE(review): elided listing --
// `return false;`/`return true;` lines and some braces are not shown below.
16221 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
16222 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
16223 SDNode *RootNode) {
16224 // FIXME: We should be able to truncate a full search of
16225 // predecessors by doing a BFS and keeping tabs the originating
16226 // stores from which worklist nodes come from in a similar way to
16227 // TokenFactor simplification.
16229 SmallPtrSet<const SDNode *, 32> Visited;
16230 SmallVector<const SDNode *, 8> Worklist;
16232 // RootNode is a predecessor to all candidates so we need not search
16233 // past it. Add RootNode (peeking through TokenFactors). Do not count
16234 // these towards size check.
16236 Worklist.push_back(RootNode);
16237 while (!Worklist.empty()) {
16238 auto N = Worklist.pop_back_val();
16239 if (!Visited.insert(N).second)
16240 continue; // Already present in Visited.
16241 if (N->getOpcode() == ISD::TokenFactor) {
16242 for (SDValue Op : N->ops())
16243 Worklist.push_back(Op.getNode());
16247 // Don't count pruning nodes towards max.
16248 unsigned int Max = 1024 + Visited.size();
16249 // Search Ops of store candidates.
16250 for (unsigned i = 0; i < NumStores; ++i) {
16251 SDNode *N = StoreNodes[i].MemNode;
16252 // Of the 4 Store Operands:
16253 // * Chain (Op 0) -> We have already considered these
16254 // in candidate selection and can be
// pruned.
16256 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
16257 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
16258 // but aren't necessarily from the same base node, so
16259 // cycles possible (e.g. via indexed store).
16260 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
16261 // non-indexed stores). Not constant on all targets (e.g. ARM)
16262 // and so can participate in a cycle.
16263 for (unsigned j = 1; j < N->getNumOperands(); ++j)
16264 Worklist.push_back(N->getOperand(j).getNode());
16266 // Search through DAG. We can stop early if we find a store node.
16267 for (unsigned i = 0; i < NumStores; ++i)
16268 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
16270 // If the searching bail out, record the StoreNode and RootNode in the
16271 // StoreRootCountMap. If we have seen the pair many times over a limit,
16272 // we won't add the StoreNode into StoreNodes set again.
16273 if (Visited.size() >= Max) {
16274 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
16275 if (RootCount.first == RootNode)
16276 RootCount.second++;
16278 RootCount = {RootNode, 1};
// From the sorted candidate list, find and return the length of the leading
// run of stores at consecutive addresses (stride ElementSizeBytes), trimming
// overlapping or non-consecutive leading entries from StoreNodes as it goes.
// NOTE(review): elided listing -- the return-type line and `return 0;` lines
// are not shown below.
16286 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
16287 int64_t ElementSizeBytes) const {
16289 // Find a store past the width of the first store.
16290 size_t StartIdx = 0;
16291 while ((StartIdx + 1 < StoreNodes.size()) &&
16292 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
16293 StoreNodes[StartIdx + 1].OffsetFromBase)
16296 // Bail if we don't have enough candidates to merge.
16297 if (StartIdx + 1 >= StoreNodes.size())
16300 // Trim stores that overlapped with the first store.
16302 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx)
16304 // Scan the memory operations on the chain and find the first
16305 // non-consecutive store memory address.
16306 unsigned NumConsecutiveStores = 1;
16307 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16308 // Check that the addresses are consecutive starting from the second
16309 // element in the list of stores.
16310 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
16311 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
16312 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16314 NumConsecutiveStores = i + 1;
16316 if (NumConsecutiveStores > 1)
16317 return NumConsecutiveStores;
16319 // There are no consecutive stores at the start of the list.
16320 // Remove the first store and try again.
16321 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
// Try to merge the run of NumConsecutiveStores consecutive constant stores at
// the front of StoreNodes into fewer, wider stores (a plain integer store, a
// truncating integer store, or a vector store when AllowVectors is set).
// Candidates are consumed from the front of StoreNodes as they are merged or
// proven unmergeable; the loop repeats until fewer than two remain.
// Returns true if any merge was performed.
// NOTE(review): gaps in the embedded line numbering show that parts of several
// multi-line conditions and braces are elided in this listing; verify against
// the upstream source before editing.
16325 bool DAGCombiner::tryStoreMergeOfConstants(
16326 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
16327 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
16328 LLVMContext &Context = *DAG.getContext();
16329 const DataLayout &DL = DAG.getDataLayout();
16330 int64_t ElementSizeBytes = MemVT.getStoreSize();
16331 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16332 bool MadeChange = false;
16334 // Store the constants into memory as one consecutive store.
16335 while (NumConsecutiveStores >= 2) {
16336 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16337 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16338 unsigned FirstStoreAlign = FirstInChain->getAlignment();
16339 unsigned LastLegalType = 1;
16340 unsigned LastLegalVectorType = 1;
16341 bool LastIntegerTrunc = false;
16342 bool NonZero = false;
// FirstZeroAfterNonZero is initialized to "none found" (one past the end);
// it records the first zero element that follows a non-zero element, which
// bounds how many candidates may be skipped later without dropping a
// mergeable zero run.
16343 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
16344 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16345 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
16346 SDValue StoredVal = ST->getValue();
16347 bool IsElementZero = false;
16348 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
16349 IsElementZero = C->isNullValue();
16350 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
16351 IsElementZero = C->getConstantFPValue()->isNullValue();
16352 if (IsElementZero) {
16353 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
16354 FirstZeroAfterNonZero = i;
16356 NonZero |= !IsElementZero;
16358 // Find a legal type for the constant store.
16359 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16360 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16361 bool IsFast = false;
16363 // Break early when size is too large to be legal.
16364 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16367 if (TLI.isTypeLegal(StoreTy) &&
16368 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16369 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16370 *FirstInChain->getMemOperand(), &IsFast) &&
16372 LastIntegerTrunc = false;
16373 LastLegalType = i + 1;
16374 // Or check whether a truncstore is legal.
16375 } else if (TLI.getTypeAction(Context, StoreTy) ==
16376 TargetLowering::TypePromoteInteger) {
16377 EVT LegalizedStoredValTy =
16378 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
16379 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16380 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16381 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16382 *FirstInChain->getMemOperand(), &IsFast) &&
16384 LastIntegerTrunc = true;
16385 LastLegalType = i + 1;
16389 // We only use vectors if the constant is known to be zero or the
16390 // target allows it and the function is not marked with the
16391 // noimplicitfloat attribute.
16393 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
16395 // Find a legal type for the vector store.
16396 unsigned Elts = (i + 1) * NumMemElts;
16397 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16398 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
16399 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16400 TLI.allowsMemoryAccess(Context, DL, Ty,
16401 *FirstInChain->getMemOperand(), &IsFast) &&
16403 LastLegalVectorType = i + 1;
// Prefer the vector form only when it covers strictly more stores than the
// integer form and the caller permits vectors (no noimplicitfloat).
16407 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
16408 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
16410 // Check if we found a legal integer type that creates a meaningful
16413 // We know that candidate stores are in order and of correct
16414 // shape. While there is no mergeable sequence from the
16415 // beginning one may start later in the sequence. The only
16416 // reason a merge of size N could have failed where another of
16417 // the same size would not have, is if the alignment has
16418 // improved or we've dropped a non-zero value. Drop as many
16419 // candidates as we can here.
16420 unsigned NumSkip = 1;
16421 while ((NumSkip < NumConsecutiveStores) &&
16422 (NumSkip < FirstZeroAfterNonZero) &&
16423 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16426 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16427 NumConsecutiveStores -= NumSkip;
16431 // Check that we can merge these candidates without causing a cycle.
16432 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
// Merging these candidates would create a cycle in the DAG; drop them and
// retry with the remaining stores.
16434 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16435 NumConsecutiveStores -= NumElem;
16439 MadeChange |= mergeStoresOfConstantsOrVecElts(
16440 StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
16442 // Remove merged stores for next iteration.
16443 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16444 NumConsecutiveStores -= NumElem;
// Try to merge the run of NumConsecutiveStores consecutive stores of extracted
// vector elements at the front of StoreNodes into wider vector stores.
// Candidates are consumed from the front of StoreNodes as they are merged or
// proven unmergeable; the loop repeats until fewer than two remain.
// Returns true if any merge was performed.
// NOTE(review): gaps in the embedded line numbering show that parts of several
// multi-line conditions and braces are elided in this listing; verify against
// the upstream source before editing.
16449 bool DAGCombiner::tryStoreMergeOfExtracts(
16450 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
16451 EVT MemVT, SDNode *RootNode) {
16452 LLVMContext &Context = *DAG.getContext();
16453 const DataLayout &DL = DAG.getDataLayout();
16454 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16455 bool MadeChange = false;
16457 // Loop on Consecutive Stores on success.
16458 while (NumConsecutiveStores >= 2) {
16459 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16460 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16461 unsigned FirstStoreAlign = FirstInChain->getAlignment();
16462 unsigned NumStoresToMerge = 1;
16463 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16464 // Find a legal type for the vector store.
16465 unsigned Elts = (i + 1) * NumMemElts;
16466 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16467 bool IsFast = false;
16469 // Break early when size is too large to be legal.
16470 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
16473 if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16474 TLI.allowsMemoryAccess(Context, DL, Ty,
16475 *FirstInChain->getMemOperand(), &IsFast) &&
16477 NumStoresToMerge = i + 1;
16480 // Check if we found a legal integer type creating a meaningful
16482 if (NumStoresToMerge < 2) {
16483 // We know that candidate stores are in order and of correct
16484 // shape. While there is no mergeable sequence from the
16485 // beginning one may start later in the sequence. The only
16486 // reason a merge of size N could have failed where another of
16487 // the same size would not have, is if the alignment has
16488 // improved. Drop as many candidates as we can here.
16489 unsigned NumSkip = 1;
16490 while ((NumSkip < NumConsecutiveStores) &&
16491 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16494 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16495 NumConsecutiveStores -= NumSkip;
16499 // Check that we can merge these candidates without causing a cycle.
16500 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
// Merging would create a cycle; drop this group and retry with the rest.
16502 StoreNodes.erase(StoreNodes.begin(),
16503 StoreNodes.begin() + NumStoresToMerge);
16504 NumConsecutiveStores -= NumStoresToMerge;
16508 MadeChange |= mergeStoresOfConstantsOrVecElts(
16509 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
// Remove merged stores for the next iteration.
16511 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
16512 NumConsecutiveStores -= NumStoresToMerge;
// Try to merge the run of NumConsecutiveStores consecutive stores whose stored
// values are loads into a single wider load + wider store. Three forms are
// considered: a vector load/store (when AllowVectors), a plain integer
// load/store, and a truncating-store/extending-load pair when the integer type
// must be promoted. A reversed two-element pair may also be handled by loading
// the wide value and rotating it into place (ISD::ROTL). Non-temporal hints on
// the original loads/stores are propagated via the MMO flags.
// Candidates are consumed from the fronts of StoreNodes/LoadNodes as they are
// merged or proven unmergeable. Returns true if any merge was performed.
// NOTE(review): gaps in the embedded line numbering show that parts of several
// multi-line conditions and braces are elided in this listing; verify against
// the upstream source before editing.
16517 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
16518 unsigned NumConsecutiveStores, EVT MemVT,
16519 SDNode *RootNode, bool AllowVectors,
16520 bool IsNonTemporalStore,
16521 bool IsNonTemporalLoad) {
16522 LLVMContext &Context = *DAG.getContext();
16523 const DataLayout &DL = DAG.getDataLayout();
16524 int64_t ElementSizeBytes = MemVT.getStoreSize();
16525 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16526 bool MadeChange = false;
16528 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16530 // Look for load nodes which are used by the stored values.
16531 SmallVector<MemOpLink, 8> LoadNodes;
16533 // Find acceptable loads. Loads need to have the same chain (token factor),
16534 // must not be zext, volatile, indexed, and they must be consecutive.
16535 BaseIndexOffset LdBasePtr;
16537 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16538 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16539 SDValue Val = peekThroughBitcasts(St->getValue());
16540 LoadSDNode *Ld = cast<LoadSDNode>(Val);
16542 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
16543 // If this is not the first ptr that we check.
16544 int64_t LdOffset = 0;
16545 if (LdBasePtr.getBase().getNode()) {
16546 // The base ptr must be the same.
16547 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
16550 // Check that all other base pointers are the same as this one.
16554 // We found a potential memory operand to merge.
16555 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
16558 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
16559 Align RequiredAlignment;
16560 bool NeedRotate = false;
16561 if (LoadNodes.size() == 2) {
16562 // If we have load/store pair instructions and we only have two values,
16563 // don't bother merging.
16564 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
16565 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
16566 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
16567 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
16570 // If the loads are reversed, see if we can rotate the halves into place.
16571 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
16572 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
16573 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
16574 if (Offset0 - Offset1 == ElementSizeBytes &&
16575 (hasOperation(ISD::ROTL, PairVT) ||
16576 hasOperation(ISD::ROTR, PairVT))) {
16577 std::swap(LoadNodes[0], LoadNodes[1]);
16581 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16582 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16583 unsigned FirstStoreAlign = FirstInChain->getAlignment();
16584 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
16586 // Scan the memory operations on the chain and find the first
16587 // non-consecutive load memory address. These variables hold the index in
16588 // the store node array.
16590 unsigned LastConsecutiveLoad = 1;
16592 // This variable refers to the size and not index in the array.
16593 unsigned LastLegalVectorType = 1;
16594 unsigned LastLegalIntegerType = 1;
16595 bool isDereferenceable = true;
16596 bool DoIntegerTruncate = false;
16597 StartAddress = LoadNodes[0].OffsetFromBase;
16598 SDValue LoadChain = FirstLoad->getChain();
16599 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
16600 // All loads must share the same chain.
16601 if (LoadNodes[i].MemNode->getChain() != LoadChain)
16604 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
16605 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16607 LastConsecutiveLoad = i;
// The merged load is dereferenceable only if every constituent load is.
16609 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
16610 isDereferenceable = false;
16612 // Find a legal type for the vector store.
16613 unsigned Elts = (i + 1) * NumMemElts;
16614 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16616 // Break early when size is too large to be legal.
16617 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16620 bool IsFastSt = false;
16621 bool IsFastLd = false;
16622 if (TLI.isTypeLegal(StoreTy) &&
16623 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16624 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16625 *FirstInChain->getMemOperand(), &IsFastSt) &&
16627 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16628 *FirstLoad->getMemOperand(), &IsFastLd) &&
16630 LastLegalVectorType = i + 1;
16633 // Find a legal type for the integer store.
16634 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16635 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16636 if (TLI.isTypeLegal(StoreTy) &&
16637 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16638 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16639 *FirstInChain->getMemOperand(), &IsFastSt) &&
16641 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16642 *FirstLoad->getMemOperand(), &IsFastLd) &&
16644 LastLegalIntegerType = i + 1;
16645 DoIntegerTruncate = false;
16646 // Or check whether a truncstore and extload is legal.
16647 } else if (TLI.getTypeAction(Context, StoreTy) ==
16648 TargetLowering::TypePromoteInteger) {
16649 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
16650 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16651 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16652 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
16653 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
16654 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
16655 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16656 *FirstInChain->getMemOperand(), &IsFastSt) &&
16658 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16659 *FirstLoad->getMemOperand(), &IsFastLd) &&
16661 LastLegalIntegerType = i + 1;
16662 DoIntegerTruncate = true;
16667 // Only use vector types if the vector type is larger than the integer
16668 // type. If they are the same, use integers.
16670 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
16671 unsigned LastLegalType =
16672 std::max(LastLegalVectorType, LastLegalIntegerType);
16674 // We add +1 here because the LastXXX variables refer to location while
16675 // the NumElem refers to array/index size.
16676 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
16677 NumElem = std::min(LastLegalType, NumElem);
16678 unsigned FirstLoadAlign = FirstLoad->getAlignment();
16681 // We know that candidate stores are in order and of correct
16682 // shape. While there is no mergeable sequence from the
16683 // beginning one may start later in the sequence. The only
16684 // reason a merge of size N could have failed where another of
16685 // the same size would not have is if the alignment or either
16686 // the load or store has improved. Drop as many candidates as we
16688 unsigned NumSkip = 1;
16689 while ((NumSkip < LoadNodes.size()) &&
16690 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
16691 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16693 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16694 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
16695 NumConsecutiveStores -= NumSkip;
16699 // Check that we can merge these candidates without causing a cycle.
16700 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
// Merging would create a cycle; drop this group and retry with the rest.
16702 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16703 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16704 NumConsecutiveStores -= NumElem;
16708 // Find if it is better to use vectors or integers to load and store
16712 // Find a legal type for the vector store.
16713 unsigned Elts = NumElem * NumMemElts;
16714 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16716 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
16717 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
16720 SDLoc LoadDL(LoadNodes[0].MemNode);
16721 SDLoc StoreDL(StoreNodes[0].MemNode);
16723 // The merged loads are required to have the same incoming chain, so
16724 // using the first's chain is acceptable.
16726 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16727 AddToWorklist(NewStoreChain.getNode());
16729 MachineMemOperand::Flags LdMMOFlags =
16730 isDereferenceable ? MachineMemOperand::MODereferenceable
16731 : MachineMemOperand::MONone;
16732 if (IsNonTemporalLoad)
16733 LdMMOFlags |= MachineMemOperand::MONonTemporal;
16735 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
16736 ? MachineMemOperand::MONonTemporal
16737 : MachineMemOperand::MONone;
16739 SDValue NewLoad, NewStore;
16740 if (UseVectorTy || !DoIntegerTruncate) {
16741 NewLoad = DAG.getLoad(
16742 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
16743 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
16744 SDValue StoreOp = NewLoad;
16746 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
16747 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
16748 "Unexpected type for rotate-able load pair");
16750 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
16751 // Target can convert to the identical ROTR if it does not have ROTL.
16752 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
16754 NewStore = DAG.getStore(
16755 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
16756 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16757 } else { // This must be the truncstore/extload case
16759 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16760 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16761 FirstLoad->getChain(), FirstLoad->getBasePtr(),
16762 FirstLoad->getPointerInfo(), JointMemOpVT,
16763 FirstLoadAlign, LdMMOFlags);
16764 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16765 FirstInChain->getBasePtr(),
16766 FirstInChain->getPointerInfo(), JointMemOpVT,
16767 FirstInChain->getAlignment(),
16768 FirstInChain->getMemOperand()->getFlags());
16771 // Transfer chain users from old loads to the new load.
16772 for (unsigned i = 0; i < NumElem; ++i) {
16773 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16774 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16775 SDValue(NewLoad.getNode(), 1));
16778 // Replace all stores with the new store. Recursively remove corresponding
16779 // values if they are no longer used.
16780 for (unsigned i = 0; i < NumElem; ++i) {
16781 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16782 CombineTo(StoreNodes[i].MemNode, NewStore);
16783 if (Val.getNode()->use_empty())
16784 recursivelyDeleteUnusedNodes(Val.getNode());
// Remove the merged loads/stores and continue with the remaining run.
16788 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16789 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16790 NumConsecutiveStores -= NumElem;
// Top-level driver for store merging. Bails out when merging is disabled, the
// type is scalable/non-simple/non-byte-sized, or the stored value is not a
// constant, load, or extracted vector element. Otherwise gathers candidate
// stores reachable through the chain, sorts them by offset from a common base,
// and repeatedly finds runs of consecutive stores, dispatching to the
// per-source merge helper (constants / extracts / loads).
// Returns true if any merge was performed.
// NOTE(review): gaps in the embedded line numbering show that some return
// statements and braces are elided in this listing; verify against the
// upstream source before editing.
16795 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
16796 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
16799 // TODO: Extend this function to merge stores of scalable vectors.
16800 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
16801 // store since we know <vscale x 16 x i8> is exactly twice as large as
16802 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
16803 EVT MemVT = St->getMemoryVT();
16804 if (MemVT.isScalableVector())
// A single merged store must still fit in the widest legal store, so a
// type already more than half that size cannot be merged with anything.
16806 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
16809 // This function cannot currently deal with non-byte-sized memory sizes.
16810 int64_t ElementSizeBytes = MemVT.getStoreSize();
16811 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
16814 // Do not bother looking at stored values that are not constants, loads, or
16815 // extracted vector elements.
16816 SDValue StoredVal = peekThroughBitcasts(St->getValue());
16817 const StoreSource StoreSrc = getStoreSource(StoredVal);
16818 if (StoreSrc == StoreSource::Unknown)
16821 SmallVector<MemOpLink, 8> StoreNodes;
16823 // Find potential store merge candidates by searching through chain sub-DAG
16824 getStoreMergeCandidates(St, StoreNodes, RootNode);
16826 // Check if there is anything to merge.
16827 if (StoreNodes.size() < 2)
16830 // Sort the memory operands according to their distance from the
16832 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
16833 return LHS.OffsetFromBase < RHS.OffsetFromBase;
// Vector merging is disallowed under the noimplicitfloat attribute.
16836 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
16837 Attribute::NoImplicitFloat);
16838 bool IsNonTemporalStore = St->isNonTemporal();
16839 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
16840 cast<LoadSDNode>(StoredVal)->isNonTemporal();
16842 // Store Merge attempts to merge the lowest stores. This generally
16843 // works out as if successful, as the remaining stores are checked
16844 // after the first collection of stores is merged. However, in the
16845 // case that a non-mergeable store is found first, e.g., {p[-2],
16846 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
16847 // mergeable cases. To prevent this, we prune such stores from the
16848 // front of StoreNodes here.
16849 bool MadeChange = false;
16850 while (StoreNodes.size() > 1) {
16851 unsigned NumConsecutiveStores =
16852 getConsecutiveStores(StoreNodes, ElementSizeBytes);
16853 // There are no more stores in the list to examine.
16854 if (NumConsecutiveStores == 0)
16857 // We have at least 2 consecutive stores. Try to merge them.
16858 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
16859 switch (StoreSrc) {
16860 case StoreSource::Constant:
16861 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
16862 MemVT, RootNode, AllowVectors);
16865 case StoreSource::Extract:
16866 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
16870 case StoreSource::Load:
16871 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
16872 MemVT, RootNode, AllowVectors,
16873 IsNonTemporalStore, IsNonTemporalLoad);
16877 llvm_unreachable("Unhandled store source type");
16883 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16887 // Replace the chain to avoid dependency.
16888 if (ST->isTruncatingStore()) {
16889 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16890 ST->getBasePtr(), ST->getMemoryVT(),
16891 ST->getMemOperand());
16893 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16894 ST->getMemOperand());
16897 // Create token to keep both nodes around.
16898 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16899 MVT::Other, ST->getChain(), ReplStore);
16901 // Make sure the new and old chains are cleaned up.
16902 AddToWorklist(Token.getNode());
16904 // Don't add users to work list.
16905 return CombineTo(ST, Token, false);
// Turn a store of a floating-point constant into a store of the equivalent
// integer bit pattern (e.g. 'store float 1.0' -> 'store i32 0x3F800000'),
// when the integer store is legal/custom. An f64 store may instead be split
// into two i32 stores (endian-aware) when i64 stores are unavailable.
// Returns the replacement store chain, or (per the elided paths) no value
// when the transform does not apply.
// NOTE(review): gaps in the embedded line numbering show that some switch
// cases, returns, and braces are elided in this listing; verify against the
// upstream source before editing.
16908 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16909 SDValue Value = ST->getValue();
16910 if (Value.getOpcode() == ISD::TargetConstantFP)
16913 if (!ISD::isNormalStore(ST))
16918 SDValue Chain = ST->getChain();
16919 SDValue Ptr = ST->getBasePtr();
16921 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16923 // NOTE: If the original store is volatile, this transform must not increase
16924 // the number of stores. For example, on x86-32 an f64 can be stored in one
16925 // processor operation but an i64 (which is not legal) requires two. So the
16926 // transform should not be done in this case.
16929 switch (CFP->getSimpleValueType(0).SimpleTy) {
16931 llvm_unreachable("Unknown FP type");
16932 case MVT::f16: // We don't do this for these yet.
// f32 case: bitcast the APFloat to a 32-bit integer constant.
16938 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
16939 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16941 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16942 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16944 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
// f64 case: prefer a single i64 store when legal...
16949 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16951 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16953 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16954 getZExtValue(), SDLoc(CFP), MVT::i64);
16955 return DAG.getStore(Chain, DL, Tmp,
16956 Ptr, ST->getMemOperand());
// ...otherwise split into two i32 stores (simple stores only, since the
// split doubles the number of memory operations).
16959 if (ST->isSimple() &&
16960 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16961 // Many FP stores are not made apparent until after legalize, e.g. for
16962 // argument passing. Since this is so common, custom legalize the
16963 // 64-bit integer store into two 32-bit stores.
16964 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16965 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16966 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
// On big-endian targets the high half goes at the lower address; the
// elided line here presumably swaps Lo/Hi — TODO confirm upstream.
16967 if (DAG.getDataLayout().isBigEndian())
16970 unsigned Alignment = ST->getAlignment();
16971 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16972 AAMDNodes AAInfo = ST->getAAInfo();
16974 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16975 ST->getAlignment(), MMOFlags, AAInfo);
16976 Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
16977 Alignment = MinAlign(Alignment, 4U);
16978 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16979 ST->getPointerInfo().getWithOffset(4),
16980 Alignment, MMOFlags, AAInfo);
// Tie the two halves together so both stores are kept.
16981 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
// Main combine entry point for ISD::STORE nodes. In order, it tries:
// store-of-bitcast simplification, dead 'store undef', alignment refinement,
// FP<->int load/store pair transform, store(BSWAP) matching, chain
// improvement, truncstore demanded-bits simplification, dead-store removal
// (store after identical load/store of the same location), folding
// FP_ROUND/TRUNCATE into a truncating store, consecutive-store merging,
// indexed-store formation, FP-constant replacement, merged-value splitting,
// and finally ReduceLoadOpStoreWidth.
// NOTE(review): gaps in the embedded line numbering show that some returns
// and braces are elided in this listing; verify against the upstream source
// before editing.
16989 SDValue DAGCombiner::visitSTORE(SDNode *N) {
16990 StoreSDNode *ST = cast<StoreSDNode>(N);
16991 SDValue Chain = ST->getChain();
16992 SDValue Value = ST->getValue();
16993 SDValue Ptr = ST->getBasePtr();
16995 // If this is a store of a bit convert, store the input value if the
16996 // resultant store does not need a higher alignment than the original.
16997 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
16998 ST->isUnindexed()) {
16999 EVT SVT = Value.getOperand(0).getValueType();
17000 // If the store is volatile, we only want to change the store type if the
17001 // resulting store is legal. Otherwise we might increase the number of
17002 // memory accesses. We don't care if the original type was legal or not
17003 // as we assume software couldn't rely on the number of accesses of an
17005 // TODO: May be able to relax for unordered atomics (see D66309)
17006 if (((!LegalOperations && ST->isSimple()) ||
17007 TLI.isOperationLegal(ISD::STORE, SVT)) &&
17008 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17009 DAG, *ST->getMemOperand())) {
17010 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17011 ST->getMemOperand());
17015 // Turn 'store undef, Ptr' -> nothing.
17016 if (Value.isUndef() && ST->isUnindexed())
17019 // Try to infer better alignment information than the store already has.
17020 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17021 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17022 if (*Alignment > ST->getAlign() &&
17023 isAligned(*Alignment, ST->getSrcValueOffset())) {
17025 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17026 ST->getMemoryVT(), *Alignment,
17027 ST->getMemOperand()->getFlags(), ST->getAAInfo());
17028 // NewStore will always be N as we are only refining the alignment
17029 assert(NewStore.getNode() == N);
17035 // Try transforming a pair floating point load / store ops to integer
17036 // load / store ops.
17037 if (SDValue NewST = TransformFPLoadStorePair(N))
17040 // Try transforming several stores into STORE (BSWAP).
17041 if (SDValue Store = MatchStoreCombine(ST))
17044 if (ST->isUnindexed()) {
17045 // Walk up chain skipping non-aliasing memory nodes, on this store and any
17046 // adjacent stores.
17047 if (findBetterNeighborChains(ST)) {
17048 // replaceStoreChain uses CombineTo, which handled all of the worklist
17049 // manipulation. Return the original node to not do anything else.
17050 return SDValue(ST, 0);
// Re-read the chain: findBetterNeighborChains may have updated it.
17052 Chain = ST->getChain();
17055 // FIXME: is there such a thing as a truncating indexed store?
17056 if (ST->isTruncatingStore() && ST->isUnindexed() &&
17057 Value.getValueType().isInteger() &&
17058 (!isa<ConstantSDNode>(Value) ||
17059 !cast<ConstantSDNode>(Value)->isOpaque())) {
17060 APInt TruncDemandedBits =
17061 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
17062 ST->getMemoryVT().getScalarSizeInBits());
17064 // See if we can simplify the input to this truncstore with knowledge that
17065 // only the low bits are being used. For example:
17066 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
17067 AddToWorklist(Value.getNode());
17068 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
17069 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
17070 ST->getMemOperand());
17072 // Otherwise, see if we can simplify the operation with
17073 // SimplifyDemandedBits, which only works if the value has a single use.
17074 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
17075 // Re-visit the store if anything changed and the store hasn't been merged
17076 // with another node (N is deleted) SimplifyDemandedBits will add Value's
17077 // node back to the worklist if necessary, but we also need to re-visit
17078 // the Store node itself.
17079 if (N->getOpcode() != ISD::DELETED_NODE)
17081 return SDValue(N, 0);
17085 // If this is a load followed by a store to the same location, then the store
17087 // TODO: Can relax for unordered atomics (see D66309)
17088 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
17089 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
17090 ST->isUnindexed() && ST->isSimple() &&
17091 // There can't be any side effects between the load and store, such as
17092 // a call or store.
17093 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
17094 // The store is dead, remove it.
17099 // TODO: Can relax for unordered atomics (see D66309)
17100 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
17101 if (ST->isUnindexed() && ST->isSimple() &&
17102 ST1->isUnindexed() && ST1->isSimple()) {
17103 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
17104 ST->getMemoryVT() == ST1->getMemoryVT()) {
17105 // If this is a store followed by a store with the same value to the
17106 // same location, then the store is dead/noop.
17110 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
17111 !ST1->getBasePtr().isUndef() &&
17112 // BaseIndexOffset and the code below requires knowing the size
17113 // of a vector, so bail out if MemoryVT is scalable.
17114 !ST1->getMemoryVT().isScalableVector()) {
17115 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
17116 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
17117 unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
17118 unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
17119 // If this is a store who's preceding store to a subset of the current
17120 // location and no one other node is chained to that store we can
17121 // effectively drop the store. Do not remove stores to undef as they may
17122 // be used as data sinks.
17123 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
17124 CombineTo(ST1, ST1->getChain());
17131 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
17132 // truncating store. We can do this even if this is already a truncstore.
17133 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
17134 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
17135 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
17136 ST->getMemoryVT())) {
17137 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
17138 Ptr, ST->getMemoryVT(), ST->getMemOperand());
17141 // Always perform this optimization before types are legal. If the target
17142 // prefers, also try this after legalization to catch stores that were created
17143 // by intrinsics or other nodes.
17144 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
17146 // There can be multiple store sequences on the same chain.
17147 // Keep trying to merge store sequences until we are unable to do so
17148 // or until we merge the last store on the chain.
17149 bool Changed = mergeConsecutiveStores(ST);
17150 if (!Changed) break;
17151 // Return N as merge only uses CombineTo and no worklist clean
17152 // up is necessary.
17153 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
17154 return SDValue(N, 0);
17158 // Try transforming N to an indexed store.
17159 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
17160 return SDValue(N, 0);
17162 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
17164 // Make sure to do this only after attempting to merge stores in order to
17165 // avoid changing the types of some subset of stores due to visit order,
17166 // preventing their merging.
17167 if (isa<ConstantFPSDNode>(ST->getValue())) {
17168 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
17172 if (SDValue NewSt = splitMergedValStore(ST))
17175 return ReduceLoadOpStoreWidth(N);
// Remove stores that are made redundant by an immediately-following
// LIFETIME_END: walk up the chain (through TokenFactors and non-aliasing
// lifetime markers), and delete any simple, unindexed store that writes
// entirely within the object whose lifetime is ending.
// NOTE(review): gaps in the embedded line numbering show that some returns,
// braces, and a default case appear elided in this listing; verify against
// the upstream source before editing.
17178 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
17179 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
17180 if (!LifetimeEnd->hasOffset())
17183 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
17184 LifetimeEnd->getOffset(), false);
17186 // We walk up the chains to find stores.
17187 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
17188 while (!Chains.empty()) {
17189 SDValue Chain = Chains.back();
// Only traverse chains with a single user, so removing a store here
// cannot break another consumer of the same chain value.
17191 if (!Chain.hasOneUse())
17193 switch (Chain.getOpcode()) {
17194 case ISD::TokenFactor:
17195 for (unsigned Nops = Chain.getNumOperands(); Nops;)
17196 Chains.push_back(Chain.getOperand(--Nops));
17198 case ISD::LIFETIME_START:
17199 case ISD::LIFETIME_END:
17200 // We can forward past any lifetime start/end that can be proven not to
17202 if (!isAlias(Chain.getNode(), N))
17203 Chains.push_back(Chain.getOperand(0));
17206 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
17207 // TODO: Can relax for unordered atomics (see D66309)
17208 if (!ST->isSimple() || ST->isIndexed())
17210 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
17211 // If we store purely within object bounds just before its lifetime ends,
17212 // we can remove the store.
17213 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
17214 ST->getMemoryVT().getStoreSizeInBits())) {
17215 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
17216 dbgs() << "\nwithin LIFETIME_END of : ";
17217 LifetimeEndBase.dump(); dbgs() << "\n");
17218 CombineTo(ST, ST->getChain());
17219 return SDValue(N, 0);
17227 /// For the instruction sequence of store below, F and I values
17228 /// are bundled together as an i64 value before being stored into memory.
17229 /// Sometimes it is more efficient to generate separate stores for F and I,
17230 /// which can remove the bitwise instructions or sink them to colder places.
17232 /// (store (or (zext (bitcast F to i32) to i64),
17233 /// (shl (zext I to i64), 32)), addr) -->
17234 /// (store F, addr) and (store I, addr+4)
17236 /// Similarly, splitting for other merged store can also be beneficial, like:
17237 /// For pair of {i32, i32}, i64 store --> two i32 stores.
17238 /// For pair of {i32, i16}, i64 store --> two i32 stores.
17239 /// For pair of {i16, i16}, i32 store --> two i16 stores.
17240 /// For pair of {i16, i8}, i32 store --> two i16 stores.
17241 /// For pair of {i8, i8}, i16 store --> two i8 stores.
17243 /// We allow each target to determine specifically which kind of splitting is
17246 /// The store patterns are commonly seen from the simple code snippet below
17247 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
17248 /// void goo(const std::pair<int, float> &);
17251 /// goo(std::make_pair(tmp, ftmp));
// Split a store of (or (zext Lo), (shl (zext Hi), half-width)) into two
// narrower stores of Lo and Hi when the target reports that to be cheaper
// (see the doc comment above). The interface takes the candidate store and
// returns the new store chain on success.
// NOTE(review): interior lines (early `return SDValue();` statements, the
// `SDLoc DL` and `Lo` declarations) appear elided in this view.
17255 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
17256 if (OptLevel == CodeGenOpt::None)
17259 // Can't change the number of memory accesses for a volatile store or break
17260 // atomicity for an atomic one.
17261 if (!ST->isSimple())
17264 SDValue Val = ST->getValue();
17267 // Match OR operand.
17268 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
17271 // Match SHL operand and get Lower and Higher parts of Val.
17272 SDValue Op1 = Val.getOperand(0);
17273 SDValue Op2 = Val.getOperand(1);
// Canonicalize so Op1 is the SHL (the high half); OR is commutative.
17275 if (Op1.getOpcode() != ISD::SHL) {
17276 std::swap(Op1, Op2);
17277 if (Op1.getOpcode() != ISD::SHL)
17281 Hi = Op1.getOperand(0);
17282 if (!Op1.hasOneUse())
17285 // Match shift amount to HalfValBitSize.
17286 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
17287 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
17288 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
17291 // Lo and Hi are zero-extended from int with size less equal than 32
17293 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
17294 !Lo.getOperand(0).getValueType().isScalarInteger() ||
17295 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
17296 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
17297 !Hi.getOperand(0).getValueType().isScalarInteger() ||
17298 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
17301 // Use the EVT of low and high parts before bitcast as the input
17302 // of target query.
17303 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
17304 ? Lo.getOperand(0).getValueType()
17305 : Lo.getValueType();
17306 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
17307 ? Hi.getOperand(0).getValueType()
17308 : Hi.getValueType();
// Let the target decide whether two narrow stores beat the bit-merge.
17309 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
17312 // Start to split store.
17313 unsigned Alignment = ST->getAlignment();
17314 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17315 AAMDNodes AAInfo = ST->getAAInfo();
17317 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
17318 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
17319 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
17320 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
17322 SDValue Chain = ST->getChain();
17323 SDValue Ptr = ST->getBasePtr();
17324 // Lower value store.
17325 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17326 ST->getAlignment(), MMOFlags, AAInfo);
17327 Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
17328 // Higher value store.
// The upper store is chained after St0 and its alignment is halved since it
// sits HalfValBitSize/8 bytes past the original pointer.
17330 DAG.getStore(St0, DL, Hi, Ptr,
17331 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
17332 Alignment / 2, MMOFlags, AAInfo);
17336 /// Convert a disguised subvector insertion into a shuffle:
// Two patterns are handled: (1) inserting an element extracted from one of a
// shuffle's (possibly concatenated) inputs is folded into a new shuffle mask;
// (2) inserting a bitcast-from-vector value becomes a bitcast of a shuffle
// with a widened (undef-padded) second operand.
17337 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
17338 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
// Fixed assert message: the checked opcode is INSERT_VECTOR_ELT, but the
// message previously claimed "Expected extract_vector_elt".
17339 "Expected insert_vector_elt");
17340 SDValue InsertVal = N->getOperand(1);
17341 SDValue Vec = N->getOperand(0);
17343 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
17345 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
17347 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
17348 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17349 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
17350 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
17351 ArrayRef<int> Mask = SVN->getMask();
17353 SDValue X = Vec.getOperand(0);
17354 SDValue Y = Vec.getOperand(1);
17356 // Vec's operand 0 is using indices from 0 to N-1 and
17357 // operand 1 from N to 2N - 1, where N is the number of
17358 // elements in the vectors.
17359 SDValue InsertVal0 = InsertVal.getOperand(0);
17360 int ElementOffset = -1;
17362 // We explore the inputs of the shuffle in order to see if we find the
17363 // source of the extract_vector_elt. If so, we can use it to modify the
17364 // shuffle rather than perform an insert_vector_elt.
17365 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
17366 ArgWorkList.emplace_back(Mask.size(), Y);
17367 ArgWorkList.emplace_back(0, X);
17369 while (!ArgWorkList.empty()) {
17372 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
17374 if (ArgVal == InsertVal0) {
17375 ElementOffset = ArgOffset;
17379 // Peek through concat_vector.
17380 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
17381 int CurrentArgOffset =
17382 ArgOffset + ArgVal.getValueType().getVectorNumElements();
17383 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
// Walk the concat operands in reverse so offsets count down from the end.
17384 for (SDValue Op : reverse(ArgVal->ops())) {
17385 CurrentArgOffset -= Step;
17386 ArgWorkList.emplace_back(CurrentArgOffset, Op);
17389 // Make sure we went through all the elements and did not screw up index
17391 assert(CurrentArgOffset == ArgOffset);
17395 if (ElementOffset != -1) {
17396 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
17398 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
// Redirect the inserted lane to pull directly from the located source.
17399 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
17400 assert(NewMask[InsIndex] <
17401 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
17402 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
17404 SDValue LegalShuffle =
17405 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
17408 return LegalShuffle;
17412 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
17413 // bitcast(shuffle (bitcast V), (extended X), Mask)
17414 // Note: We do not use an insert_subvector node because that requires a
17415 // legal subvector type.
17416 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
17417 !InsertVal.getOperand(0).getValueType().isVector())
17420 SDValue SubVec = InsertVal.getOperand(0);
17421 SDValue DestVec = N->getOperand(0);
17422 EVT SubVecVT = SubVec.getValueType();
17423 EVT VT = DestVec.getValueType();
17424 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
17425 // If the source only has a single vector element, the cost of creating and
17426 // adding it to a vector is likely to exceed the cost of an insert_vector_elt.
17427 if (NumSrcElts == 1)
17429 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
17430 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
17432 // Step 1: Create a shuffle mask that implements this insert operation. The
17433 // vector that we are inserting into will be operand 0 of the shuffle, so
17434 // those elements are just 'i'. The inserted subvector is in the first
17435 // positions of operand 1 of the shuffle. Example:
17436 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
17437 SmallVector<int, 16> Mask(NumMaskVals);
17438 for (unsigned i = 0; i != NumMaskVals; ++i) {
17439 if (i / NumSrcElts == InsIndex)
17440 Mask[i] = (i % NumSrcElts) + NumMaskVals;
17445 // Bail out if the target can not handle the shuffle we want to create.
17446 EVT SubVecEltVT = SubVecVT.getVectorElementType();
17447 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
17448 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
17451 // Step 2: Create a wide vector from the inserted source vector by appending
17452 // undefined elements. This is the same size as our destination vector.
17454 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
17455 ConcatOps[0] = SubVec;
17456 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
17458 // Step 3: Shuffle in the padded subvector.
17459 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
17460 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
17461 AddToWorklist(PaddedSubV.getNode());
17462 AddToWorklist(DestVecBC.getNode());
17463 AddToWorklist(Shuf.getNode());
17464 return DAG.getBitcast(VT, Shuf);
// Main combine entry point for INSERT_VECTOR_ELT nodes: folds out-of-bounds
// and redundant inserts, splats variable inserts into undef, canonicalizes
// chains of constant-index inserts, and converts inserts into BUILD_VECTOR
// where legal.
17467 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
17468 SDValue InVec = N->getOperand(0);
17469 SDValue InVal = N->getOperand(1);
17470 SDValue EltNo = N->getOperand(2);
17473 EVT VT = InVec.getValueType();
17474 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17476 // Insert into out-of-bounds element is undefined.
17477 if (IndexC && VT.isFixedLengthVector() &&
17478 IndexC->getZExtValue() >= VT.getVectorNumElements())
17479 return DAG.getUNDEF(VT);
17481 // Remove redundant insertions:
17482 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
17483 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17484 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
17488 // If this is variable insert to undef vector, it might be better to splat:
17489 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
17490 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
// Scalable vectors cannot enumerate lanes; use the splat node instead.
17491 if (VT.isScalableVector())
17492 return DAG.getSplatVector(VT, DL, InVal);
17494 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
17495 return DAG.getBuildVector(VT, DL, Ops);
17501 if (VT.isScalableVector())
17504 unsigned NumElts = VT.getVectorNumElements();
17506 // We must know which element is being inserted for folds below here.
17507 unsigned Elt = IndexC->getZExtValue();
17508 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
17511 // Canonicalize insert_vector_elt dag nodes.
17513 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
17514 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
17516 // Do this only if the child insert_vector node has one use; also
17517 // do this only if indices are both constants and Idx1 < Idx0.
17518 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
17519 && isa<ConstantSDNode>(InVec.getOperand(2))) {
17520 unsigned OtherElt = InVec.getConstantOperandVal(2);
17521 if (Elt < OtherElt) {
// Swap the two inserts so the smaller index is applied first.
17523 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
17524 InVec.getOperand(0), InVal, EltNo);
17525 AddToWorklist(NewOp.getNode());
17526 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
17527 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
17531 // If we can't generate a legal BUILD_VECTOR, exit
17532 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
17535 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
17536 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
17537 // vector elements.
17538 SmallVector<SDValue, 8> Ops;
17539 // Do not combine these two vectors if the output vector will not replace
17540 // the input vector.
17541 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
17542 Ops.append(InVec.getNode()->op_begin(),
17543 InVec.getNode()->op_end());
17544 } else if (InVec.isUndef()) {
// An undef input vector becomes a BUILD_VECTOR of all-undef lanes.
17545 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
17549 assert(Ops.size() == NumElts && "Unexpected vector size");
17551 // Insert the element
17552 if (Elt < Ops.size()) {
17553 // All the operands of BUILD_VECTOR must have the same type;
17554 // we enforce that here.
17555 EVT OpVT = Ops[0].getValueType();
17556 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
17559 // Return the new vector
17560 return DAG.getBuildVector(VT, DL, Ops);
// Replace (extract_vector_elt (load addr), idx) with a narrow scalar load of
// the single element at addr + idx * eltsize, when the target and alignment
// rules allow the narrower access. Returns the replaced extract node.
17563 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
17565 LoadSDNode *OriginalLoad) {
17566 assert(OriginalLoad->isSimple());
17568 EVT ResultVT = EVE->getValueType(0);
17569 EVT VecEltVT = InVecVT.getVectorElementType();
17570 Align Alignment = OriginalLoad->getAlign();
17571 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
17572 VecEltVT.getTypeForEVT(*DAG.getContext()));
// Bail if the element's ABI alignment exceeds the original load's alignment,
// or scalar loads of this type are not legal/custom.
17574 if (NewAlign > Alignment ||
17575 !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
17578 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
17579 ISD::NON_EXTLOAD : ISD::EXTLOAD;
17580 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
17583 Alignment = NewAlign;
17585 SDValue NewPtr = OriginalLoad->getBasePtr();
17587 EVT PtrType = NewPtr.getValueType();
17588 MachinePointerInfo MPI;
// Constant index: compute the byte offset statically and keep pointer info.
17590 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
17591 int Elt = ConstEltNo->getZExtValue();
17592 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
17593 Offset = DAG.getConstant(PtrOff, DL, PtrType);
17594 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
// Variable index: materialize idx * storesize as a runtime offset.
17596 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
17597 Offset = DAG.getNode(
17598 ISD::MUL, DL, PtrType, Offset,
17599 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
17600 // Discard the pointer info except the address space because the memory
17601 // operand can't represent this new access since the offset is variable.
17602 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
17604 NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
17606 // The replacement we need to do here is a little tricky: we need to
17607 // replace an extractelement of a load with a load.
17608 // Use ReplaceAllUsesOfValuesWith to do the replacement.
17609 // Note that this replacement assumes that the extractvalue is the only
17610 // use of the load; that's okay because we don't want to perform this
17611 // transformation in other cases anyway.
17614 if (ResultVT.bitsGT(VecEltVT)) {
17615 // If the result type of vextract is wider than the load, then issue an
17616 // extending load instead.
17617 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
17621 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
17622 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
17623 Alignment, OriginalLoad->getMemOperand()->getFlags(),
17624 OriginalLoad->getAAInfo());
17625 Chain = Load.getValue(1);
17627 Load = DAG.getLoad(
17628 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
17629 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
17630 Chain = Load.getValue(1);
// Narrow result types are truncated; mismatched same-size types bitcast.
17631 if (ResultVT.bitsLT(VecEltVT))
17632 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
17634 Load = DAG.getBitcast(ResultVT, Load);
17636 WorklistRemover DeadNodes(*this);
// Replace both the extract's value and the original load's chain output.
17637 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
17638 SDValue To[] = { Load, Chain };
17639 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
17640 // Make sure to revisit this node to clean it up; it will usually be dead.
17641 AddToWorklist(EVE);
17642 // Since we're explicitly calling ReplaceAllUses, add the new node to the
17643 // worklist explicitly as well.
17644 AddToWorklistWithUsers(Load.getNode());
17646 return SDValue(EVE, 0);
17649 /// Transform a vector binary operation into a scalar binary operation by moving
17650 /// the math/logic after an extract element of a vector.
// extractelt (binop X, Y), C --> binop (extractelt X, C), (extractelt Y, C)
// Applied only when the index is constant, the vector op is single-use and
// single-result, the target opts in, and one operand is a constant
// build_vector (so one of the new extracts constant-folds away).
17651 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
17652 bool LegalOperations) {
17653 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17654 SDValue Vec = ExtElt->getOperand(0);
17655 SDValue Index = ExtElt->getOperand(1);
17656 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17657 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
17658 Vec.getNode()->getNumValues() != 1)
17661 // Targets may want to avoid this to prevent an expensive register transfer.
17662 if (!TLI.shouldScalarizeBinop(Vec))
17665 // Extracting an element of a vector constant is constant-folded, so this
17666 // transform is just replacing a vector op with a scalar op while moving the
17668 SDValue Op0 = Vec.getOperand(0);
17669 SDValue Op1 = Vec.getOperand(1);
17670 if (isAnyConstantBuildVector(Op0, true) ||
17671 isAnyConstantBuildVector(Op1, true)) {
17672 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
17673 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
17675 EVT VT = ExtElt->getValueType(0);
17676 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
17677 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
17678 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
// Main combine entry point for EXTRACT_VECTOR_ELT: folds extracts of undef,
// insert_vector_elt, scalar_to_vector, build_vector, bitcast, shuffle and
// concat sources; simplifies via demanded elements/bits; and finally tries
// to turn an extract-of-load into a single scalar load.
// NOTE(review): many interior lines are elided in this view (gaps in the
// embedded numbering), including early returns and some declarations.
17684 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
17685 SDValue VecOp = N->getOperand(0);
17686 SDValue Index = N->getOperand(1);
17687 EVT ScalarVT = N->getValueType(0);
17688 EVT VecVT = VecOp.getValueType();
17689 if (VecOp.isUndef())
17690 return DAG.getUNDEF(ScalarVT);
17692 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
17694 // This only really matters if the index is non-constant since other combines
17695 // on the constant elements already work.
17697 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
17698 Index == VecOp.getOperand(2)) {
17699 SDValue Elt = VecOp.getOperand(1);
17700 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
17703 // (vextract (scalar_to_vector val, 0) -> val
17704 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17705 // Only 0'th element of SCALAR_TO_VECTOR is defined.
17706 if (DAG.isKnownNeverZero(Index))
17707 return DAG.getUNDEF(ScalarVT);
17709 // Check if the result type doesn't match the inserted element type. A
17710 // SCALAR_TO_VECTOR may truncate the inserted element and the
17711 // EXTRACT_VECTOR_ELT may widen the extracted vector.
17712 SDValue InOp = VecOp.getOperand(0);
17713 if (InOp.getValueType() != ScalarVT) {
17714 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17715 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17720 // extract_vector_elt of out-of-bounds element -> UNDEF
17721 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17722 if (IndexC && VecVT.isFixedLengthVector() &&
17723 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
17724 return DAG.getUNDEF(ScalarVT);
17726 // extract_vector_elt (build_vector x, y), 1 -> y
17727 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
17728 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
17729 TLI.isTypeLegal(VecVT) &&
17730 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
17731 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
17732 VecVT.isFixedLengthVector()) &&
17733 "BUILD_VECTOR used for scalable vectors");
// For SPLAT_VECTOR every lane is operand 0, so index 0 is always correct.
17734 unsigned IndexVal =
17735 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
17736 SDValue Elt = VecOp.getOperand(IndexVal);
17737 EVT InEltVT = Elt.getValueType();
17739 // Sometimes build_vector's scalar input types do not match result type.
17740 if (ScalarVT == InEltVT)
17743 // TODO: It may be useful to truncate if free if the build_vector implicitly
17747 if (VecVT.isScalableVector())
17750 // All the code from this point onwards assumes fixed width vectors, but it's
17751 // possible that some of the combinations could be made to work for scalable
17753 unsigned NumElts = VecVT.getVectorNumElements();
17754 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
17756 // TODO: These transforms should not require the 'hasOneUse' restriction, but
17757 // there are regressions on multiple targets without it. We can end up with a
17758 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
17759 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
17760 VecOp.hasOneUse()) {
17761 // The vector index of the LSBs of the source depend on the endian-ness.
17762 bool IsLE = DAG.getDataLayout().isLittleEndian();
17763 unsigned ExtractIndex = IndexC->getZExtValue();
17764 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
17765 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
17766 SDValue BCSrc = VecOp.getOperand(0);
17767 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
17768 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
17770 if (LegalTypes && BCSrc.getValueType().isInteger() &&
17771 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17772 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
17773 // trunc i64 X to i32
17774 SDValue X = BCSrc.getOperand(0);
17775 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
17776 "Extract element and scalar to vector can't change element type "
17777 "from FP to integer.");
17778 unsigned XBitWidth = X.getValueSizeInBits();
17779 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
17781 // An extract element return value type can be wider than its vector
17782 // operand element type. In that case, the high bits are undefined, so
17783 // it's possible that we may need to extend rather than truncate.
17784 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
17785 assert(XBitWidth % VecEltBitWidth == 0 &&
17786 "Scalar bitwidth must be a multiple of vector element bitwidth");
17787 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
17792 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
17795 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
17796 // We only perform this optimization before the op legalization phase because
17797 // we may introduce new vector instructions which are not backed by TD
17798 // patterns. For example on AVX, extracting elements from a wide vector
17799 // without using extract_subvector. However, if we can find an underlying
17800 // scalar value, then we can always use that.
17801 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
17802 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
17803 // Find the new index to extract from.
17804 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
17806 // Extracting an undef index is undef.
17808 return DAG.getUNDEF(ScalarVT);
17810 // Select the right vector half to extract from.
17812 if (OrigElt < (int)NumElts) {
17813 SVInVec = VecOp.getOperand(0);
17815 SVInVec = VecOp.getOperand(1);
// Mask indices >= NumElts refer to operand 1; rebase into that vector.
17816 OrigElt -= NumElts;
17819 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
17820 SDValue InOp = SVInVec.getOperand(OrigElt);
17821 if (InOp.getValueType() != ScalarVT) {
17822 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17823 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17829 // FIXME: We should handle recursing on other vector shuffles and
17830 // scalar_to_vector here as well.
17832 if (!LegalOperations ||
17833 // FIXME: Should really be just isOperationLegalOrCustom.
17834 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
17835 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
17836 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
17837 DAG.getVectorIdxConstant(OrigElt, DL));
17841 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
17842 // simplify it based on the (valid) extraction indices.
17843 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
17844 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17845 Use->getOperand(0) == VecOp &&
17846 isa<ConstantSDNode>(Use->getOperand(1));
// Collect the set of lanes actually read by any user, then simplify the
// vector operand to only those lanes/bits.
17848 APInt DemandedElts = APInt::getNullValue(NumElts);
17849 for (SDNode *Use : VecOp->uses()) {
17850 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
17851 if (CstElt->getAPIntValue().ult(NumElts))
17852 DemandedElts.setBit(CstElt->getZExtValue());
17854 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
17855 // We simplified the vector operand of this extract element. If this
17856 // extract is not dead, visit it again so it is folded properly.
17857 if (N->getOpcode() != ISD::DELETED_NODE)
17859 return SDValue(N, 0);
17861 APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
17862 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
17863 // We simplified the vector operand of this extract element. If this
17864 // extract is not dead, visit it again so it is folded properly.
17865 if (N->getOpcode() != ISD::DELETED_NODE)
17867 return SDValue(N, 0);
17871 // Everything under here is trying to match an extract of a loaded value.
17872 // If the result of load has to be truncated, then it's not necessarily
17874 bool BCNumEltsChanged = false;
17875 EVT ExtVT = VecVT.getVectorElementType();
17877 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
17880 if (VecOp.getOpcode() == ISD::BITCAST) {
17881 // Don't duplicate a load with other uses.
17882 if (!VecOp.hasOneUse())
17885 EVT BCVT = VecOp.getOperand(0).getValueType();
17886 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
// Record whether the bitcast changed lane count — shuffle masks become
// unreliable in that case (checked below).
17888 if (NumElts != BCVT.getVectorNumElements())
17889 BCNumEltsChanged = true;
17890 VecOp = VecOp.getOperand(0);
17891 ExtVT = BCVT.getVectorElementType();
17894 // extract (vector load $addr), i --> load $addr + i * size
17895 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
17896 ISD::isNormalLoad(VecOp.getNode()) &&
17897 !Index->hasPredecessor(VecOp.getNode())) {
17898 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
17899 if (VecLoad && VecLoad->isSimple())
17900 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
17903 // Perform only after legalization to ensure build_vector / vector_shuffle
17904 // optimizations have already been done.
17905 if (!LegalOperations || !IndexC)
17908 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
17909 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
17910 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
17911 int Elt = IndexC->getZExtValue();
17912 LoadSDNode *LN0 = nullptr;
17913 if (ISD::isNormalLoad(VecOp.getNode())) {
17914 LN0 = cast<LoadSDNode>(VecOp);
17915 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17916 VecOp.getOperand(0).getValueType() == ExtVT &&
17917 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
17918 // Don't duplicate a load with other uses.
17919 if (!VecOp.hasOneUse())
17922 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17924 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17925 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17927 // (load $addr+1*size)
17929 // Don't duplicate a load with other uses.
17930 if (!VecOp.hasOneUse())
17933 // If the bit convert changed the number of elements, it is unsafe
17934 // to examine the mask.
17935 if (BCNumEltsChanged)
17938 // Select the input vector, guarding against out of range extract vector.
17939 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
17940 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
17942 if (VecOp.getOpcode() == ISD::BITCAST) {
17943 // Don't duplicate a load with other uses.
17944 if (!VecOp.hasOneUse())
17947 VecOp = VecOp.getOperand(0);
17949 if (ISD::isNormalLoad(VecOp.getNode())) {
17950 LN0 = cast<LoadSDNode>(VecOp);
// Rebase the element index into the selected shuffle operand.
17951 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17952 Index = DAG.getConstant(Elt, DL, Index.getValueType());
17954 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
17955 VecVT.getVectorElementType() == ScalarVT &&
17958 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
17959 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
17960 // -> extract_vector_elt a, 0
17961 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
17962 // -> extract_vector_elt a, 1
17963 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
17964 // -> extract_vector_elt b, 0
17965 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
17966 // -> extract_vector_elt b, 1
17968 EVT ConcatVT = VecOp.getOperand(0).getValueType();
17969 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17970 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
17971 Index.getValueType());
17973 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
17974 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
17975 ConcatVT.getVectorElementType(),
17977 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
17980 // Make sure we found a non-volatile load and the extractelement is
17982 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
17985 // If Idx was -1 above, Elt is going to be -1, so just return undef.
17987 return DAG.getUNDEF(LVT);
17989 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17992 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
// When every BUILD_VECTOR operand is a zero/any-extend from a common narrow
// scalar type, build a wider BUILD_VECTOR of the unextended scalars (with
// zero or undef filler lanes, endian-aware placement) and bitcast it to the
// original type, enabling further build_vector combines.
17993 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17994 // We perform this optimization post type-legalization because
17995 // the type-legalizer often scalarizes integer-promoted vectors.
17996 // Performing this optimization before may create bit-casts which
17997 // will be type-legalized to complex code sequences.
17998 // We perform this optimization only before the operation legalizer because we
17999 // may introduce illegal operations.
18000 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18003 unsigned NumInScalars = N->getNumOperands();
18005 EVT VT = N->getValueType(0);
18007 // Check to see if this is a BUILD_VECTOR of a bunch of values
18008 // which come from any_extend or zero_extend nodes. If so, we can create
18009 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18010 // optimizations. We do not handle sign-extend because we can't fill the sign
18012 EVT SourceType = MVT::Other;
18013 bool AllAnyExt = true;
18015 for (unsigned i = 0; i != NumInScalars; ++i) {
18016 SDValue In = N->getOperand(i);
18017 // Ignore undef inputs.
18018 if (In.isUndef()) continue;
18020 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
18021 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18023 // Abort if the element is not an extension.
18024 if (!ZeroExt && !AnyExt) {
18025 SourceType = MVT::Other;
18029 // The input is a ZeroExt or AnyExt. Check the original type.
18030 EVT InTy = In.getOperand(0).getValueType();
18032 // Check that all of the widened source types are the same.
18033 if (SourceType == MVT::Other)
18036 else if (InTy != SourceType) {
18037 // Multiple income types. Abort.
18038 SourceType = MVT::Other;
18042 // Check if all of the extends are ANY_EXTENDs.
18043 AllAnyExt &= AnyExt;
18046 // In order to have valid types, all of the inputs must be extended from the
18047 // same source type and all of the inputs must be any or zero extend.
18048 // Scalar sizes must be a power of two.
18049 EVT OutScalarTy = VT.getScalarType();
18050 bool ValidTypes = SourceType != MVT::Other &&
18051 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
18052 isPowerOf2_32(SourceType.getSizeInBits());
18054 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
18055 // turn into a single shuffle instruction.
18059 // If we already have a splat buildvector, then don't fold it if it means
18060 // introducing zeros.
18061 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
18064 bool isLE = DAG.getDataLayout().isLittleEndian();
18065 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
18066 assert(ElemRatio > 1 && "Invalid element size ratio");
// Filler for the widened lanes: undef when every extend is ANY_EXTEND (the
// high bits are unspecified), otherwise zero to honor ZERO_EXTEND semantics.
18067 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
18068 DAG.getConstant(0, DL, SourceType);
18070 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
18071 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
18073 // Populate the new build_vector
18074 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18075 SDValue Cast = N->getOperand(i);
18076 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
18077 Cast.getOpcode() == ISD::ZERO_EXTEND ||
18078 Cast.isUndef()) && "Invalid cast opcode");
18080 if (Cast.isUndef())
18081 In = DAG.getUNDEF(SourceType);
18083 In = Cast->getOperand(0);
// On little-endian the narrow payload sits in the lowest-indexed lane of
// each group; on big-endian it sits in the highest.
18084 unsigned Index = isLE ? (i * ElemRatio) :
18085 (i * ElemRatio + (ElemRatio - 1));
18087 assert(Index < Ops.size() && "Invalid index");
18091 // The type of the new BUILD_VECTOR node.
18092 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
18093 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
18094 "Invalid vector size");
18095 // Check if the new vector type is legal.
18096 if (!isTypeLegal(VecVT) ||
18097 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
18098 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
18101 // Make the new BUILD_VECTOR.
18102 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
18104 // The new BUILD_VECTOR node has the potential to be further optimized.
18105 AddToWorklist(BV.getNode());
18106 // Bitcast to the desired type.
18107 return DAG.getBitcast(VT, BV);
18110 // Simplify (build_vec (trunc $1)
18111 // (trunc (srl $1 half-width))
18112 // (trunc (srl $1 (2 * half-width))) …)
// into a single bitcast of $1 when every element is the i-th slice of the
// same source value. Returns an empty SDValue when the pattern does not match.
// NOTE(review): this view of the file elides some physical lines (the original
// numbering is discontinuous), so several early-exit returns/braces are not
// visible here; comments describe only the visible code.
18114 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18115 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18117 // Only for little endian
// (element 0 of the build_vector must be the lowest bits of the source for
// a plain bitcast to be equivalent).
18118 if (!DAG.getDataLayout().isLittleEndian())
18122 EVT VT = N->getValueType(0);
18123 EVT OutScalarTy = VT.getScalarType();
18124 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18126 // Only for power of two types to be sure that bitcast works well
18127 if (!isPowerOf2_64(ScalarTypeBitsize))
18130 unsigned NumInScalars = N->getNumOperands();
18132 // Look through bitcasts
18133 auto PeekThroughBitcast = [](SDValue Op) {
18134 if (Op.getOpcode() == ISD::BITCAST)
18135 return Op.getOperand(0);
18139 // The source value where all the parts are extracted.
18141 for (unsigned i = 0; i != NumInScalars; ++i) {
18142 SDValue In = PeekThroughBitcast(N->getOperand(i));
18143 // Ignore undef inputs.
18144 if (In.isUndef()) continue;
// Each non-undef element must be (trunc ...) of the shared source.
18146 if (In.getOpcode() != ISD::TRUNCATE)
18149 In = PeekThroughBitcast(In.getOperand(0));
18151 if (In.getOpcode() != ISD::SRL) {
18152 // For now only build_vec without shuffling, handle shifts here in the
18160 SDValue part = PeekThroughBitcast(In.getOperand(0));
18164 } else if (Src != part) {
18165 // Vector parts do not stem from the same variable
18169 SDValue ShiftAmtVal = In.getOperand(1);
18170 if (!isa<ConstantSDNode>(ShiftAmtVal))
18173 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
18175 // The extracted value is not extracted at the right position
// Element i must come from bit offset i * ScalarTypeBitsize of the source.
18176 if (ShiftAmt != i * ScalarTypeBitsize)
18181 // Only cast if the size is the same
18182 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
18185 return DAG.getBitcast(VT, Src);
// Build a single VECTOR_SHUFFLE that realizes one pair of input vectors
// (VecIn1/VecIn2, selected by LeftIdx into VectorMask) of a BUILD_VECTOR of
// extract_vector_elts. Handles the legal type-matching cases by concatenating,
// splitting, or padding the inputs so the shuffle's operand type is valid.
// NOTE(review): physical lines are elided in this view (discontinuous original
// numbering) — several early returns / closing braces are not visible.
18188 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
18189 ArrayRef<int> VectorMask,
18190 SDValue VecIn1, SDValue VecIn2,
18191 unsigned LeftIdx, bool DidSplitVec) {
18192 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18194 EVT VT = N->getValueType(0);
18195 EVT InVT1 = VecIn1.getValueType();
18196 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
18198 unsigned NumElems = VT.getVectorNumElements();
// ShuffleNumElems may grow to 2*NumElems when we shuffle at a wider width
// and extract the result afterwards.
18199 unsigned ShuffleNumElems = NumElems;
18201 // If we artificially split a vector in two already, then the offsets in the
18202 // operands will all be based off of VecIn1, even those in VecIn2.
18203 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
18205 // We can't generate a shuffle node with mismatched input and output types.
18206 // Try to make the types match the type of the output.
18207 if (InVT1 != VT || InVT2 != VT) {
18208 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
18209 // If the output vector length is a multiple of both input lengths,
18210 // we can concatenate them and pad the rest with undefs.
18211 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
18212 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
18213 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
18214 ConcatOps[0] = VecIn1;
18215 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
18216 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18217 VecIn2 = SDValue();
18218 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
18219 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
18222 if (!VecIn2.getNode()) {
18223 // If we only have one input vector, and it's twice the size of the
18224 // output, split it in two.
18225 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
18226 DAG.getVectorIdxConstant(NumElems, DL));
18227 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
18228 // Since we now have shorter input vectors, adjust the offset of the
18229 // second vector's start.
18230 Vec2Offset = NumElems;
18231 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
18232 // VecIn1 is wider than the output, and we have another, possibly
18233 // smaller input. Pad the smaller input with undefs, shuffle at the
18234 // input vector width, and extract the output.
18235 // The shuffle type is different than VT, so check legality again.
18236 if (LegalOperations &&
18237 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
18240 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
18241 // lower it back into a BUILD_VECTOR. So if the inserted type is
18242 // illegal, don't even try.
18243 if (InVT1 != InVT2) {
18244 if (!TLI.isTypeLegal(InVT2))
18246 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
18247 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
18249 ShuffleNumElems = NumElems * 2;
18251 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
18252 // than VecIn1. We can't handle this for now - this case will disappear
18253 // when we start sorting the vectors by type.
18256 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
18257 InVT1.getSizeInBits() == VT.getSizeInBits()) {
// Second input is half-width: widen it with an undef upper half.
18258 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
18259 ConcatOps[0] = VecIn2;
18260 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18262 // TODO: Support cases where the length mismatch isn't exactly by a
18264 // TODO: Move this check upwards, so that if we have bad type
18265 // mismatches, we don't create any DAG nodes.
18270 // Initialize mask to undef.
18271 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
18273 // Only need to run up to the number of elements actually used, not the
18274 // total number of elements in the shuffle - if we are shuffling a wider
18275 // vector, the high lanes should be set to undef.
18276 for (unsigned i = 0; i != NumElems; ++i) {
18277 if (VectorMask[i] <= 0)
// The operand is an extract_vector_elt; operand 1 is its constant index.
18280 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
18281 if (VectorMask[i] == (int)LeftIdx) {
18282 Mask[i] = ExtIndex;
18283 } else if (VectorMask[i] == (int)LeftIdx + 1) {
18284 Mask[i] = Vec2Offset + ExtIndex;
18288 // The type the input vectors may have changed above.
18289 InVT1 = VecIn1.getValueType();
18291 // If we already have a VecIn2, it should have the same type as VecIn1.
18292 // If we don't, get an undef/zero vector of the appropriate type.
18293 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
18294 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
18296 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
18297 if (ShuffleNumElems > NumElems)
// We shuffled at double width; take the low half as the result.
18298 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
// Fold a BUILD_VECTOR that is all-undef except for a single
// (zext (extract_vector_elt V, C)) element into a shuffle of V with a zero
// vector, bitcast to the result type. Returns an empty SDValue on mismatch.
// NOTE(review): physical lines are elided in this view — the declaration /
// initialization of ZextElt (the index of the single non-undef element) and
// some early returns are not visible here.
18303 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
18304 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18306 // First, determine where the build vector is not undef.
18307 // TODO: We could extend this to handle zero elements as well as undefs.
18308 int NumBVOps = BV->getNumOperands();
18310 for (int i = 0; i != NumBVOps; ++i) {
18311 SDValue Op = BV->getOperand(i);
18319 // Bail out if there's no non-undef element.
18323 // The build vector contains some number of undef elements and exactly
18324 // one other element. That other element must be a zero-extended scalar
18325 // extracted from a vector at a constant index to turn this into a shuffle.
18326 // Also, require that the build vector does not implicitly truncate/extend
18328 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
18329 EVT VT = BV->getValueType(0);
18330 SDValue Zext = BV->getOperand(ZextElt);
18331 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
18332 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18333 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
18334 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
18337 // The zero-extend must be a multiple of the source size, and we must be
18338 // building a vector of the same size as the source of the extract element.
18339 SDValue Extract = Zext.getOperand(0);
18340 unsigned DestSize = Zext.getValueSizeInBits();
18341 unsigned SrcSize = Extract.getValueSizeInBits();
18342 if (DestSize % SrcSize != 0 ||
18343 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
18346 // Create a shuffle mask that will combine the extracted element with zeros
18348 int ZextRatio = DestSize / SrcSize;
18349 int NumMaskElts = NumBVOps * ZextRatio;
18350 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
18351 for (int i = 0; i != NumMaskElts; ++i) {
18352 if (i / ZextRatio == ZextElt) {
18353 // The low bits of the (potentially translated) extracted element map to
18354 // the source vector. The high bits map to zero. We will use a zero vector
18355 // as the 2nd source operand of the shuffle, so use the 1st element of
18356 // that vector (mask value is number-of-elements) for the high bits.
18357 if (i % ZextRatio == 0)
18358 ShufMask[i] = Extract.getConstantOperandVal(1);
18360 ShufMask[i] = NumMaskElts;
18363 // Undef elements of the build vector remain undef because we initialize
18364 // the shuffle mask with -1.
18367 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
18368 // bitcast (shuffle V, ZeroVec, VectorMask)
18370 EVT VecVT = Extract.getOperand(0).getValueType();
18371 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
18372 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18373 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
18374 ZeroVec, ShufMask, DAG);
18377 return DAG.getBitcast(VT, Shuf);
18380 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
18381 // operations. If the types of the vectors we're extracting from allow it,
18382 // turn this into a vector_shuffle node.
// Phases (as visible below): (1) classify each operand as undef / zero /
// extract-from-vector-K, recording K in VectorMask; (2) optionally split a
// single wide source vector; (3) emit one shuffle per pair of sources via
// createBuildVecShuffle; (4) blend the partial shuffles in a binary tree.
// NOTE(review): physical lines are elided in this view (discontinuous
// original numbering) — some early returns / braces are not visible.
18383 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
18385 EVT VT = N->getValueType(0);
18387 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
18388 if (!isTypeLegal(VT))
18391 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
18394 // May only combine to shuffle after legalize if shuffle is legal.
18395 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
18398 bool UsesZeroVector = false;
18399 unsigned NumElems = N->getNumOperands();
18401 // Record, for each element of the newly built vector, which input vector
18402 // that element comes from. -1 stands for undef, 0 for the zero vector,
18403 // and positive values for the input vectors.
18404 // VectorMask maps each element to its vector number, and VecIn maps vector
18405 // numbers to their initial SDValues.
18407 SmallVector<int, 8> VectorMask(NumElems, -1);
18408 SmallVector<SDValue, 8> VecIn;
// Slot 0 is a placeholder so that real input vectors get indices >= 1.
18409 VecIn.push_back(SDValue());
18411 for (unsigned i = 0; i != NumElems; ++i) {
18412 SDValue Op = N->getOperand(i);
18417 // See if we can use a blend with a zero vector.
18418 // TODO: Should we generalize this to a blend with an arbitrary constant
18420 if (isNullConstant(Op) || isNullFPConstant(Op)) {
18421 UsesZeroVector = true;
18426 // Not an undef or zero. If the input is something other than an
18427 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
18428 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18429 !isa<ConstantSDNode>(Op.getOperand(1)))
18431 SDValue ExtractedFromVec = Op.getOperand(0);
18433 if (ExtractedFromVec.getValueType().isScalableVector())
18436 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
18437 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
18440 // All inputs must have the same element type as the output.
18441 if (VT.getVectorElementType() !=
18442 ExtractedFromVec.getValueType().getVectorElementType())
18445 // Have we seen this input vector before?
18446 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
18447 // a map back from SDValues to numbers isn't worth it.
18448 unsigned Idx = std::distance(
18449 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
18450 if (Idx == VecIn.size())
18451 VecIn.push_back(ExtractedFromVec);
18453 VectorMask[i] = Idx;
18456 // If we didn't find at least one input vector, bail out.
18457 if (VecIn.size() < 2)
18460 // If all the Operands of BUILD_VECTOR extract from same
18461 // vector, then split the vector efficiently based on the maximum
18462 // vector access index and adjust the VectorMask and
18463 // VecIn accordingly.
18464 bool DidSplitVec = false;
18465 if (VecIn.size() == 2) {
18466 unsigned MaxIndex = 0;
18467 unsigned NearestPow2 = 0;
18468 SDValue Vec = VecIn.back();
18469 EVT InVT = Vec.getValueType();
18470 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
18472 for (unsigned i = 0; i < NumElems; i++) {
18473 if (VectorMask[i] <= 0)
18475 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
18476 IndexVec[i] = Index;
18477 MaxIndex = std::max(MaxIndex, Index);
18480 NearestPow2 = PowerOf2Ceil(MaxIndex);
// Only worth splitting when the source is much wider than the result
// (all used lanes fit in the lower NearestPow2 elements).
18481 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
18482 NumElems * 2 < NearestPow2) {
18483 unsigned SplitSize = NearestPow2 / 2;
18484 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
18485 InVT.getVectorElementType(), SplitSize);
18486 if (TLI.isTypeLegal(SplitVT)) {
18487 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18488 DAG.getVectorIdxConstant(SplitSize, DL));
18489 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18490 DAG.getVectorIdxConstant(0, DL));
18492 VecIn.push_back(VecIn1);
18493 VecIn.push_back(VecIn2);
18494 DidSplitVec = true;
18496 for (unsigned i = 0; i < NumElems; i++) {
18497 if (VectorMask[i] <= 0)
// Re-point each lane at the half of the split it reads from.
18499 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
18505 // TODO: We want to sort the vectors by descending length, so that adjacent
18506 // pairs have similar length, and the longer vector is always first in the
18509 // TODO: Should this fire if some of the input vectors has illegal type (like
18510 // it does now), or should we let legalization run its course first?
18513 // Take pairs of vectors, and shuffle them so that the result has elements
18514 // from these vectors in the correct places.
18515 // For example, given:
18516 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
18517 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
18518 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
18519 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
18520 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
18521 // We will generate:
18522 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
18523 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
18524 SmallVector<SDValue, 4> Shuffles;
18525 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
18526 unsigned LeftIdx = 2 * In + 1;
18527 SDValue VecLeft = VecIn[LeftIdx];
18529 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
18531 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
18532 VecRight, LeftIdx, DidSplitVec))
18533 Shuffles.push_back(Shuffle);
18538 // If we need the zero vector as an "ingredient" in the blend tree, add it
18539 // to the list of shuffles.
18540 if (UsesZeroVector)
18541 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
18542 : DAG.getConstantFP(0.0, DL, VT));
18544 // If we only have one shuffle, we're done.
18545 if (Shuffles.size() == 1)
18546 return Shuffles[0];
18548 // Update the vector mask to point to the post-shuffle vectors.
18549 for (int &Vec : VectorMask)
// Zero-vector lanes map to the zero constant appended last above.
18551 Vec = Shuffles.size() - 1;
18553 Vec = (Vec - 1) / 2;
18555 // More than one shuffle. Generate a binary tree of blends, e.g. if from
18556 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
18558 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
18559 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
18560 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
18561 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
18562 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
18563 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
18564 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
18566 // Make sure the initial size of the shuffle list is even.
18567 if (Shuffles.size() % 2)
18568 Shuffles.push_back(DAG.getUNDEF(VT));
18570 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
18572 Shuffles[CurSize] = DAG.getUNDEF(VT);
18575 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
18577 int Right = 2 * In + 1;
18578 SmallVector<int, 8> Mask(NumElems, -1);
18579 for (unsigned i = 0; i != NumElems; ++i) {
18580 if (VectorMask[i] == Left) {
18582 VectorMask[i] = In;
18583 } else if (VectorMask[i] == Right) {
18584 Mask[i] = i + NumElems;
18585 VectorMask[i] = In;
18590 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
18593 return Shuffles[0];
18596 // Try to turn a build vector of zero extends of extract vector elts into a
18597 // a vector zero extend and possibly an extract subvector.
18598 // TODO: Support sign extend?
18599 // TODO: Allow undef elements?
// i.e. (build_vector (zext (extractelt X, C)), (zext (extractelt X, C+1)), …)
// --> (zext (extract_subvector X, C)). ANY_EXTEND elements are accepted too;
// the result uses ZERO_EXTEND only if at least one element was a zext.
// NOTE(review): physical lines are elided in this view — the early returns
// after the offset checks are not visible here.
18600 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
18601 if (LegalOperations)
18604 EVT VT = N->getValueType(0);
18606 bool FoundZeroExtend = false;
18607 SDValue Op0 = N->getOperand(0);
// Returns the constant extract index when Op is (zext|aext (extractelt In, C))
// from the same source vector as operand 0; side effect: records any zext.
18608 auto checkElem = [&](SDValue Op) -> int64_t {
18609 unsigned Opc = Op.getOpcode();
18610 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
18611 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
18612 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18613 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
18614 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
18615 return C->getZExtValue();
18619 // Make sure the first element matches
18620 // (zext (extract_vector_elt X, C))
18621 int64_t Offset = checkElem(Op0);
18625 unsigned NumElems = N->getNumOperands();
18626 SDValue In = Op0.getOperand(0).getOperand(0);
18627 EVT InSVT = In.getValueType().getScalarType();
18628 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
18630 // Don't create an illegal input type after type legalization.
18631 if (LegalTypes && !TLI.isTypeLegal(InVT))
18634 // Ensure all the elements come from the same vector and are adjacent.
18635 for (unsigned i = 1; i != NumElems; ++i) {
18636 if ((Offset + i) != checkElem(N->getOperand(i)))
18641 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
18642 Op0.getOperand(0).getOperand(1));
18643 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
// Top-level combine for BUILD_VECTOR: tries, in order, the folds visible
// below (undef folding, splat-of-bitcast, SPLAT_VECTOR, subvector extract,
// then the reduceBuildVec* / convertBuildVecZextToZext helpers).
// NOTE(review): physical lines are elided in this view (discontinuous
// original numbering) — some returns / braces are not visible here.
18647 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
18648 EVT VT = N->getValueType(0);
18650 // A vector built entirely of undefs is undef.
18651 if (ISD::allOperandsUndef(N))
18652 return DAG.getUNDEF(VT);
18654 // If this is a splat of a bitcast from another vector, change to a
18657 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
18658 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
18660 // If X is a build_vector itself, the concat can become a larger build_vector.
18661 // TODO: Maybe this is useful for non-splat too?
18662 if (!LegalOperations) {
18663 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
18664 Splat = peekThroughBitcasts(Splat);
18665 EVT SrcVT = Splat.getValueType();
18666 if (SrcVT.isVector()) {
18667 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
18668 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
18669 SrcVT.getVectorElementType(), NumElts);
18670 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
18671 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
18672 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
18674 return DAG.getBitcast(VT, Concat);
18680 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
18681 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
18682 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
18683 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
18684 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
18687 // Check if we can express BUILD VECTOR via subvector extract.
18688 if (!LegalTypes && (N->getNumOperands() > 1)) {
18689 SDValue Op0 = N->getOperand(0);
// Returns the constant extract index when Op is an extract_vector_elt from
// the same source vector as operand 0; otherwise falls through (elided).
18690 auto checkElem = [&](SDValue Op) -> uint64_t {
18691 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
18692 (Op0.getOperand(0) == Op.getOperand(0)))
18693 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
18694 return CNode->getZExtValue();
18698 int Offset = checkElem(Op0);
18699 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
18700 if (Offset + i != checkElem(N->getOperand(i))) {
18706 if ((Offset == 0) &&
18707 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
18708 return Op0.getOperand(0);
18709 if ((Offset != -1) &&
18710 ((Offset % N->getValueType(0).getVectorNumElements()) ==
18711 0)) // IDX must be multiple of output size.
18712 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
18713 Op0.getOperand(0), Op0.getOperand(1));
18716 if (SDValue V = convertBuildVecZextToZext(N))
18719 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
18722 if (SDValue V = reduceBuildVecTruncToBitCast(N))
18725 if (SDValue V = reduceBuildVecToShuffle(N))
// Fold a CONCAT_VECTORS whose operands are bitcasts of scalars (or undef)
// into one wide BUILD_VECTOR of those scalars, bitcast to the result type.
// Picks integer or FP scalar type based on what the operands contain.
// NOTE(review): physical lines are elided in this view — the "bail out"
// returns and some flag assignments (AnyFP/AnyInteger) are not visible.
18731 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
18732 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18733 EVT OpVT = N->getOperand(0).getValueType();
18735 // If the operands are legal vectors, leave them alone.
18736 if (TLI.isTypeLegal(OpVT))
18740 EVT VT = N->getValueType(0);
18741 SmallVector<SDValue, 8> Ops;
// Start with an integer scalar type of the operand's full width.
18743 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
18744 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18746 // Keep track of what we encounter.
18747 bool AnyInteger = false;
18748 bool AnyFP = false;
18749 for (const SDValue &Op : N->ops()) {
18750 if (ISD::BITCAST == Op.getOpcode() &&
18751 !Op.getOperand(0).getValueType().isVector())
18752 Ops.push_back(Op.getOperand(0));
18753 else if (ISD::UNDEF == Op.getOpcode())
18754 Ops.push_back(ScalarUndef);
18758 // Note whether we encounter an integer or floating point scalar.
18759 // If it's neither, bail out, it could be something weird like x86mmx.
18760 EVT LastOpVT = Ops.back().getValueType();
18761 if (LastOpVT.isFloatingPoint())
18763 else if (LastOpVT.isInteger())
18769 // If any of the operands is a floating point scalar bitcast to a vector,
18770 // use floating point types throughout, and bitcast everything.
18771 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
18773 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
18774 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18776 for (SDValue &Op : Ops) {
18777 if (Op.getValueType() == SVT)
18782 Op = DAG.getBitcast(SVT, Op);
18787 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
18788 VT.getSizeInBits() / SVT.getSizeInBits());
18789 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
18792 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
18793 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
18794 // most two distinct vectors the same size as the result, attempt to turn this
18795 // into a legal shuffle.
// NOTE(review): physical lines are elided in this view — the early-return
// paths (non-extract operand, index not scalable, >2 distinct sources) and
// the SV0/SV1 assignments are not all visible.
18796 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
18797 EVT VT = N->getValueType(0);
18798 EVT OpVT = N->getOperand(0).getValueType();
18800 // We currently can't generate an appropriate shuffle for a scalable vector.
18801 if (VT.isScalableVector())
18804 int NumElts = VT.getVectorNumElements();
18805 int NumOpElts = OpVT.getVectorNumElements();
18807 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
18808 SmallVector<int, 8> Mask;
18810 for (SDValue Op : N->ops()) {
18811 Op = peekThroughBitcasts(Op);
18813 // UNDEF nodes convert to UNDEF shuffle mask values.
18814 if (Op.isUndef()) {
18815 Mask.append((unsigned)NumOpElts, -1);
18819 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18822 // What vector are we extracting the subvector from and at what index?
18823 SDValue ExtVec = Op.getOperand(0);
18824 int ExtIdx = Op.getConstantOperandVal(1);
18826 // We want the EVT of the original extraction to correctly scale the
18827 // extraction index.
18828 EVT ExtVT = ExtVec.getValueType();
18829 ExtVec = peekThroughBitcasts(ExtVec);
18831 // UNDEF nodes convert to UNDEF shuffle mask values.
18832 if (ExtVec.isUndef()) {
18833 Mask.append((unsigned)NumOpElts, -1);
18837 // Ensure that we are extracting a subvector from a vector the same
18838 // size as the result.
18839 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
18842 // Scale the subvector index to account for any bitcast.
18843 int NumExtElts = ExtVT.getVectorNumElements();
18844 if (0 == (NumExtElts % NumElts))
18845 ExtIdx /= (NumExtElts / NumElts);
18846 else if (0 == (NumElts % NumExtElts))
18847 ExtIdx *= (NumElts / NumExtElts);
18851 // At most we can reference 2 inputs in the final shuffle.
18852 if (SV0.isUndef() || SV0 == ExtVec) {
18854 for (int i = 0; i != NumOpElts; ++i)
18855 Mask.push_back(i + ExtIdx);
18856 } else if (SV1.isUndef() || SV1 == ExtVec) {
18858 for (int i = 0; i != NumOpElts; ++i)
// Second-source lanes are offset by NumElts in a 2-input shuffle mask.
18859 Mask.push_back(i + ExtIdx + NumElts);
18865 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18866 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
18867 DAG.getBitcast(VT, SV1), Mask, DAG);
// Fold concat (cast X), (cast Y), ... -> cast (concat X, Y, ...) when every
// operand is the same int<->fp cast from the same source type and the wide
// cast is legal-or-custom on the target.
// NOTE(review): physical lines are elided in this view — the switch `break`s,
// `default:` arms, and several early returns are not visible here.
18870 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
18871 unsigned CastOpcode = N->getOperand(0).getOpcode();
18872 switch (CastOpcode) {
18873 case ISD::SINT_TO_FP:
18874 case ISD::UINT_TO_FP:
18875 case ISD::FP_TO_SINT:
18876 case ISD::FP_TO_UINT:
18877 // TODO: Allow more opcodes?
18878 // case ISD::BITCAST:
18879 // case ISD::TRUNCATE:
18880 // case ISD::ZERO_EXTEND:
18881 // case ISD::SIGN_EXTEND:
18882 // case ISD::FP_EXTEND:
18888 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
18889 if (!SrcVT.isVector())
18892 // All operands of the concat must be the same kind of cast from the same
18894 SmallVector<SDValue, 4> SrcOps;
18895 for (SDValue Op : N->ops()) {
18896 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
18897 Op.getOperand(0).getValueType() != SrcVT)
18899 SrcOps.push_back(Op.getOperand(0));
18902 // The wider cast must be supported by the target. This is unusual because
18903 // the operation support type parameter depends on the opcode. In addition,
18904 // check the other type in the cast to make sure this is really legal.
18905 EVT VT = N->getValueType(0);
18906 EVT SrcEltVT = SrcVT.getVectorElementType();
18907 unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
18908 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
18909 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18910 switch (CastOpcode) {
18911 case ISD::SINT_TO_FP:
18912 case ISD::UINT_TO_FP:
// int->fp: the legality query keys on the (integer) source type.
18913 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
18914 !TLI.isTypeLegal(VT))
18917 case ISD::FP_TO_SINT:
18918 case ISD::FP_TO_UINT:
// fp->int: the legality query keys on the (integer) result type.
18919 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
18920 !TLI.isTypeLegal(ConcatSrcVT))
18924 llvm_unreachable("Unexpected cast opcode");
18927 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
18929 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
18930 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
18933 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
18934 // If we only have one input vector, we don't need to do any concatenation.
18935 if (N->getNumOperands() == 1)
18936 return N->getOperand(0);
18938 // Check if all of the operands are undefs.
18939 EVT VT = N->getValueType(0);
18940 if (ISD::allOperandsUndef(N))
18941 return DAG.getUNDEF(VT);
18943 // Optimize concat_vectors where all but the first of the vectors are undef.
18944 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
18945 return Op.isUndef();
18947 SDValue In = N->getOperand(0);
18948 assert(In.getValueType().isVector() && "Must concat vectors");
18950 // If the input is a concat_vectors, just make a larger concat by padding
18951 // with smaller undefs.
18952 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
18953 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
18954 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
18955 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
18956 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18959 SDValue Scalar = peekThroughOneUseBitcasts(In);
18961 // concat_vectors(scalar_to_vector(scalar), undef) ->
18962 // scalar_to_vector(scalar)
18963 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18964 Scalar.hasOneUse()) {
18965 EVT SVT = Scalar.getValueType().getVectorElementType();
18966 if (SVT == Scalar.getOperand(0).getValueType())
18967 Scalar = Scalar.getOperand(0);
18970 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
18971 if (!Scalar.getValueType().isVector()) {
18972 // If the bitcast type isn't legal, it might be a trunc of a legal type;
18973 // look through the trunc so we can still do the transform:
18974 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
18975 if (Scalar->getOpcode() == ISD::TRUNCATE &&
18976 !TLI.isTypeLegal(Scalar.getValueType()) &&
18977 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
18978 Scalar = Scalar->getOperand(0);
18980 EVT SclTy = Scalar.getValueType();
18982 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
18985 // Bail out if the vector size is not a multiple of the scalar size.
18986 if (VT.getSizeInBits() % SclTy.getSizeInBits())
18989 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
18990 if (VNTNumElms < 2)
18993 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
18994 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
18997 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
18998 return DAG.getBitcast(VT, Res);
19002 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
19003 // We have already tested above for an UNDEF only concatenation.
19004 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
19005 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
19006 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
19007 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
19009 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
19010 SmallVector<SDValue, 8> Opnds;
19011 EVT SVT = VT.getScalarType();
19014 if (!SVT.isFloatingPoint()) {
19015 // If BUILD_VECTOR are from built from integer, they may have different
19016 // operand types. Get the smallest type and truncate all operands to it.
19017 bool FoundMinVT = false;
19018 for (const SDValue &Op : N->ops())
19019 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19020 EVT OpSVT = Op.getOperand(0).getValueType();
19021 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
19024 assert(FoundMinVT && "Concat vector type mismatch");
19027 for (const SDValue &Op : N->ops()) {
19028 EVT OpVT = Op.getValueType();
19029 unsigned NumElts = OpVT.getVectorNumElements();
19031 if (ISD::UNDEF == Op.getOpcode())
19032 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
19034 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19035 if (SVT.isFloatingPoint()) {
19036 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
19037 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
19039 for (unsigned i = 0; i != NumElts; ++i)
19041 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
19046 assert(VT.getVectorNumElements() == Opnds.size() &&
19047 "Concat vector type mismatch");
19048 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
19051 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
19052 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
19055 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
19056 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
19057 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
19060 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
19063 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
19064 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
19065 // operands and look for a CONCAT operations that place the incoming vectors
19066 // at the exact same location.
19068 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
19069 SDValue SingleSource = SDValue();
19070 unsigned PartNumElem =
19071 N->getOperand(0).getValueType().getVectorMinNumElements();
19073 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19074 SDValue Op = N->getOperand(i);
19079 // Check if this is the identity extract:
19080 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19083 // Find the single incoming vector for the extract_subvector.
19084 if (SingleSource.getNode()) {
19085 if (Op.getOperand(0) != SingleSource)
19088 SingleSource = Op.getOperand(0);
19090 // Check the source type is the same as the type of the result.
19091 // If not, this concat may extend the vector, so we can not
19092 // optimize it away.
19093 if (SingleSource.getValueType() != N->getValueType(0))
19097 // Check that we are reading from the identity index.
19098 unsigned IdentityIndex = i * PartNumElem;
19099 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
19103 if (SingleSource.getNode())
19104 return SingleSource;
19109 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19110 // if the subvector can be sourced for free.
19111 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
// Case 1: V inserts a SubVT-typed value at exactly the requested Index, so
// the inserted operand is the subvector we want.
19112 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19113 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19114 return V.getOperand(1);
// Case 2: V is a concatenation of SubVT-typed pieces and the constant Index
// falls on a SubVT boundary, so we can return the matching concat operand.
19116 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19117 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19118 V.getOperand(0).getValueType() == SubVT &&
19119 (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
19120 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
19121 return V.getOperand(SubIdx);
// Try to fold an extract_subvector of a wide vector binop when both binop
// operands provide the extracted subvector "for free" (via insert_subvector
// or concat_vectors at the same index); the insert/extract pair disappears.
19126 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19127 SelectionDAG &DAG) {
19128 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19129 SDValue BinOp = Extract->getOperand(0);
19130 unsigned BinOpcode = BinOp.getOpcode();
// Only simple single-result binary operators are handled.
19131 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
19134 EVT VecVT = BinOp.getValueType();
19135 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
// Both binop operands must have the same vector type as the binop result.
19136 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
19139 SDValue Index = Extract->getOperand(1);
19140 EVT SubVT = Extract->getValueType(0);
// The narrow binop must be supported by the target.
19141 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
19144 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19145 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19147 // TODO: We could handle the case where only 1 operand is being inserted by
19148 // creating an extract of the other operand, but that requires checking
19149 // number of uses and/or costs.
19150 if (!Sub0 || !Sub1)
19153 // We are inserting both operands of the wide binop only to extract back
19154 // to the narrow vector size. Eliminate all of the insert/extract:
19155 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
// Propagate the original node's fast-math/nuw/nsw flags onto the narrow op.
19156 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19157 BinOp->getFlags());
19160 /// If we are extracting a subvector produced by a wide binary operator try
19161 /// to use a narrow binary operator and/or avoid concatenation and extraction.
19162 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
19163 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
19164 // some of these bailouts with other transforms.
// First try the cheaper insert/extract elimination; if that fires we're done.
19166 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
19169 // The extract index must be a constant, so we can map it to a concat operand.
19170 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19171 if (!ExtractIndexC)
19174 // We are looking for an optionally bitcasted wide vector binary operator
19175 // feeding an extract subvector.
19176 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19177 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
19178 unsigned BOpcode = BinOp.getOpcode();
19179 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
19182 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
19183 // reduced to the unary fneg when it is visited, and we probably want to deal
19184 // with fneg in a target-specific way.
19185 if (BOpcode == ISD::FSUB) {
19186 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
19187 if (C && C->getValueAPF().isNegZero())
19191 // The binop must be a vector type, so we can extract some fraction of it.
19192 EVT WideBVT = BinOp.getValueType();
19193 // The optimisations below currently assume we are dealing with fixed length
19194 // vectors. It is possible to add support for scalable vectors, but at the
19195 // moment we've done no analysis to prove whether they are profitable or not.
19196 if (!WideBVT.isFixedLengthVector())
19199 EVT VT = Extract->getValueType(0);
19200 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
19201 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
19202 "Extract index is not a multiple of the vector length.");
19204 // Bail out if this is not a proper multiple width extraction.
19205 unsigned WideWidth = WideBVT.getSizeInBits();
19206 unsigned NarrowWidth = VT.getSizeInBits();
19207 if (WideWidth % NarrowWidth != 0)
19210 // Bail out if we are extracting a fraction of a single operation. This can
19211 // occur because we potentially looked through a bitcast of the binop.
19212 unsigned NarrowingRatio = WideWidth / NarrowWidth;
19213 unsigned WideNumElts = WideBVT.getVectorNumElements();
19214 if (WideNumElts % NarrowingRatio != 0)
19217 // Bail out if the target does not support a narrower version of the binop.
19218 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
19219 WideNumElts / NarrowingRatio)
19220 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
19223 // If extraction is cheap, we don't need to look at the binop operands
19224 // for concat ops. The narrow binop alone makes this transform profitable.
19225 // We can't just reuse the original extract index operand because we may have
19227 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
19228 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
19229 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
19230 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
19231 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
19233 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19234 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19235 BinOp.getOperand(0), NewExtIndex);
19236 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19237 BinOp.getOperand(1), NewExtIndex);
19238 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
19239 BinOp.getNode()->getFlags());
// Bitcast back in case we peeked through a bitcast of the binop above.
19240 return DAG.getBitcast(VT, NarrowBinOp);
19243 // Only handle the case where we are doubling and then halving. A larger ratio
19244 // may require more than two narrow binops to replace the wide binop.
19245 if (NarrowingRatio != 2)
19248 // TODO: The motivating case for this transform is an x86 AVX1 target. That
19249 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
19250 // flavors, but no other 256-bit integer support. This could be extended to
19251 // handle any binop, but that may require fixing/adding other folds to avoid
19252 // codegen regressions.
19253 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
19256 // We need at least one concatenation operation of a binop operand to make
19257 // this transform worthwhile. The concat must double the input vector sizes.
// Returns the half of the 2-operand concat selected by the extract index,
// or a null SDValue when the operand is not such a concat.
19258 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
19259 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
19260 return V.getOperand(ConcatOpNum);
19263 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
19264 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
19266 if (SubVecL || SubVecR) {
19267 // If a binop operand was not the result of a concat, we must extract a
19268 // half-sized operand for our new narrow binop:
19269 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
19270 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
19271 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
19273 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19274 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
19275 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19276 BinOp.getOperand(0), IndexC);
19278 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
19279 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19280 BinOp.getOperand(1), IndexC);
19282 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
19283 return DAG.getBitcast(VT, NarrowBinOp);
19289 /// If we are extracting a subvector from a wide vector load, convert to a
19290 /// narrow load to eliminate the extraction:
19291 /// (extract_subvector (load wide vector)) --> (load narrow vector)
19292 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
19293 // TODO: Add support for big-endian. The offset calculation must be adjusted.
19294 if (DAG.getDataLayout().isBigEndian())
// Only plain (non-extending), simple (non-atomic/non-volatile) loads with a
// constant extract index can be narrowed safely.
19297 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
19298 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19299 if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
19303 // Allow targets to opt-out.
19304 EVT VT = Extract->getValueType(0);
19306 // We can only create byte sized loads.
19307 if (!VT.isByteSized())
19310 unsigned Index = ExtIdx->getZExtValue();
19311 unsigned NumElts = VT.getVectorNumElements();
19313 // If the index is a multiple of the extract element count, we can offset the
19314 // address by the store size multiplied by the subvector index. Otherwise if
19315 // the scalar type is byte sized, we can just use the index multiplied by
19316 // the element size in bytes as the offset.
19318 if (Index % NumElts == 0)
19319 Offset = (Index / NumElts) * VT.getStoreSize();
19320 else if (VT.getScalarType().isByteSized())
19321 Offset = Index * VT.getScalarType().getStoreSize();
// Target hook: some targets prefer to keep the wide load.
19325 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19326 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
19329 // The narrow load will be offset from the base address of the old load if
19330 // we are extracting from something besides index 0 (little-endian).
19332 SDValue BaseAddr = Ld->getBasePtr();
19334 // TODO: Use "BaseIndexOffset" to make this more effective.
19335 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
// Derive a memory operand for the narrow load from the original one so
// alias info / address-space data is preserved at the adjusted offset.
19336 MachineFunction &MF = DAG.getMachineFunction();
19337 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
19338 VT.getStoreSize());
19339 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
// Keep chain users of the old load ordered relative to the new load.
19340 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
// Combine visitor for EXTRACT_SUBVECTOR: tries a sequence of folds (narrow
// load, extract-of-extract, bitcast commutation, concat/insert peek-through,
// build_vector shrinking, narrow binop) and returns the replacement value or
// an empty SDValue when no fold applies.
19344 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
19345 EVT NVT = N->getValueType(0);
19346 SDValue V = N->getOperand(0);
19347 uint64_t ExtIdx = N->getConstantOperandVal(1);
19349 // Extract from UNDEF is UNDEF.
19351 return DAG.getUNDEF(NVT);
// (extract_subvector (load wide)) --> (load narrow), when loads are usable.
19353 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
19354 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
19357 // Combine an extract of an extract into a single extract_subvector.
19358 // ext (ext X, C), 0 --> ext X, C
19359 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
19360 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
19361 V.getConstantOperandVal(1)) &&
19362 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
19363 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
19368 // Try to move vector bitcast after extract_subv by scaling extraction index:
19369 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
19370 if (V.getOpcode() == ISD::BITCAST &&
19371 V.getOperand(0).getValueType().isVector()) {
19372 SDValue SrcOp = V.getOperand(0);
19373 EVT SrcVT = SrcOp.getValueType();
19374 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
19375 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
// Bitcast splits elements (e.g. v2i64 -> v4i32): scale the index/count up.
19376 if ((SrcNumElts % DestNumElts) == 0) {
19377 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
19378 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
19379 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
19381 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19383 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
19384 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19385 V.getOperand(0), NewIndex);
19386 return DAG.getBitcast(NVT, NewExtract);
// Bitcast merges elements (e.g. v4i32 -> v2i64): scale the index/count down,
// provided both the element count and extract index divide evenly.
19389 if ((DestNumElts % SrcNumElts) == 0) {
19390 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
19391 if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
19392 ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
19393 EVT ScalarVT = SrcVT.getScalarType();
19394 if ((ExtIdx % DestSrcRatio) == 0) {
19396 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
19398 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
19399 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19400 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19401 SDValue NewExtract =
19402 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19403 V.getOperand(0), NewIndex);
19404 return DAG.getBitcast(NVT, NewExtract);
// Degenerate case: the scaled extract is a single element, so use
// EXTRACT_VECTOR_ELT instead of a one-element subvector extract.
19406 if (NewExtEC == 1 &&
19407 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
19408 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19409 SDValue NewExtract =
19410 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
19411 V.getOperand(0), NewIndex);
19412 return DAG.getBitcast(NVT, NewExtract);
19419 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
19420 unsigned ExtNumElts = NVT.getVectorMinNumElements();
19421 EVT ConcatSrcVT = V.getOperand(0).getValueType();
19422 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
19423 "Concat and extract subvector do not change element type");
19424 assert((ExtIdx % ExtNumElts) == 0 &&
19425 "Extract index is not a multiple of the input vector length.");
19427 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
19428 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
19430 // If the concatenated source types match this extract, it's a direct
19432 // extract_subvec (concat V1, V2, ...), i --> Vi
19433 if (ConcatSrcNumElts == ExtNumElts)
19434 return V.getOperand(ConcatOpIdx);
19436 // If the concatenated source vectors are a multiple length of this extract,
19437 // then extract a fraction of one of those source vectors directly from a
19438 // concat operand. Example:
19439 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
19440 // v2i8 extract_subvec v8i8 Y, 6
19441 if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
19443 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
19444 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
19445 "Trying to extract from >1 concat operand?");
19446 assert(NewExtIdx % ExtNumElts == 0 &&
19447 "Extract index is not a multiple of the input vector length.");
19448 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
19449 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
19450 V.getOperand(ConcatOpIdx), NewIndexC);
// The remaining folds tolerate a bitcast between the extract and its source.
19454 V = peekThroughBitcasts(V);
19456 // If the input is a build vector. Try to make a smaller build vector.
19457 if (V.getOpcode() == ISD::BUILD_VECTOR) {
19458 EVT InVT = V.getValueType();
19459 unsigned ExtractSize = NVT.getSizeInBits();
19460 unsigned EltSize = InVT.getScalarSizeInBits();
19461 // Only do this if we won't split any elements.
19462 if (ExtractSize % EltSize == 0) {
19463 unsigned NumElems = ExtractSize / EltSize;
19464 EVT EltVT = InVT.getVectorElementType();
19466 NumElems == 1 ? EltVT
19467 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
19468 if ((Level < AfterLegalizeDAG ||
19470 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
19471 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
// Convert the extract index from NVT elements into source-element units.
19472 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
19474 if (NumElems == 1) {
19475 SDValue Src = V->getOperand(IdxVal);
19476 if (EltVT != Src.getValueType())
19477 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
19478 return DAG.getBitcast(NVT, Src);
19481 // Extract the pieces from the original build_vector.
19482 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
19483 V->ops().slice(IdxVal, NumElems));
19484 return DAG.getBitcast(NVT, BuildVec);
19489 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
19490 // Handle only simple case where vector being inserted and vector
19491 // being extracted are of same size.
19492 EVT SmallVT = V.getOperand(1).getValueType();
19493 if (!NVT.bitsEq(SmallVT))
19497 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
19499 // indices are equal or bit offsets are equal => V1
19500 // otherwise => (extract_subvec V1, ExtIdx)
19501 uint64_t InsIdx = V.getConstantOperandVal(2);
// Compare bit offsets (not raw indices) since a bitcast may have changed
// the element size between the insert and the extract.
19502 if (InsIdx * SmallVT.getScalarSizeInBits() ==
19503 ExtIdx * NVT.getScalarSizeInBits())
19504 return DAG.getBitcast(NVT, V.getOperand(1));
19505 return DAG.getNode(
19506 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
19507 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
19511 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
// Last resort: let demanded-elements analysis simplify the operands; it
// returns true when N was updated in place.
19514 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19515 return SDValue(N, 0);
19520 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
19521 /// followed by concatenation. Narrow vector ops may have better performance
19522 /// than wide ops, and this can unlock further narrowing of other vector ops.
19523 /// Targets can invert this transform later if it is not profitable.
19524 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
19525 SelectionDAG &DAG) {
// Both inputs must be 2-way concats whose high halves are undef, i.e. the
// real data lives only in the low halves X and Y.
19526 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
19527 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
19528 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
19529 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
19532 // Split the wide shuffle mask into halves. Any mask element that is accessing
19533 // operand 1 is offset down to account for narrowing of the vectors.
19534 ArrayRef<int> Mask = Shuf->getMask();
19535 EVT VT = Shuf->getValueType(0);
19536 unsigned NumElts = VT.getVectorNumElements();
19537 unsigned HalfNumElts = NumElts / 2;
19538 SmallVector<int, 16> Mask0(HalfNumElts, -1);
19539 SmallVector<int, 16> Mask1(HalfNumElts, -1);
19540 for (unsigned i = 0; i != NumElts; ++i) {
// Elements referencing operand 1 (>= NumElts) shift down by HalfNumElts so
// they index Y in the narrow two-input shuffle.
19543 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
19544 if (i < HalfNumElts)
19547 Mask1[i - HalfNumElts] = M;
19550 // Ask the target if this is a valid transform.
19551 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19552 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
19554 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
19555 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
19558 // shuffle (concat X, undef), (concat Y, undef), Mask -->
19559 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
19560 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
19562 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
19563 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
19564 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
19567 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
19568 // or turn a shuffle of a single concat into simpler shuffle then concat.
19569 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
19570 EVT VT = N->getValueType(0);
19571 unsigned NumElts = VT.getVectorNumElements();
19573 SDValue N0 = N->getOperand(0);
19574 SDValue N1 = N->getOperand(1);
19575 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
19576 ArrayRef<int> Mask = SVN->getMask();
19578 SmallVector<SDValue, 4> Ops;
19579 EVT ConcatVT = N0.getOperand(0).getValueType();
19580 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
19581 unsigned NumConcats = NumElts / NumElemsPerConcat;
19583 auto IsUndefMaskElt = [](int i) { return i == -1; };
19585 // Special case: shuffle(concat(A,B)) can be more efficiently represented
19586 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
19587 // half vector elements.
19588 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
19589 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
19591 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
19593 Mask.slice(0, NumElemsPerConcat));
19594 N1 = DAG.getUNDEF(ConcatVT);
19595 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
19598 // Look at every vector that's inserted. We're looking for exact
19599 // subvector-sized copies from a concatenated vector
19600 for (unsigned I = 0; I != NumConcats; ++I) {
19601 unsigned Begin = I * NumElemsPerConcat;
19602 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
19604 // Make sure we're dealing with a copy.
19605 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
19606 Ops.push_back(DAG.getUNDEF(ConcatVT));
// For a non-undef chunk, every defined mask element must copy the matching
// lane of a single concat operand; OpIdx records which operand that is.
19611 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
19612 if (IsUndefMaskElt(SubMask[i]))
// Element must come from the same lane position within its source operand.
19614 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
19616 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
// All defined elements in this chunk must agree on one source operand.
19617 if (0 <= OpIdx && EltOpIdx != OpIdx)
19621 assert(0 <= OpIdx && "Unknown concat_vectors op");
// Operands beyond N0's count come from the second concat input N1.
19623 if (OpIdx < (int)N0.getNumOperands())
19624 Ops.push_back(N0.getOperand(OpIdx));
19626 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
19629 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19632 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
19633 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
19635 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
19636 // a simplification in some sense, but it isn't appropriate in general: some
19637 // BUILD_VECTORs are substantially cheaper than others. The general case
19638 // of a BUILD_VECTOR requires inserting each element individually (or
19639 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
19640 // all constants is a single constant pool load. A BUILD_VECTOR where each
19641 // element is identical is a splat. A BUILD_VECTOR where most of the operands
19642 // are undef lowers to a small number of element insertions.
19644 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
19645 // We don't fold shuffles where one side is a non-zero constant, and we don't
19646 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
19647 // non-constant operands. This seems to work out reasonably well in practice.
19648 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
19650 const TargetLowering &TLI) {
19651 EVT VT = SVN->getValueType(0);
19652 unsigned NumElts = VT.getVectorNumElements();
19653 SDValue N0 = SVN->getOperand(0);
19654 SDValue N1 = SVN->getOperand(1);
// Avoid duplicating work: only fold when this shuffle is the sole user.
19656 if (!N0->hasOneUse())
19659 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
19660 // discussed above.
19661 if (!N1.isUndef()) {
19662 if (!N1->hasOneUse())
19665 bool N0AnyConst = isAnyConstantBuildVector(N0);
19666 bool N1AnyConst = isAnyConstantBuildVector(N1);
19667 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
19669 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
19673 // If both inputs are splats of the same value then we can safely merge this
19674 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
19675 bool IsSplat = false;
19676 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
19677 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
19679 if (SDValue Splat0 = BV0->getSplatValue())
19680 IsSplat = (Splat0 == BV1->getSplatValue());
19682 SmallVector<SDValue, 8> Ops;
// Tracks non-constant operands already used, to enforce the no-duplicates
// heuristic described in the header comment.
19683 SmallSet<SDValue, 16> DuplicateOps;
19684 for (int M : SVN->getMask()) {
19685 SDValue Op = DAG.getUNDEF(VT.getScalarType());
// Map the mask element to (source vector, lane index) it selects.
19687 int Idx = M < (int)NumElts ? M : M - NumElts;
19688 SDValue &S = (M < (int)NumElts ? N0 : N1);
19689 if (S.getOpcode() == ISD::BUILD_VECTOR) {
19690 Op = S.getOperand(Idx);
19691 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Only lane 0 of a SCALAR_TO_VECTOR is defined; other lanes are undef.
19692 SDValue Op0 = S.getOperand(0);
19693 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
19695 // Operand can't be combined - bail out.
19700 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
19701 // generating a splat; semantically, this is fine, but it's likely to
19702 // generate low-quality code if the target can't reconstruct an appropriate
19704 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
19705 if (!IsSplat && !DuplicateOps.insert(Op).second)
19711 // BUILD_VECTOR requires all inputs to be of the same type, find the
19712 // maximum type and extend them all.
19713 EVT SVT = VT.getScalarType();
19714 if (SVT.isInteger())
19715 for (SDValue &Op : Ops)
19716 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
19717 if (SVT != VT.getScalarType())
19718 for (SDValue &Op : Ops)
// Prefer zext when the target says it is free; otherwise sext.
19719 Op = TLI.isZExtFree(Op.getValueType(), SVT)
19720 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
19721 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
19722 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
19725 // Match shuffles that can be converted to any_vector_extend_in_reg.
19726 // This is often generated during legalization.
19727 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
19728 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
19729 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
19731 const TargetLowering &TLI,
19732 bool LegalOperations) {
19733 EVT VT = SVN->getValueType(0);
19734 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19736 // TODO Add support for big-endian when we have a test case.
19737 if (!VT.isInteger() || IsBigEndian)
19740 unsigned NumElts = VT.getVectorNumElements();
19741 unsigned EltSizeInBits = VT.getScalarSizeInBits();
19742 ArrayRef<int> Mask = SVN->getMask();
19743 SDValue N0 = SVN->getOperand(0);
19745 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
// Returns true when every Scale-th mask element selects consecutive low
// source lanes (the pattern an in-register any-extend by Scale produces).
19746 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
19747 for (unsigned i = 0; i != NumElts; ++i) {
19750 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
19757 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
19758 // power-of-2 extensions as they are the most likely.
19759 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
19760 // Check for non power of 2 vector sizes
19761 if (NumElts % Scale != 0)
19763 if (!isAnyExtend(Scale))
// Build the wider-element type: same total width, elements Scale x larger.
19766 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
19767 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
19768 // Never create an illegal type. Only create unsupported operations if we
19769 // are pre-legalization.
19770 if (TLI.isTypeLegal(OutVT))
19771 if (!LegalOperations ||
19772 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
19773 return DAG.getBitcast(VT,
19774 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
19775 SDLoc(SVN), OutVT, N0));
19781 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
19782 // each source element of a large type into the lowest elements of a smaller
19783 // destination type. This is often generated during legalization.
19784 // If the source node itself was a '*_extend_vector_inreg' node then we should
19785 // then be able to remove it.
19786 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
19787 SelectionDAG &DAG) {
19788 EVT VT = SVN->getValueType(0);
19789 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19791 // TODO Add support for big-endian when we have a test case.
19792 if (!VT.isInteger() || IsBigEndian)
19795 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
// Only shuffles fed (possibly through bitcasts) by an in-register extend
// are candidates: the truncating shuffle can then cancel the extend.
19797 unsigned Opcode = N0.getOpcode();
19798 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
19799 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
19800 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
19803 SDValue N00 = N0.getOperand(0);
19804 ArrayRef<int> Mask = SVN->getMask();
19805 unsigned NumElts = VT.getVectorNumElements();
19806 unsigned EltSizeInBits = VT.getScalarSizeInBits();
19807 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
19808 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
// The extend must widen elements by an exact integer factor.
19810 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
19812 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
19814 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
19815 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
19816 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
// Returns true when the mask gathers every Scale-th lane into the low
// lanes - the pattern of an in-register truncation by Scale.
19817 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
19818 for (unsigned i = 0; i != NumElts; ++i) {
19821 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
19828 // At the moment we just handle the case where we've truncated back to the
19829 // same size as before the extension.
19830 // TODO: handle more extension/truncation cases as cases arise.
19831 if (EltSizeInBits != ExtSrcSizeInBits)
19834 // We can remove *extend_vector_inreg only if the truncation happens at
19835 // the same scale as the extension.
19836 if (isTruncate(ExtScale))
19837 return DAG.getBitcast(VT, N00);
19842 // Combine shuffles of splat-shuffles of the form:
19843 // shuffle (shuffle V, undef, splat-mask), undef, M
19844 // If splat-mask contains undef elements, we need to be careful about
19845 // introducing undef's in the folded mask which are not the result of composing
19846 // the masks of the shuffles.
19847 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
19848 SelectionDAG &DAG) {
// Only handle unary shuffles whose single input is itself a splat shuffle.
19849 if (!Shuf->getOperand(1).isUndef())
19851 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0))
19852 if (!Splat || !Splat->isSplat())
19855 ArrayRef<int> ShufMask = Shuf->getMask();
19856 ArrayRef<int> SplatMask = Splat->getMask();
19857 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
19859 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
19860 // every undef mask element in the splat-shuffle has a corresponding undef
19861 // element in the user-shuffle's mask or if the composition of mask elements
19862 // would result in undef.
19863 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
19864 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
19865 // In this case it is not legal to simplify to the splat-shuffle because we
19866 // may be exposing the users of the shuffle an undef element at index 1
19867 // which was not there before the combine.
19868 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
19869 // In this case the composition of masks yields SplatMask, so it's ok to
19870 // simplify to the splat-shuffle.
19871 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
19872 // In this case the composed mask includes all undef elements of SplatMask
19873 // and in addition sets element zero to undef. It is safe to simplify to
19874 // the splat-shuffle.
// Returns false (disallow) when the fold would expose an undef lane that
// the composition of the two masks would not itself have produced.
19875 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
19876 ArrayRef<int> SplatMask) {
19877 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
19878 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
19879 SplatMask[UserMask[i]] != -1)
19883 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
19884 return Shuf->getOperand(0);
19886 // Create a new shuffle with a mask that is composed of the two shuffles'
// masks: undef stays undef, otherwise look the user index up in SplatMask.
19888 SmallVector<int, 32> NewMask;
19889 for (int Idx : ShufMask)
19890 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
// Build the composed shuffle directly on the splat's original operands.
19892 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
19893 Splat->getOperand(0), Splat->getOperand(1),
19897 /// Combine shuffle of shuffle of the form:
19898 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
19899 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
19900 SelectionDAG &DAG) {
// Both shuffles must be unary (second operand undef) for the composition
// below to be a simple mask-through-mask lookup.
19901 if (!OuterShuf->getOperand(1).isUndef())
19903 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
19904 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
19907 ArrayRef<int> OuterMask = OuterShuf->getMask();
19908 ArrayRef<int> InnerMask = InnerShuf->getMask();
19909 unsigned NumElts = OuterMask.size();
19910 assert(NumElts == InnerMask.size() && "Mask length mismatch");
// Compose the two masks lane by lane, proving along the way that every
// defined lane resolves to the same source element (a splat).
19911 SmallVector<int, 32> CombinedMask(NumElts, -1);
19912 int SplatIndex = -1;
19913 for (unsigned i = 0; i != NumElts; ++i) {
19914 // Undef lanes remain undef.
19915 int OuterMaskElt = OuterMask[i];
19916 if (OuterMaskElt == -1)
19919 // Peek through the shuffle masks to get the underlying source element.
19920 int InnerMaskElt = InnerMask[OuterMaskElt];
19921 if (InnerMaskElt == -1)
19924 // Initialize the splatted element.
19925 if (SplatIndex == -1)
19926 SplatIndex = InnerMaskElt;
19928 // Non-matching index - this is not a splat.
19929 if (SplatIndex != InnerMaskElt)
19932 CombinedMask[i] = InnerMaskElt;
// Sanity check: the combined mask is either all-undef or a genuine splat.
19934 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
19935 getSplatIndex(CombinedMask) != -1) &&
19936 "Expected a splat mask");
19938 // TODO: The transform may be a win even if the mask is not legal.
19939 EVT VT = OuterShuf->getValueType(0);
19940 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
19941 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
// Replace the pair with one splat shuffle of the inner shuffle's operands.
19944 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
19945 InnerShuf->getOperand(1), CombinedMask);
19948 /// If the shuffle mask is taking exactly one element from the first vector
19949 /// operand and passing through all other elements from the second vector
19950 /// operand, return the index of the mask element that is choosing an element
19951 /// from the first operand. Otherwise, return -1.
19952 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
19953 int MaskSize = Mask.size();
19954 int EltFromOp0 = -1;
19955 // TODO: This does not match if there are undef elements in the shuffle mask.
19956 // Should we ignore undefs in the shuffle mask instead? The trade-off is
19957 // removing an instruction (a shuffle), but losing the knowledge that some
19958 // vector lanes are not needed.
19959 for (int i = 0; i != MaskSize; ++i) {
// Mask values in [0, MaskSize) select from operand 0; values in
// [MaskSize, 2*MaskSize) select from operand 1.
19960 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
19961 // We're looking for a shuffle of exactly one element from operand 0.
19962 if (EltFromOp0 != -1)
// Every operand-1 lane must pass straight through at its own position.
19965 } else if (Mask[i] != i + MaskSize) {
19966 // Nothing from operand 1 can change lanes.
19973 /// If a shuffle inserts exactly one element from a source vector operand into
19974 /// another vector operand and we can access the specified element as a scalar,
19975 /// then we can eliminate the shuffle.
19976 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
19977 SelectionDAG &DAG) {
19978 // First, check if we are taking one element of a vector and shuffling that
19979 // element into another vector.
19980 ArrayRef<int> Mask = Shuf->getMask();
19981 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
19982 SDValue Op0 = Shuf->getOperand(0);
19983 SDValue Op1 = Shuf->getOperand(1);
19984 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
19985 if (ShufOp0Index == -1) {
19986 // Commute mask and check again.
// The pattern may match with the operands swapped; normalize so that the
// "one inserted element" always comes from Op0 for the logic below.
19987 ShuffleVectorSDNode::commuteMask(CommutedMask);
19988 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
19989 if (ShufOp0Index == -1)
19991 // Commute operands to match the commuted shuffle mask.
19992 std::swap(Op0, Op1);
19993 Mask = CommutedMask;
19996 // The shuffle inserts exactly one element from operand 0 into operand 1.
19997 // Now see if we can access that element as a scalar via a real insert element
19999 // TODO: We can try harder to locate the element as a scalar. Examples: it
20000 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
20001 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
20002 "Shuffle mask value must be from operand 0");
20003 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
// The insert's index must be a constant equal to the lane the shuffle reads,
// so the scalar really is the element being moved.
20006 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
20007 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
20010 // There's an existing insertelement with constant insertion index, so we
20011 // don't need to check the legality/profitability of a replacement operation
20012 // that differs at most in the constant value. The target should be able to
20013 // lower any of those in a similar way. If not, legalization will expand this
20014 // to a scalar-to-vector plus shuffle.
20016 // Note that the shuffle may move the scalar from the position that the insert
20017 // element used. Therefore, our new insert element occurs at the shuffle's
20018 // mask index value, not the insert's index value.
20019 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
20020 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
20021 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
20022 Op1, Op0.getOperand(1), NewInsIndex);
20025 /// If we have a unary shuffle of a shuffle, see if it can be folded away
20026 /// completely. This has the potential to lose undef knowledge because the first
20027 /// shuffle may not have an undef mask element where the second one does. So
20028 /// only call this after doing simplifications based on demanded elements.
20029 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20030 // shuf (shuf0 X, Y, Mask0), undef, Mask
20031 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20032 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
20035 ArrayRef<int> Mask = Shuf->getMask();
20036 ArrayRef<int> Mask0 = Shuf0->getMask();
20037 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20038 // Ignore undef elements.
// Mask values must reference the inner shuffle's result (operand 0 range).
20041 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
20043 // Is the element of the shuffle operand chosen by this shuffle the same as
20044 // the element chosen by the shuffle operand itself?
20045 if (Mask0[Mask[i]] != Mask0[i])
20048 // Every element of this shuffle is identical to the result of the previous
20049 // shuffle, so we can replace this value.
20050 return Shuf->getOperand(0);
// Main combine entry point for ISD::VECTOR_SHUFFLE. Applies a long, ordered
// pipeline of canonicalizations and folds; order matters (e.g. demanded-elts
// simplification runs before simplifyShuffleOfShuffle by design).
20053 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
20054 EVT VT = N->getValueType(0);
20055 unsigned NumElts = VT.getVectorNumElements();
20057 SDValue N0 = N->getOperand(0);
20058 SDValue N1 = N->getOperand(1);
20060 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
20062 // Canonicalize shuffle undef, undef -> undef
20063 if (N0.isUndef() && N1.isUndef())
20064 return DAG.getUNDEF(VT);
20066 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20068 // Canonicalize shuffle v, v -> v, undef
// Rewrite mask indices that reference operand 1 to the equivalent lane of
// operand 0, since both operands are the same vector.
20070 SmallVector<int, 8> NewMask;
20071 for (unsigned i = 0; i != NumElts; ++i) {
20072 int Idx = SVN->getMaskElt(i);
20073 if (Idx >= (int)NumElts) Idx -= NumElts;
20074 NewMask.push_back(Idx);
20076 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
20079 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
20081 return DAG.getCommutedVectorShuffle(*SVN);
20083 // Remove references to rhs if it is undef
20084 if (N1.isUndef()) {
20085 bool Changed = false;
20086 SmallVector<int, 8> NewMask;
20087 for (unsigned i = 0; i != NumElts; ++i) {
20088 int Idx = SVN->getMaskElt(i);
20089 if (Idx >= (int)NumElts) {
20093 NewMask.push_back(Idx);
20096 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
// shuffle of a single-element insert -> insert into the other operand.
20099 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
20102 // A shuffle of a single vector that is a splatted value can always be folded.
20103 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
20106 if (SDValue V = formSplatFromShuffles(SVN, DAG))
20109 // If it is a splat, check if the argument vector is another splat or a
20111 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
20112 int SplatIndex = SVN->getSplatIndex();
20113 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
20114 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
20115 // splat (vector_bo L, R), Index -->
20116 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
20117 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
20119 EVT EltVT = VT.getScalarType();
20120 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
20121 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
20122 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
// Preserve the original binop's fast-math/nuw/nsw flags on the scalar op.
20123 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
20124 N0.getNode()->getFlags());
20125 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
20126 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
20127 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
20130 // If this is a bit convert that changes the element type of the vector but
20131 // not the number of vector elements, look through it. Be careful not to
20132 // look though conversions that change things like v4f32 to v2f64.
20133 SDNode *V = N0.getNode();
20134 if (V->getOpcode() == ISD::BITCAST) {
20135 SDValue ConvInput = V->getOperand(0);
20136 if (ConvInput.getValueType().isVector() &&
20137 ConvInput.getValueType().getVectorNumElements() == NumElts)
20138 V = ConvInput.getNode();
// Splat-of-BUILD_VECTOR: fold to the build_vector itself when all defined
// elements are the same, otherwise re-splat as a uniform build_vector.
20141 if (V->getOpcode() == ISD::BUILD_VECTOR) {
20142 assert(V->getNumOperands() == NumElts &&
20143 "BUILD_VECTOR has wrong number of operands");
20145 bool AllSame = true;
20146 for (unsigned i = 0; i != NumElts; ++i) {
20147 if (!V->getOperand(i).isUndef()) {
20148 Base = V->getOperand(i);
20152 // Splat of <u, u, u, u>, return <u, u, u, u>
20153 if (!Base.getNode())
20155 for (unsigned i = 0; i != NumElts; ++i) {
20156 if (V->getOperand(i) != Base) {
20161 // Splat of <x, x, x, x>, return <x, x, x, x>
20165 // Canonicalize any other splat as a build_vector.
20166 SDValue Splatted = V->getOperand(SplatIndex);
20167 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
20168 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
20170 // We may have jumped through bitcasts, so the type of the
20171 // BUILD_VECTOR may not match the type of the shuffle.
20172 if (V->getValueType(0) != VT)
20173 NewBV = DAG.getBitcast(VT, NewBV);
20178 // Simplify source operands based on shuffle mask.
20179 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20180 return SDValue(N, 0);
20182 // This is intentionally placed after demanded elements simplification because
20183 // it could eliminate knowledge of undef elements created by this shuffle.
20184 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
20187 // Match shuffles that can be converted to any_vector_extend_in_reg.
20188 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
20191 // Combine "truncate_vector_in_reg" style shuffles.
20192 if (SDValue V = combineTruncationShuffle(SVN, DAG))
// shuffle-of-concats: split into shuffles of the narrow concat operands.
20195 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
20196 Level < AfterLegalizeVectorOps &&
20198 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
20199 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
20200 if (SDValue V = partitionShuffleOfConcats(N, DAG))
20204 // A shuffle of a concat of the same narrow vector can be reduced to use
20205 // only low-half elements of a concat with undef:
20206 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
20207 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
20208 N0.getNumOperands() == 2 &&
20209 N0.getOperand(0) == N0.getOperand(1)) {
20210 int HalfNumElts = (int)NumElts / 2;
20211 SmallVector<int, 8> NewMask;
20212 for (unsigned i = 0; i != NumElts; ++i) {
20213 int Idx = SVN->getMaskElt(i);
// High-half indices alias the duplicated low half; remap them down.
20214 if (Idx >= HalfNumElts) {
20215 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
20216 Idx -= HalfNumElts;
20218 NewMask.push_back(Idx);
20220 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
20221 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
20222 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
20223 N0.getOperand(0), UndefVec);
20224 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
20228 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20229 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20230 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
20231 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
20234 // If this shuffle only has a single input that is a bitcasted shuffle,
20235 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
20236 // back to their original types.
20237 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
20238 N1.isUndef() && Level < AfterLegalizeVectorOps &&
20239 TLI.isTypeLegal(VT)) {
20241 SDValue BC0 = peekThroughOneUseBitcasts(N0);
20242 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
20243 EVT SVT = VT.getScalarType();
20244 EVT InnerVT = BC0->getValueType(0);
20245 EVT InnerSVT = InnerVT.getScalarType();
20247 // Determine which shuffle works with the smaller scalar type.
20248 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
20249 EVT ScaleSVT = ScaleVT.getScalarType();
// Both element sizes must be multiples of the common smaller scalar so
// the two masks can be expressed at one shared granularity.
20251 if (TLI.isTypeLegal(ScaleVT) &&
20252 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
20253 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
20254 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
20255 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
20257 // Scale the shuffle masks to the smaller scalar type.
20258 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
20259 SmallVector<int, 8> InnerMask;
20260 SmallVector<int, 8> OuterMask;
20261 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
20262 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
20264 // Merge the shuffle masks.
20265 SmallVector<int, 8> NewMask;
20266 for (int M : OuterMask)
20267 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
20269 // Test for shuffle mask legality over both commutations.
20270 SDValue SV0 = BC0->getOperand(0);
20271 SDValue SV1 = BC0->getOperand(1);
20272 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
20274 std::swap(SV0, SV1);
20275 ShuffleVectorSDNode::commuteMask(NewMask);
20276 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
20280 SV0 = DAG.getBitcast(ScaleVT, SV0);
20281 SV1 = DAG.getBitcast(ScaleVT, SV1);
20282 return DAG.getBitcast(
20283 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
20289 // Canonicalize shuffles according to rules:
20290 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
20291 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
20292 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
20293 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
20294 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
20295 TLI.isTypeLegal(VT)) {
20296 // The incoming shuffle must be of the same type as the result of the
20297 // current shuffle.
20298 assert(N1->getOperand(0).getValueType() == VT &&
20299 "Shuffle types don't match");
20301 SDValue SV0 = N1->getOperand(0);
20302 SDValue SV1 = N1->getOperand(1);
20303 bool HasSameOp0 = N0 == SV0;
20304 bool IsSV1Undef = SV1.isUndef();
20305 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
20306 // Commute the operands of this shuffle so that next rule
20308 return DAG.getCommutedVectorShuffle(*SVN);
20311 // Try to fold according to rules:
20312 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
20313 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
20314 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
20315 // Don't try to fold shuffles with illegal type.
20316 // Only fold if this shuffle is the only user of the other shuffle.
20317 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
20318 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
20319 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
20321 // Don't try to fold splats; they're likely to simplify somehow, or they
20323 if (OtherSV->isSplat())
20326 // The incoming shuffle must be of the same type as the result of the
20327 // current shuffle.
20328 assert(OtherSV->getOperand(0).getValueType() == VT &&
20329 "Shuffle types don't match");
// SV0/SV1 are discovered incrementally below: the first two distinct
// source vectors referenced by the composed mask.
20332 SmallVector<int, 4> Mask;
20333 // Compute the combined shuffle mask for a shuffle with SV0 as the first
20334 // operand, and SV1 as the second operand.
20335 for (unsigned i = 0; i != NumElts; ++i) {
20336 int Idx = SVN->getMaskElt(i);
20338 // Propagate Undef.
20339 Mask.push_back(Idx);
20343 SDValue CurrentVec;
20344 if (Idx < (int)NumElts) {
20345 // This shuffle index refers to the inner shuffle N0. Lookup the inner
20346 // shuffle mask to identify which vector is actually referenced.
20347 Idx = OtherSV->getMaskElt(Idx);
20349 // Propagate Undef.
20350 Mask.push_back(Idx);
20354 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
20355 : OtherSV->getOperand(1);
20357 // This shuffle index references an element within N1.
20361 // Simple case where 'CurrentVec' is UNDEF.
20362 if (CurrentVec.isUndef()) {
20363 Mask.push_back(-1);
20367 // Canonicalize the shuffle index. We don't know yet if CurrentVec
20368 // will be the first or second operand of the combined shuffle.
20369 Idx = Idx % NumElts;
20370 if (!SV0.getNode() || SV0 == CurrentVec) {
20371 // Ok. CurrentVec is the left hand side.
20372 // Update the mask accordingly.
20374 Mask.push_back(Idx);
20378 // Bail out if we cannot convert the shuffle pair into a single shuffle.
20379 if (SV1.getNode() && SV1 != CurrentVec)
20382 // Ok. CurrentVec is the right hand side.
20383 // Update the mask accordingly.
20385 Mask.push_back(Idx + NumElts);
20388 // Check if all indices in Mask are Undef. In case, propagate Undef.
20389 bool isUndefMask = true;
20390 for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
20391 isUndefMask &= Mask[i] < 0;
20394 return DAG.getUNDEF(VT);
// Unreferenced operands become undef in the combined shuffle.
20396 if (!SV0.getNode())
20397 SV0 = DAG.getUNDEF(VT);
20398 if (!SV1.getNode())
20399 SV1 = DAG.getUNDEF(VT);
20401 // Avoid introducing shuffles with illegal mask.
20402 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
20403 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
20404 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
20405 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
20406 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
20407 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
20408 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
20411 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
// Combine entry point for ISD::SCALAR_TO_VECTOR. The visible fold rewrites
// scalar_to_vector(extract_vector_elt(V, C0)) as a shuffle of V (plus a
// truncate or subvector extract where the types demand it), avoiding the
// round-trip through a scalar register.
20417 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
20418 SDValue InVal = N->getOperand(0);
20419 EVT VT = N->getValueType(0);
20421 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
20422 // with a VECTOR_SHUFFLE and possible truncate.
// Fixed-length vectors only: the shuffle-mask construction below needs a
// known element count.
20423 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20424 VT.isFixedLengthVector() &&
20425 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
20426 SDValue InVec = InVal->getOperand(0);
20427 SDValue EltNo = InVal->getOperand(1);
20428 auto InVecT = InVec.getValueType();
20429 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
20430 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
20431 int Elt = C0->getZExtValue();
20433 // If we have an implict truncate do truncate here as long as it's legal.
20434 // if it's not legal, this should
20435 if (VT.getScalarType() != InVal.getValueType() &&
20436 InVal.getValueType().isScalarInteger() &&
20437 isTypeLegal(VT.getScalarType())) {
// Make the truncate explicit, then retry as scalar_to_vector of it.
20439 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
20440 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
20442 if (VT.getScalarType() == InVecT.getScalarType() &&
20443 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
// Ask the target for a legal shuffle that moves element Elt to lane 0.
20444 SDValue LegalShuffle =
20445 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
20446 DAG.getUNDEF(InVecT), NewMask, DAG);
20447 if (LegalShuffle) {
20448 // If the initial vector is the correct size this shuffle is a
20451 return LegalShuffle;
20452 // If not we must truncate the vector.
20453 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
// Narrow the shuffled vector by extracting the leading subvector.
20454 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
20455 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
20456 InVecT.getVectorElementType(),
20457 VT.getVectorNumElements());
20458 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
20459 LegalShuffle, ZeroIdx);
// Combine entry point for ISD::INSERT_SUBVECTOR.
// Operands: N0 = destination vector, N1 = subvector being inserted,
// N2 = constant insertion index (also read as InsIdx).
20469 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
20470 EVT VT = N->getValueType(0);
20471 SDValue N0 = N->getOperand(0);
20472 SDValue N1 = N->getOperand(1);
20473 SDValue N2 = N->getOperand(2);
20474 uint64_t InsIdx = N->getConstantOperandVal(2);
20476 // If inserting an UNDEF, just return the original vector.
20480 // If this is an insert of an extracted vector into an undef vector, we can
20481 // just use the input to the extract.
// Matching index and full-width type mean the extract+insert round-trips.
20482 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20483 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
20484 return N1.getOperand(0);
20486 // If we are inserting a bitcast value into an undef, with the same
20487 // number of elements, just use the bitcast input of the extract.
20488 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
20489 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
20490 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
20491 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20492 N1.getOperand(0).getOperand(1) == N2 &&
20493 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
20494 VT.getVectorNumElements() &&
20495 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
20496 VT.getSizeInBits()) {
20497 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
20500 // If both N1 and N2 are bitcast values on which insert_subvector
20501 // would makes sense, pull the bitcast through.
20502 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
20503 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
20504 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
20505 SDValue CN0 = N0.getOperand(0);
20506 SDValue CN1 = N1.getOperand(0);
20507 EVT CN0VT = CN0.getValueType();
20508 EVT CN1VT = CN1.getValueType();
// Same element type and count means the index N2 is still meaningful on
// the pre-bitcast types.
20509 if (CN0VT.isVector() && CN1VT.isVector() &&
20510 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
20511 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
20512 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
20513 CN0.getValueType(), CN0, CN1, N2);
20514 return DAG.getBitcast(VT, NewINSERT);
20518 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
20519 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
20520 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
20521 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
20522 N0.getOperand(1).getValueType() == N1.getValueType() &&
20523 N0.getOperand(2) == N2)
20524 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
20527 // Eliminate an intermediate insert into an undef vector:
20528 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
20529 // insert_subvector undef, X, N2
20530 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
20531 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
20532 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
20533 N1.getOperand(1), N2);
20535 // Push subvector bitcasts to the output, adjusting the index as we go.
20536 // insert_subvector(bitcast(v), bitcast(s), c1)
20537 // -> bitcast(insert_subvector(v, s, c2))
20538 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
20539 N1.getOpcode() == ISD::BITCAST) {
20540 SDValue N0Src = peekThroughBitcasts(N0);
20541 SDValue N1Src = peekThroughBitcasts(N1);
20542 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
20543 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
20544 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
20545 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
20549 LLVMContext &Ctx = *DAG.getContext();
20550 unsigned NumElts = VT.getVectorNumElements();
20551 unsigned EltSizeInBits = VT.getScalarSizeInBits();
// Rescale the insertion index to the source element granularity; either
// the outer elements are wider (scale up) or narrower (scale down, only
// when index and count divide evenly).
20552 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
20553 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
20554 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
20555 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
20556 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
20557 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
20558 if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
20559 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
20560 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
20563 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
20564 SDValue Res = DAG.getBitcast(NewVT, N0Src);
20565 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
20566 return DAG.getBitcast(VT, Res);
20571 // Canonicalize insert_subvector dag nodes.
20573 // (insert_subvector (insert_subvector A, Idx0), Idx1)
20574 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
// Order nested inserts by ascending index to expose further combines.
20575 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
20576 N1.getValueType() == N0.getOperand(1).getValueType()) {
20577 unsigned OtherIdx = N0.getConstantOperandVal(2);
20578 if (InsIdx < OtherIdx) {
20580 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
20581 N0.getOperand(0), N1, N2);
20582 AddToWorklist(NewOp.getNode());
20583 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
20584 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
20588 // If the input vector is a concatenation, and the insert replaces
20589 // one of the pieces, we can optimize into a single concat_vectors.
20590 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
20591 N0.getOperand(0).getValueType() == N1.getValueType()) {
20592 unsigned Factor = N1.getValueType().getVectorNumElements();
20593 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
20594 Ops[InsIdx / Factor] = N1;
20595 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20598 // Simplify source operands based on insertion.
20599 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20600 return SDValue(N, 0);
// Combine entry point for ISD::FP_TO_FP16: an fp16->fp->fp16 round trip
// reproduces the original half bits, so the pair folds to its input.
20605 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
20606 SDValue N0 = N->getOperand(0);
20608 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
20609 if (N0->getOpcode() == ISD::FP16_TO_FP)
20610 return N0->getOperand(0);
// Combine entry point for ISD::FP16_TO_FP: the conversion only reads the low
// 16 bits of its operand, so masking with 0xffff beforehand is redundant.
20615 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
20616 SDValue N0 = N->getOperand(0);
20618 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
20619 if (N0->getOpcode() == ISD::AND) {
// Non-opaque constant only, so the mask value can be inspected.
20620 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
20621 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
20622 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
// Combine entry point shared by all ISD::VECREDUCE_* opcodes.
20630 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
20631 SDValue N0 = N->getOperand(0);
20632 EVT VT = N0.getValueType();
20633 unsigned Opcode = N->getOpcode();
20635 // VECREDUCE over 1-element vector is just an extract.
20636 if (VT.getVectorNumElements() == 1) {
20639 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
20640 DAG.getVectorIdxConstant(0, dl));
// The reduction may produce a wider scalar than the element type.
20641 if (Res.getValueType() != N->getValueType(0))
20642 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
20646 // On an boolean vector an and/or reduction is the same as a umin/umax
20647 // reduction. Convert them if the latter is legal while the former isn't.
20648 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
20649 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
20650 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
// ComputeNumSignBits == bit width proves each element is all-0 or all-1,
// i.e. the vector is effectively boolean.
20651 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
20652 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
20653 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
20654 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
20660 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
20661 /// with the destination vector and a zero vector.
20662 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
20663 /// vector_shuffle V, Zero, <0, 4, 2, 4>
20664 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
20665 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
20667 EVT VT = N->getValueType(0);
20668 SDValue LHS = N->getOperand(0);
// Look through bitcasts on the mask operand so constant build_vectors
// behind a cast are still recognized.
20669 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
20672 // Make sure we're not running after operation legalization where it
20673 // may have custom lowered the vector shuffles.
20674 if (LegalOperations)
20677 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
20680 EVT RVT = RHS.getValueType();
20681 unsigned NumElts = RHS.getNumOperands();
20683 // Attempt to create a valid clear mask, splitting the mask into
20684 // sub elements and checking to see if each is
20685 // all zeros or all ones - suitable for shuffle masking.
// 'Split' subdivides each element into Split sub-elements; returns the
// shuffle replacement if every sub-element is all-ones (keep lane) or
// all-zeros (take from the zero vector).
20686 auto BuildClearMask = [&](int Split) {
20687 int NumSubElts = NumElts * Split;
20688 int NumSubBits = RVT.getScalarSizeInBits() / Split;
20690 SmallVector<int, 8> Indices;
20691 for (int i = 0; i != NumSubElts; ++i) {
20692 int EltIdx = i / Split;
20693 int SubIdx = i % Split;
20694 SDValue Elt = RHS.getOperand(EltIdx);
20695 // X & undef --> 0 (not undef). So this lane must be converted to choose
20696 // from the zero constant vector (same as if the element had all 0-bits).
20697 if (Elt.isUndef()) {
20698 Indices.push_back(i + NumSubElts);
// Only integer and FP constants can be decomposed into bit masks.
20703 if (isa<ConstantSDNode>(Elt))
20704 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
20705 else if (isa<ConstantFPSDNode>(Elt))
20706 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
20710 // Extract the sub element from the constant bit mask.
// Endianness decides which end of the constant each sub-element maps to.
20711 if (DAG.getDataLayout().isBigEndian())
20712 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
20714 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
// All-ones keeps the LHS lane (index i); all-zeros picks the zero vector
// (index i + NumSubElts); anything mixed defeats the transform.
20716 if (Bits.isAllOnesValue())
20717 Indices.push_back(i);
20718 else if (Bits == 0)
20719 Indices.push_back(i + NumSubElts);
20724 // Let's see if the target supports this vector_shuffle.
20725 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
20726 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
20727 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
20730 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
20731 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
20732 DAG.getBitcast(ClearVT, LHS),
20736 // Determine maximum split level (byte level masking).
20738 if (RVT.getScalarSizeInBits() % 8 == 0)
20739 MaxSplit = RVT.getScalarSizeInBits() / 8;
// Try coarse splits first; the first legal clear mask wins.
20741 for (int Split = 1; Split <= MaxSplit; ++Split)
20742 if (RVT.getScalarSizeInBits() % Split == 0)
20743 if (SDValue S = BuildClearMask(Split))
20749 /// If a vector binop is performed on splat values, it may be profitable to
20750 /// extract, scalarize, and insert/splat.
20751 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
20752 SDValue N0 = N->getOperand(0);
20753 SDValue N1 = N->getOperand(1);
20754 unsigned Opcode = N->getOpcode();
20755 EVT VT = N->getValueType(0);
20756 EVT EltVT = VT.getVectorElementType();
20757 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20759 // TODO: Remove/replace the extract cost check? If the elements are available
20760 // as scalars, then there may be no extract cost. Should we ask if
20761 // inserting a scalar back into a vector is cheap instead?
20762 int Index0, Index1;
20763 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
20764 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
// Bail out unless both operands are splats of the same lane, the element
// types match, extraction is cheap, and the scalar op is legal/custom.
20765 if (!Src0 || !Src1 || Index0 != Index1 ||
20766 Src0.getValueType().getVectorElementType() != EltVT ||
20767 Src1.getValueType().getVectorElementType() != EltVT ||
20768 !TLI.isExtractVecEltCheap(VT, Index0) ||
20769 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
// Extract the splatted scalars and perform the operation once in scalar form.
20773 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
20774 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
20775 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
20776 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
20778 // If all lanes but 1 are undefined, no need to splat the scalar result.
20779 // TODO: Keep track of undefs and use that info in the general case.
20780 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
20781 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
20782 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
20783 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
20784 // build_vec ..undef, (bo X, Y), undef...
20785 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
20786 Ops[Index0] = ScalarBO;
20787 return DAG.getBuildVector(VT, DL, Ops);
20790 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
20791 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
20792 return DAG.getBuildVector(VT, DL, Ops);
20795 /// Visit a binary vector operation, like ADD.
20796 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
20797 assert(N->getValueType(0).isVector() &&
20798 "SimplifyVBinOp only works on vectors!");
20800 SDValue LHS = N->getOperand(0);
20801 SDValue RHS = N->getOperand(1);
20802 SDValue Ops[] = {LHS, RHS};
20803 EVT VT = N->getValueType(0);
20804 unsigned Opcode = N->getOpcode();
20805 SDNodeFlags Flags = N->getFlags();
20807 // See if we can constant fold the vector operation.
20808 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
20809 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
20812 // Move unary shuffles with identical masks after a vector binop:
20813 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
20814 // --> shuffle (VBinOp A, B), Undef, Mask
20815 // This does not require type legality checks because we are creating the
20816 // same types of operations that are in the original sequence. We do have to
20817 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
20818 // though. This code is adapted from the identical transform in instcombine.
20819 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
20820 Opcode != ISD::UREM && Opcode != ISD::SREM &&
20821 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
20822 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
20823 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
// Require at least one one-use shuffle (or LHS == RHS) so the transform
// does not increase the total number of shuffles in the DAG.
20824 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
20825 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
20826 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
20828 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
20829 RHS.getOperand(0), Flags);
20830 SDValue UndefV = LHS.getOperand(1);
20831 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
20834 // Try to sink a splat shuffle after a binop with a uniform constant.
20835 // This is limited to cases where neither the shuffle nor the constant have
20836 // undefined elements because that could be poison-unsafe or inhibit
20837 // demanded elements analysis. It is further limited to not change a splat
20838 // of an inserted scalar because that may be optimized better by
20839 // load-folding or other target-specific behaviors.
20840 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
20841 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
20842 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
20843 // binop (splat X), (splat C) --> splat (binop X, C)
20845 SDValue X = Shuf0->getOperand(0);
20846 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
20847 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
// Mirror of the case above with the constant on the left.
20850 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
20851 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
20852 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
20853 // binop (splat C), (splat X) --> splat (binop C, X)
20855 SDValue X = Shuf1->getOperand(0);
20856 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
20857 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
20862 // The following pattern is likely to emerge with vector reduction ops. Moving
20863 // the binary operation ahead of insertion may allow using a narrower vector
20864 // instruction that has better performance than the wide version of the op:
20865 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
20866 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
20867 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
20868 LHS.getOperand(2) == RHS.getOperand(2) &&
20869 (LHS.hasOneUse() || RHS.hasOneUse())) {
20870 SDValue X = LHS.getOperand(1);
20871 SDValue Y = RHS.getOperand(1);
20872 SDValue Z = LHS.getOperand(2);
20873 EVT NarrowVT = X.getValueType();
20874 if (NarrowVT == Y.getValueType() &&
20875 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20876 // (binop undef, undef) may not return undef, so compute that result.
20879 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
20880 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
20881 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
20885 // Make sure all but the first op are undef or constant.
20886 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
20887 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
20888 std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
20889 [](const SDValue &Op) {
20890 return Op.isUndef() ||
20891 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
20895 // The following pattern is likely to emerge with vector reduction ops. Moving
20896 // the binary operation ahead of the concat may allow using a narrower vector
20897 // instruction that has better performance than the wide version of the op:
20898 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
20899 // concat (VBinOp X, Y), VecC
20900 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
20901 (LHS.hasOneUse() || RHS.hasOneUse())) {
20902 EVT NarrowVT = LHS.getOperand(0).getValueType();
20903 if (NarrowVT == RHS.getOperand(0).getValueType() &&
20904 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20906 unsigned NumOperands = LHS.getNumOperands();
20907 SmallVector<SDValue, 4> ConcatOps;
20908 for (unsigned i = 0; i != NumOperands; ++i) {
20909 // This constant fold for operands 1 and up.
20910 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
20911 RHS.getOperand(i)));
20914 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
// Finally, try scalarizing a binop of two splats (see helper above).
20918 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
// Simplify a SELECT whose condition is a SETCC by delegating to
// SimplifySelectCC and then rebuilding a SETCC + SELECT pair if needed.
20924 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
20926 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
20928 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
20929 cast<CondCodeSDNode>(N0.getOperand(2))->get());
20931 // If we got a simplified select_cc node back from SimplifySelectCC, then
20932 // break it down into a new SETCC node, and a new SELECT node, and then return
20933 // the SELECT node, since we were called with a SELECT node.
20934 if (SCC.getNode()) {
20935 // Check to see if we got a select_cc back (to turn into setcc/select).
20936 // Otherwise, just return whatever node we got back, like fabs.
20937 if (SCC.getOpcode() == ISD::SELECT_CC) {
// Propagate the flags of the original condition onto both new nodes.
20938 const SDNodeFlags Flags = N0.getNode()->getFlags();
20939 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
20941 SCC.getOperand(0), SCC.getOperand(1),
20942 SCC.getOperand(4), Flags);
20943 AddToWorklist(SETCC.getNode());
20944 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
20945 SCC.getOperand(2), SCC.getOperand(3));
20946 SelectNode->setFlags(Flags);
20955 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
20956 /// being selected between, see if we can simplify the select. Callers of this
20957 /// should assume that TheSelect is deleted if this returns true. As such, they
20958 /// should return the appropriate thing (e.g. the node) back to the top-level of
20959 /// the DAG combiner loop to avoid it being looked at.
20960 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
20962 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
20963 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
20964 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
20965 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
20966 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
20967 SDValue Sqrt = RHS;
20970 const ConstantFPSDNode *Zero = nullptr;
// Pull the compare operands out of either form of select.
20972 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
20973 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
20974 CmpLHS = TheSelect->getOperand(0);
20975 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
20977 // SELECT or VSELECT
20978 SDValue Cmp = TheSelect->getOperand(0);
20979 if (Cmp.getOpcode() == ISD::SETCC) {
20980 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
20981 CmpLHS = Cmp.getOperand(0);
20982 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
// Any "less than zero" predicate (ordered, unordered, or plain) qualifies.
20985 if (Zero && Zero->isZero() &&
20986 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
20987 CC == ISD::SETULT || CC == ISD::SETLT)) {
20988 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
20989 CombineTo(TheSelect, Sqrt);
20994 // Cannot simplify select with vector condition
20995 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
20997 // If this is a select from two identical things, try to pull the operation
20998 // through the select.
20999 if (LHS.getOpcode() != RHS.getOpcode() ||
21000 !LHS.hasOneUse() || !RHS.hasOneUse())
21003 // If this is a load and the token chain is identical, replace the select
21004 // of two loads with a load through a select of the address to load from.
21005 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
21006 // constants have been dropped into the constant pool.
21007 if (LHS.getOpcode() == ISD::LOAD) {
21008 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
21009 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
21011 // Token chains must be identical.
21012 if (LHS.getOperand(0) != RHS.getOperand(0) ||
21013 // Do not let this transformation reduce the number of volatile loads.
21014 // Be conservative for atomics for the moment
21015 // TODO: This does appear to be legal for unordered atomics (see D66309)
21016 !LLD->isSimple() || !RLD->isSimple() ||
21017 // FIXME: If either is a pre/post inc/dec load,
21018 // we'd need to split out the address adjustment.
21019 LLD->isIndexed() || RLD->isIndexed() ||
21020 // If this is an EXTLOAD, the VT's must match.
21021 LLD->getMemoryVT() != RLD->getMemoryVT() ||
21022 // If this is an EXTLOAD, the kind of extension must match.
21023 (LLD->getExtensionType() != RLD->getExtensionType() &&
21024 // The only exception is if one of the extensions is anyext.
21025 LLD->getExtensionType() != ISD::EXTLOAD &&
21026 RLD->getExtensionType() != ISD::EXTLOAD) ||
21027 // FIXME: this discards src value information. This is
21028 // over-conservative. It would be beneficial to be able to remember
21029 // both potential memory locations. Since we are discarding
21030 // src value info, don't do the transformation if the memory
21031 // locations are not in the default address space.
21032 LLD->getPointerInfo().getAddrSpace() != 0 ||
21033 RLD->getPointerInfo().getAddrSpace() != 0 ||
21034 // We can't produce a CMOV of a TargetFrameIndex since we won't
21035 // generate the address generation required.
21036 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21037 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21038 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
21039 LLD->getBasePtr().getValueType()))
21042 // The loads must not depend on one another.
21043 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
21046 // Check that the select condition doesn't reach either load. If so,
21047 // folding this will induce a cycle into the DAG. If not, this is safe to
21048 // xform, so create a select of the addresses.
21050 SmallPtrSet<const SDNode *, 32> Visited;
21051 SmallVector<const SDNode *, 16> Worklist;
21053 // Always fail if LLD and RLD are not independent. TheSelect is a
21054 // predecessor to all Nodes in question so we need not search past it.
21056 Visited.insert(TheSelect);
21057 Worklist.push_back(LLD);
21058 Worklist.push_back(RLD);
21060 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
21061 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
21065 if (TheSelect->getOpcode() == ISD::SELECT) {
21066 // We cannot do this optimization if any pair of {RLD, LLD} is a
21067 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
21068 // Loads, we only need to check if CondNode is a successor to one of the
21069 // loads. We can further avoid this if there's no use of their chain
21071 SDNode *CondNode = TheSelect->getOperand(0).getNode();
21072 Worklist.push_back(CondNode);
// Only the chain result (value #1) can carry a dependence back to the
// condition, so skip the walk when the chain is unused.
21074 if ((LLD->hasAnyUseOfValue(1) &&
21075 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21076 (RLD->hasAnyUseOfValue(1) &&
21077 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
// Safe: select between the two base pointers instead of the two loads.
21080 Addr = DAG.getSelect(SDLoc(TheSelect),
21081 LLD->getBasePtr().getValueType(),
21082 TheSelect->getOperand(0), LLD->getBasePtr(),
21083 RLD->getBasePtr());
21084 } else { // Otherwise SELECT_CC
21085 // We cannot do this optimization if any pair of {RLD, LLD} is a
21086 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
21087 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
21088 // one of the loads. We can further avoid this if there's no use of their
21091 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
21092 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
21093 Worklist.push_back(CondLHS);
21094 Worklist.push_back(CondRHS);
21096 if ((LLD->hasAnyUseOfValue(1) &&
21097 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21098 (RLD->hasAnyUseOfValue(1) &&
21099 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21102 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
21103 LLD->getBasePtr().getValueType(),
21104 TheSelect->getOperand(0),
21105 TheSelect->getOperand(1),
21106 LLD->getBasePtr(), RLD->getBasePtr(),
21107 TheSelect->getOperand(4));
21111 // It is safe to replace the two loads if they have different alignments,
21112 // but the new load must be the minimum (most restrictive) alignment of the
21114 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
// The merged MMO flags must be conservative: drop invariant/dereferenceable
// if either original load lacks the property.
21115 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
21116 if (!RLD->isInvariant())
21117 MMOFlags &= ~MachineMemOperand::MOInvariant;
21118 if (!RLD->isDereferenceable())
21119 MMOFlags &= ~MachineMemOperand::MODereferenceable;
21120 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
21121 // FIXME: Discards pointer and AA info.
21122 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
21123 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
21126 // FIXME: Discards pointer and AA info.
// If LLD is anyext, the (stricter) extension kind of RLD wins.
21127 Load = DAG.getExtLoad(
21128 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
21129 : LLD->getExtensionType(),
21130 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
21131 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
21134 // Users of the select now use the result of the load.
21135 CombineTo(TheSelect, Load);
21137 // Users of the old loads now use the new load's chain. We know the
21138 // old-load value is dead now.
21139 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
21140 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
21147 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
21149 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
21150 SDValue N1, SDValue N2, SDValue N3,
21151 ISD::CondCode CC) {
21152 // If this is a select where the false operand is zero and the compare is a
21153 // check of the sign bit, see if we can perform the "gzip trick":
21154 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
21155 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
21156 EVT XType = N0.getValueType();
21157 EVT AType = N2.getValueType();
21158 if (!isNullConstant(N3) || !XType.bitsGE(AType))
21161 // If the comparison is testing for a positive value, we have to invert
21162 // the sign bit mask, so only do that transform if the target has a bitwise
21163 // 'and not' instruction (the invert is free).
21164 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
21165 // (X > -1) ? A : 0
21166 // (X > 0) ? X : 0 <-- This is canonical signed max.
21167 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
21169 } else if (CC == ISD::SETLT) {
21171 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
21172 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
21178 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
21180 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
21181 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
// N2 is a power of 2 (single bit set): a logical shift of the sign bit down
// to that bit position replaces the full sign-splat + AND sequence.
21182 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
21183 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
21184 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
21185 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21186 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
21187 AddToWorklist(Shift.getNode());
21189 if (XType.bitsGT(AType)) {
21190 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21191 AddToWorklist(Shift.getNode());
// For the SETGT form the mask must be inverted ('and not' pattern).
21194 if (CC == ISD::SETGT)
21195 Shift = DAG.getNOT(DL, Shift, AType);
21197 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
// General case: arithmetic shift splats the sign bit across the value.
21201 unsigned ShCt = XType.getSizeInBits() - 1;
21202 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
21205 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21206 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
21207 AddToWorklist(Shift.getNode());
21209 if (XType.bitsGT(AType)) {
21210 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21211 AddToWorklist(Shift.getNode());
21214 if (CC == ISD::SETGT)
21215 Shift = DAG.getNOT(DL, Shift, AType);
21217 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21220 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
21221 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
21222 /// in it. This may be a win when the constant is not otherwise available
21223 /// because it replaces two constant pool loads with one.
21224 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
21225 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
21226 ISD::CondCode CC) {
21227 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
21230 // If we are before legalize types, we want the other legalization to happen
21231 // first (for example, to avoid messing with soft float).
21232 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
21233 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
21234 EVT VT = N2.getValueType();
21235 if (!TV || !FV || !TLI.isTypeLegal(VT))
21238 // If a constant can be materialized without loads, this does not make sense.
21239 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
21240 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
21241 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
21244 // If both constants have multiple uses, then we won't need to do an extra
21245 // load. The values are likely around in registers for other users.
21246 if (!TV->hasOneUse() && !FV->hasOneUse())
// Array layout is {FV, TV}: index 0 is the false value, index 1 the true
// value, matching the select of offsets built below.
21249 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
21250 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
21251 Type *FPTy = Elts[0]->getType();
21252 const DataLayout &TD = DAG.getDataLayout();
21254 // Create a ConstantArray of the two constants.
21255 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
21256 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
21257 TD.getPrefTypeAlign(FPTy));
21258 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
21260 // Get offsets to the 0 and 1 elements of the array, so we can select between
21262 SDValue Zero = DAG.getIntPtrConstant(0, DL);
21263 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
21264 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
21266 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
21267 AddToWorklist(Cond.getNode());
// Select the byte offset (not the value), then load through the pool pointer.
21268 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
21269 AddToWorklist(CstOffset.getNode());
21270 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
21271 AddToWorklist(CPIdx.getNode());
21272 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
21273 MachinePointerInfo::getConstantPool(
21274 DAG.getMachineFunction()), Alignment);
21277 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
21278 /// where 'cond' is the comparison specified by CC.
21279 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
21280 SDValue N2, SDValue N3, ISD::CondCode CC,
21281 bool NotExtCompare) {
21282 // (x ? y : y) -> y.
21283 if (N2 == N3) return N2;
21285 EVT CmpOpVT = N0.getValueType();
21286 EVT CmpResVT = getSetCCResultType(CmpOpVT);
21287 EVT VT = N2.getValueType();
21288 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
21289 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21290 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
21292 // Determine if the condition we're dealing with is constant.
21293 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
21294 AddToWorklist(SCC.getNode());
21295 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
21296 // fold select_cc true, x, y -> x
21297 // fold select_cc false, x, y -> y
21298 return !(SCCC->isNullValue()) ? N2 : N3;
// Try the FP-constant-pair-to-load and shift+and folds (helpers above).
21303 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
21306 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
21309 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
21310 // where y is has a single bit set.
21311 // A plaintext description would be, we can turn the SELECT_CC into an AND
21312 // when the condition can be materialized as an all-ones register. Any
21313 // single bit-test can be materialized as an all-ones register with
21314 // shift-left and shift-right-arith.
21315 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
21316 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
21317 SDValue AndLHS = N0->getOperand(0);
21318 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21319 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
21320 // Shift the tested bit over the sign bit.
21321 const APInt &AndMask = ConstAndRHS->getAPIntValue();
21322 unsigned ShCt = AndMask.getBitWidth() - 1;
21323 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
21325 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
21326 getShiftAmountTy(AndLHS.getValueType()));
21327 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
21329 // Now arithmetic right shift it all the way over, so the result is
21330 // either all-ones, or zero.
21332 DAG.getConstant(ShCt, SDLoc(Shl),
21333 getShiftAmountTy(Shl.getValueType()));
21334 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
21336 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
21341 // fold select C, 16, 0 -> shl C, 4
21342 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
21343 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
// Requires zero-or-one booleans so "setcc << log2(C)" produces C or 0.
21345 if ((Fold || Swap) &&
21346 TLI.getBooleanContents(CmpOpVT) ==
21347 TargetLowering::ZeroOrOneBooleanContent &&
21348 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
// In the swapped form, invert the condition instead of swapping the arms.
21351 CC = ISD::getSetCCInverse(CC, CmpOpVT);
21352 std::swap(N2C, N3C);
21355 // If the caller doesn't want us to simplify this into a zext of a compare,
21357 if (NotExtCompare && N2C->isOne())
21361 // zext (setcc n0, n1)
21363 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
21364 if (VT.bitsLT(SCC.getValueType()))
21365 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
21367 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
21369 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
21370 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
21373 AddToWorklist(SCC.getNode());
21374 AddToWorklist(Temp.getNode());
21379 unsigned ShCt = N2C->getAPIntValue().logBase2();
21380 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
21383 // shl setcc result by log2 n2c
21384 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
21385 DAG.getConstant(ShCt, SDLoc(Temp),
21386 getShiftAmountTy(Temp.getValueType())));
21389 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
21390 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
21391 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
21392 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
21393 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
21394 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
21395 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
21396 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
21397 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
21398 SDValue ValueOnZero = N2;
21399 SDValue Count = N3;
21400 // If the condition is NE instead of E, swap the operands.
21401 if (CC == ISD::SETNE)
21402 std::swap(ValueOnZero, Count);
21403 // Check if the value on zero is a constant equal to the bits in the type.
21404 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
21405 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
21406 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
21407 // legal, combine to just cttz.
21408 if ((Count.getOpcode() == ISD::CTTZ ||
21409 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
21410 N0 == Count.getOperand(0) &&
21411 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
21412 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
21413 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
21414 // legal, combine to just ctlz.
21415 if ((Count.getOpcode() == ISD::CTLZ ||
21416 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
21417 N0 == Count.getOperand(0) &&
21418 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
21419 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
21427 /// This is a stub for TargetLowering::SimplifySetCC.
// Thin forwarding wrapper: packages the combiner state (DAG, Level, this)
// into a DAGCombinerInfo and delegates the actual simplification to TLI.
21428 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
21429 ISD::CondCode Cond, const SDLoc &DL,
21430 bool foldBooleans) {
21431 TargetLowering::DAGCombinerInfo
21432 DagCombineInfo(DAG, Level, false, this);
21433 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
21436 /// Given an ISD::SDIV node expressing a divide by constant, return
21437 /// a DAG expression to select that will generate the same value by multiplying
21438 /// by a magic number.
21439 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21440 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
21441 // when optimising for minimum size, we don't want to expand a div to a mul
21443 if (DAG.getMachineFunction().getFunction().hasMinSize())
21446 SmallVector<SDNode *, 8> Built;
// TLI does the magic-number expansion; the nodes it created are collected in
// Built and re-queued on the combiner worklist below.
21447 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
21448 for (SDNode *N : Built)
21456 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
21457 /// DAG expression that will generate the same value by right shifting.
21458 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
// Divisor must be a (splat) constant for the shift-based expansion.
21459 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
21463 // Avoid division by zero.
21464 if (C->isNullValue())
21467 SmallVector<SDNode *, 8> Built;
// TLI performs the expansion; newly created nodes are re-queued below.
21468 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
21469 for (SDNode *N : Built)
21477 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
21478 /// expression that will generate the same value by multiplying by a magic
21480 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21481 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
21482 // when optimising for minimum size, we don't want to expand a div to a mul
21484 if (DAG.getMachineFunction().getFunction().hasMinSize())
21487 SmallVector<SDNode *, 8> Built;
// Unsigned counterpart of BuildSDIV: TLI emits the magic-number sequence and
// the new nodes are re-queued on the worklist below.
21488 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
21489 for (SDNode *N : Built)
21497 /// Determines the LogBase2 value for a non-null input value using the
21498 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
21499 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
21500 EVT VT = V.getValueType();
21501 unsigned EltBits = VT.getScalarSizeInBits();
// For a power-of-two V, (EltBits - 1) - ctlz(V) is exactly log2(V); the
// caller is responsible for guaranteeing V is non-null (doc comment above).
21502 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
21503 SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
21504 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
21508 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21509 /// For the reciprocal, we need to find the zero of the function:
21510 /// F(X) = A X - 1 [which has a zero at X = 1/A]
21512 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
21513 /// does not require additional intermediate precision]
21514 /// For the last iteration, put numerator N into it to gain more precision:
21515 /// Result = N X_i + X_i (N - N A X_i)
21516 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
21517 SDNodeFlags Flags) {
21521 // TODO: Handle half and/or extended types?
21522 EVT VT = Op.getValueType();
21523 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
21526 // If estimates are explicitly disabled for this function, we're done.
21527 MachineFunction &MF = DAG.getMachineFunction();
21528 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
21529 if (Enabled == TLI.ReciprocalEstimate::Disabled)
21532 // Estimates may be explicitly enabled for this type with a custom number of
21533 // refinement steps.
21534 int Iterations = TLI.getDivRefinementSteps(VT, MF);
// Ask the target for a hardware reciprocal estimate of Op; Iterations may be
// updated by the call. Everything below refines Est toward N/Op.
21535 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
21536 AddToWorklist(Est.getNode());
21540 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
21542 // Newton iterations: Est = Est + Est (N - Arg * Est)
21543 // If this is the last iteration, also multiply by the numerator.
21544 for (int i = 0; i < Iterations; ++i) {
21545 SDValue MulEst = Est;
// On the final pass, fold the numerator N into the iterate so the last
// refinement directly produces N * (1/Op) with extra precision.
21547 if (i == Iterations - 1) {
21548 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
21549 AddToWorklist(MulEst.getNode());
21552 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
21553 AddToWorklist(NewEst.getNode());
// Error term: (1 - Op*Est) normally, or (N - Op*N*Est) on the last pass.
21555 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
21556 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
21557 AddToWorklist(NewEst.getNode());
21559 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21560 AddToWorklist(NewEst.getNode());
21562 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
21563 AddToWorklist(Est.getNode());
21566 // If no iterations are available, multiply with N.
21567 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
21568 AddToWorklist(Est.getNode());
21577 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21578 /// For the reciprocal sqrt, we need to find the zero of the function:
21579 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21581 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
21582 /// As a result, we precompute A/2 prior to the iteration loop.
// "One constant" variant: the whole sequence needs only the literal 1.5,
// since A/2 is derived below as (1.5*A - A).
21583 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
21584 unsigned Iterations,
21585 SDNodeFlags Flags, bool Reciprocal) {
21586 EVT VT = Arg.getValueType();
21588 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
21590 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
21591 // this entire sequence requires only one FP constant.
21592 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
21593 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
21595 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
21596 for (unsigned i = 0; i < Iterations; ++i) {
21597 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
21598 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
21599 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
21600 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21603 // If non-reciprocal square root is requested, multiply the result by Arg.
// sqrt(A) = A * rsqrt(A); callers must handle the Arg == 0 case separately.
21605 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
21610 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21611 /// For the reciprocal sqrt, we need to find the zero of the function:
21612 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21614 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
// "Two constant" variant: uses the literals -0.5 and -3.0 instead of 1.5,
// which lets the sqrt (non-reciprocal) case reuse the A*E subexpression.
21615 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
21616 unsigned Iterations,
21617 SDNodeFlags Flags, bool Reciprocal) {
21618 EVT VT = Arg.getValueType();
21620 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
21621 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
21623 // This routine must enter the loop below to work correctly
21624 // when (Reciprocal == false).
// The sqrt result is only formed inside the last loop iteration (the
// AE * -0.5 branch), so zero iterations would never produce it.
21625 assert(Iterations > 0);
21627 // Newton iterations for reciprocal square root:
21628 // E = (E * -0.5) * ((A * E) * E + -3.0)
21629 for (unsigned i = 0; i < Iterations; ++i) {
21630 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
21631 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
21632 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
21634 // When calculating a square root at the last iteration build:
21635 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
21636 // (notice a common subexpression)
// All but the final sqrt iteration use the plain rsqrt update.
21638 if (Reciprocal || (i + 1) < Iterations) {
21639 // RSQRT: LHS = (E * -0.5)
21640 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
21642 // SQRT: LHS = (A * E) * -0.5
21643 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
21646 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
21652 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
21653 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
21654 /// Op can be zero.
21655 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
21660 // TODO: Handle half and/or extended types?
21661 EVT VT = Op.getValueType();
21662 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
21665 // If estimates are explicitly disabled for this function, we're done.
21666 MachineFunction &MF = DAG.getMachineFunction();
21667 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
21668 if (Enabled == TLI.ReciprocalEstimate::Disabled)
21671 // Estimates may be explicitly enabled for this type with a custom number of
21672 // refinement steps.
21673 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
21675 bool UseOneConstNR = false;
// Target supplies the raw estimate and tells us which Newton-Raphson
// refinement flavor (one-constant vs. two-constant) it prefers.
21677 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
21679 AddToWorklist(Est.getNode());
21682 Est = UseOneConstNR
21683 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
21684 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
21687 // The estimate is now completely wrong if the input was exactly 0.0 or
21688 // possibly a denormal. Force the answer to 0.0 for those cases.
21690 EVT CCVT = getSetCCResultType(VT);
21691 ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
21692 DenormalMode DenormMode = DAG.getDenormalMode(VT);
21693 if (DenormMode.Input == DenormalMode::IEEE) {
21694 // This is specifically a check for the handling of denormal inputs,
21697 // fabs(X) < SmallestNormal ? 0.0 : Est
21698 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
21699 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
21700 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
21701 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
21702 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
21703 SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
21704 Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
21706 // X == 0.0 ? 0.0 : Est
// Non-IEEE denormal handling: only exact zero needs the fixup.
21707 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
21708 SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
21709 Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
// Public entry point: reciprocal square root (Reciprocal = true).
21719 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21720 return buildSqrtEstimateImpl(Op, Flags, true);
// Public entry point: plain square root (Reciprocal = false); the impl
// computes Op*rsqrt(Op) plus a zero/denormal fixup.
21723 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21724 return buildSqrtEstimateImpl(Op, Flags, false);
21727 /// Return true if there is any possibility that the two addresses overlap.
21728 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
21730 struct MemUseCharacteristics {
// NumBytes is None when the access size is unknown (e.g. no-offset
// lifetime nodes or non-memory nodes).
21735 Optional<int64_t> NumBytes;
21736 MachineMemOperand *MMO;
// Normalize a node into the fields above; handles loads/stores,
// lifetime markers, and a conservative fallback for everything else.
21739 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
21740 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
21741 int64_t Offset = 0;
// Pre-inc adds the offset, pre-dec subtracts it; other addressing
// modes leave Offset at 0.
21742 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
21743 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
21744 ? C->getSExtValue()
21745 : (LSN->getAddressingMode() == ISD::PRE_DEC)
21746 ? -1 * C->getSExtValue()
21749 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
21750 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
21751 Offset /*base offset*/,
21752 Optional<int64_t>(Size),
21753 LSN->getMemOperand()};
21755 if (const auto *LN = cast<LifetimeSDNode>(N))
21756 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
21757 (LN->hasOffset()) ? LN->getOffset() : 0,
21758 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
21759 : Optional<int64_t>(),
21760 (MachineMemOperand *)nullptr};
21762 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
21763 (int64_t)0 /*offset*/,
21764 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
21767 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
21768 MUC1 = getCharacteristics(Op1);
21770 // If they are to the same address, then they must be aliases.
21771 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
21772 MUC0.Offset == MUC1.Offset)
21775 // If they are both volatile then they cannot be reordered.
21776 if (MUC0.IsVolatile && MUC1.IsVolatile)
21779 // Be conservative about atomics for the moment
21780 // TODO: This is way overconservative for unordered atomics (see D66309)
21781 if (MUC0.IsAtomic && MUC1.IsAtomic)
// Invariant memory can never be clobbered by a store, so an
// invariant-load / store pair cannot alias.
21784 if (MUC0.MMO && MUC1.MMO) {
21785 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21786 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21790 // Try to prove that there is aliasing, or that there is no aliasing. Either
21791 // way, we can return now. If nothing can be proved, proceed with more tests.
21793 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
21797 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
21798 // either are not known.
21799 if (!MUC0.MMO || !MUC1.MMO)
21802 // If one operation reads from invariant memory, and the other may store, they
21803 // cannot alias. These should really be checking the equivalent of mayWrite,
21804 // but it only matters for memory nodes other than load /store.
21805 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21806 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21809 // If we know required SrcValue1 and SrcValue2 have relatively large
21810 // alignment compared to the size and offset of the access, we may be able
21811 // to prove they do not alias. This check is conservative for now to catch
21812 // cases created by splitting vector types, it only works when the offsets are
21813 // multiples of the size of the data.
21814 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
21815 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
21816 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
21817 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
21818 auto &Size0 = MUC0.NumBytes;
21819 auto &Size1 = MUC1.NumBytes;
// Same-size, alignment-padded accesses at distinct offsets: compare their
// positions within an alignment window to prove disjointness.
21820 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
21821 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
21822 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
21823 SrcValOffset1 % *Size1 == 0) {
21824 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
21825 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
21827 // There is no overlap between these relatively aligned accesses of
21828 // similar size. Return no alias.
21829 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
// A -combiner-global-alias-analysis command-line override, if present,
// beats the subtarget's default useAA() answer.
21833 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
21835 : DAG.getSubtarget().useAA();
21837 if (CombinerAAOnlyFunc.getNumOccurrences() &&
21838 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
// Fall back to IR-level alias analysis when both IR values and sizes are
// known; Overlap0/1 extend each size so both locations share MinOffset.
21842 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
21843 Size0.hasValue() && Size1.hasValue()) {
21844 // Use alias analysis information.
21845 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
21846 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
21847 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
21848 AliasResult AAResult = AA->alias(
21849 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
21850 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
21851 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
21852 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
21853 if (AAResult == NoAlias)
21857 // Otherwise we have to assume they alias.
21861 /// Walk up chain skipping non-aliasing memory nodes,
21862 /// looking for aliasing nodes and adding them to the Aliases vector.
21863 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
21864 SmallVectorImpl<SDValue> &Aliases) {
21865 SmallVector<SDValue, 8> Chains; // List of chains to visit.
21866 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
21868 // Get alias information for node.
21869 // TODO: relax aliasing for unordered atomics (see D66309)
// Only simple (non-volatile, non-atomic) loads may be reordered freely
// past other simple loads below.
21870 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
21873 Chains.push_back(OriginalChain);
21874 unsigned Depth = 0;
21876 // Attempt to improve chain by a single step
// Returns true if C was advanced (or cleared); false means C is an alias
// and the caller should record it. Mutates C in place.
21877 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
21878 switch (C.getOpcode()) {
21879 case ISD::EntryToken:
21880 // No need to mark EntryToken.
21885 // Get alias information for C.
21886 // TODO: Relax aliasing for unordered atomics (see D66309)
21887 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
21888 cast<LSBaseSDNode>(C.getNode())->isSimple();
// Two simple loads never conflict; otherwise consult isAlias().
21889 if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
21890 // Look further up the chain.
21891 C = C.getOperand(0);
21894 // Alias, so stop here.
21898 case ISD::CopyFromReg:
21899 // Always forward past CopyFromReg.
21900 C = C.getOperand(0);
21903 case ISD::LIFETIME_START:
21904 case ISD::LIFETIME_END: {
21905 // We can forward past any lifetime start/end that can be proven not to
21906 // alias the memory access.
21907 if (!isAlias(N, C.getNode())) {
21908 // Look further up the chain.
21909 C = C.getOperand(0);
21919 // Look at each chain and determine if it is an alias. If so, add it to the
21920 // aliases list. If not, then continue up the chain looking for the next
21922 while (!Chains.empty()) {
21923 SDValue Chain = Chains.pop_back_val();
21925 // Don't bother if we've seen Chain before.
21926 if (!Visited.insert(Chain.getNode()).second)
21929 // For TokenFactor nodes, look at each operand and only continue up the
21930 // chain until we reach the depth limit.
21932 // FIXME: The depth check could be made to return the last non-aliasing
21933 // chain we found before we hit a tokenfactor rather than the original
// Depth limit hit: give up and keep the original chain as the only alias.
21935 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
21937 Aliases.push_back(OriginalChain);
21941 if (Chain.getOpcode() == ISD::TokenFactor) {
21942 // We have to check each of the operands of the token factor for "small"
21943 // token factors, so we queue them up. Adding the operands to the queue
21944 // (stack) in reverse order maintains the original order and increases the
21945 // likelihood that getNode will find a matching token factor (CSE.)
// Wide token factors (> 16 operands) are treated as a single opaque
// alias instead of being expanded, to bound the search.
21946 if (Chain.getNumOperands() > 16) {
21947 Aliases.push_back(Chain);
21950 for (unsigned n = Chain.getNumOperands(); n;)
21951 Chains.push_back(Chain.getOperand(--n));
21956 if (ImproveChain(Chain)) {
21957 // Updated Chain Found, Consider new chain if one exists.
21958 if (Chain.getNode())
21959 Chains.push_back(Chain);
21963 // No Improved Chain Possible, treat as Alias.
21964 Aliases.push_back(Chain);
21968 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
21969 /// (aliasing node.)
21970 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// Chain improvement is purely an optimization; skip it entirely at -O0.
21971 if (OptLevel == CodeGenOpt::None)
21974 // Ops for replacing token factor.
21975 SmallVector<SDValue, 8> Aliases;
21977 // Accumulate all the aliases to this node.
21978 GatherAllAliases(N, OldChain, Aliases);
21980 // If no operands then chain to entry token.
21981 if (Aliases.size() == 0)
21982 return DAG.getEntryNode();
21984 // If a single operand then chain to it. We don't need to revisit it.
21985 if (Aliases.size() == 1)
21988 // Construct a custom tailored token factor.
// Multiple aliases: merge them into one TokenFactor that becomes N's chain.
21989 return DAG.getTokenFactor(SDLoc(N), Aliases);
// Empty tag type used as the (ignored) mapped value of the IntervalMap in
// parallelizeChainedStores; only the interval keys matter.
21993 // TODO: Replace with std::monostate when we move to C++17.
21994 struct UnitT { } Unit;
21995 bool operator==(const UnitT &, const UnitT &) { return true; }
21996 bool operator!=(const UnitT &, const UnitT &) { return false; }
21999 // This function tries to collect a bunch of potentially interesting
22000 // nodes to improve the chains of, all at once. This might seem
22001 // redundant, as this function gets called when visiting every store
22002 // node, so why not let the work be done on each store as it's visited?
22004 // I believe this is mainly important because mergeConsecutiveStores
22005 // is unable to deal with merging stores of different sizes, so unless
22006 // we improve the chains of all the potential candidates up-front
22007 // before running mergeConsecutiveStores, it might only see some of
22008 // the nodes that will eventually be candidates, and then not be able
22009 // to go from a partially-merged state to the desired final
22010 // fully-merged state.
22012 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
22013 SmallVector<StoreSDNode *, 8> ChainedStores;
22014 StoreSDNode *STChain = St;
22015 // Intervals records which offsets from BaseIndex have been covered. In
22016 // the common case, every store writes to the immediately previous address
22017 // space and thus merged with the previous interval at insertion time.
22020 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
22024 // This holds the base pointer, index, and the offset in bytes from the base
22026 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22028 // We must have a base and an offset.
22029 if (!BasePtr.getBase().getNode())
22032 // Do not handle stores to undef base pointers.
22033 if (BasePtr.getBase().isUndef())
22036 // BaseIndexOffset assumes that offsets are fixed-size, which
22037 // is not valid for scalable vectors where the offsets are
22038 // scaled by `vscale`, so bail out early.
22039 if (St->getMemoryVT().isScalableVector())
22042 // Add ST's interval.
// Interval length is the store size rounded up to whole bytes.
22043 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
// Walk up the chain collecting disjoint same-base stores.
22045 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
22046 // If the chain has more than one use, then we can't reorder the mem ops.
22047 if (!SDValue(Chain, 0)->hasOneUse())
22049 // TODO: Relax for unordered atomics (see D66309)
22050 if (!Chain->isSimple() || Chain->isIndexed())
22053 // Find the base pointer and offset for this memory node.
22054 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
22055 // Check that the base pointer is the same as the original one.
22057 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
22059 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
22060 // Make sure we don't overlap with other intervals by checking the ones to
22061 // the left or right before inserting.
22062 auto I = Intervals.find(Offset);
22063 // If there's a next interval, we should end before it.
22064 if (I != Intervals.end() && I.start() < (Offset + Length))
22066 // If there's a previous interval, we should start after it.
22067 if (I != Intervals.begin() && (--I).stop() <= Offset)
22069 Intervals.insert(Offset, Offset + Length, Unit);
22071 ChainedStores.push_back(Chain);
22075 // If we didn't find a chained store, exit.
22076 if (ChainedStores.size() == 0)
22079 // Improve all chained stores (St and ChainedStores members) starting from
22080 // where the store chain ended and return single TokenFactor.
22081 SDValue NewChain = STChain->getChain();
22082 SmallVector<SDValue, 8> TFOps;
// Iterate from the top of the chain down so each store is improved
// relative to the chain above it.
22083 for (unsigned I = ChainedStores.size(); I;) {
22084 StoreSDNode *S = ChainedStores[--I];
22085 SDValue BetterChain = FindBetterChain(S, NewChain);
22086 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
22087 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
22088 TFOps.push_back(SDValue(S, 0));
22089 ChainedStores[I] = S;
22092 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
22093 SDValue BetterChain = FindBetterChain(St, NewChain);
// Preserve the truncating-store flavor when rebuilding St.
22095 if (St->isTruncatingStore())
22096 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
22097 St->getBasePtr(), St->getMemoryVT(),
22098 St->getMemOperand());
22100 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
22101 St->getBasePtr(), St->getMemOperand());
22103 TFOps.push_back(NewST);
22105 // If we improved every element of TFOps, then we've lost the dependence on
22106 // NewChain to successors of St and we need to add it back to TFOps. Do so at
22107 // the beginning to keep relative order consistent with FindBetterChains.
22108 auto hasImprovedChain = [&](SDValue ST) -> bool {
22109 return ST->getOperand(0) != NewChain;
22111 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
22113 TFOps.insert(TFOps.begin(), NewChain);
22115 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
22118 // Add TF and its operands to the worklist.
22119 AddToWorklist(TF.getNode());
22120 for (const SDValue &Op : TF->ops())
22121 AddToWorklist(Op.getNode());
22122 AddToWorklist(STChain);
// Try to improve the chain dependencies of the store St (and of nearby
// disjoint stores) so that independent memory operations can execute in
// parallel. Returns true when any chain was changed.
22126 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
// Pure optimization; do nothing at -O0.
22127 if (OptLevel == CodeGenOpt::None)
22130 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22132 // We must have a base and an offset.
22133 if (!BasePtr.getBase().getNode())
22136 // Do not handle stores to undef base pointers.
22137 if (BasePtr.getBase().isUndef())
22140 // Directly improve a chain of disjoint stores starting at St.
22141 if (parallelizeChainedStores(St))
22144 // Improve St's Chain..
22145 SDValue BetterChain = FindBetterChain(St, St->getChain());
22146 if (St->getChain() != BetterChain) {
22147 replaceStoreChain(St, BetterChain);
22153 /// This is the entry point for the file.
// Constructs a DAGCombiner for this SelectionDAG and runs it at the given
// combine level (the pass runs both before and after legalization).
22154 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
22155 CodeGenOpt::Level OptLevel) {
22156 /// This is the main entry point to this class.
22157 DAGCombiner(*this, AA, OptLevel).Run(Level);