1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
17 //===----------------------------------------------------------------------===//
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/SelectionDAG.h"
41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42 #include "llvm/CodeGen/SelectionDAGNodes.h"
43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
78 #define DEBUG_TYPE "dagcombine"
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
88 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
89 cl::desc("Enable DAG combiner's use of IR alias analysis"));
92 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
93 cl::desc("Enable DAG combiner's use of TBAA"));
96 static cl::opt<std::string>
97 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
98 cl::desc("Only use DAG-combiner alias analysis in this"
102 /// Hidden option to stress test load slicing, i.e., when this option
103 /// is enabled, load slicing bypasses most of its profitability guards.
105 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
106 cl::desc("Bypass the profitability model of load slicing"),
110 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
111 cl::desc("DAG combiner may split indexing from loads"));
117 const TargetLowering &TLI;
119 CodeGenOpt::Level OptLevel;
120 bool LegalOperations = false;
121 bool LegalTypes = false;
124 /// Worklist of all of the nodes that need to be simplified.
126 /// This must behave as a stack -- new nodes to process are pushed onto the
127 /// back and when processing we pop off of the back.
129 /// The worklist will not contain duplicates but may contain null entries
130 /// due to nodes being deleted from the underlying DAG.
131 SmallVector<SDNode *, 64> Worklist;
133 /// Mapping from an SDNode to its position on the worklist.
135 /// This is used to find and remove nodes from the worklist (by nulling
136 /// them) when they are deleted from the underlying DAG. It relies on
137 /// stable indices of nodes within the worklist.
138 DenseMap<SDNode *, unsigned> WorklistMap;
140 /// Set of nodes which have been combined (at least once).
142 /// This is used to allow us to reliably add any operands of a DAG node
143 /// which have not yet been combined to the worklist.
144 SmallPtrSet<SDNode *, 32> CombinedNodes;
146 // AA - Used for DAG load/store alias analysis.
149 /// When an instruction is simplified, add all users of the instruction to
150 /// the work lists because they might get more simplified now.
151 void AddUsersToWorklist(SDNode *N) {
152 for (SDNode *Node : N->uses())
156 /// Call the node-specific routine that folds each particular type of node.
157 SDValue visit(SDNode *N);
160 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
161 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
162 OptLevel(OL), AA(AA) {
163 ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
165 MaximumLegalStoreInBits = 0;
166 for (MVT VT : MVT::all_valuetypes())
167 if (EVT(VT).isSimple() && VT != MVT::Other &&
168 TLI.isTypeLegal(EVT(VT)) &&
169 VT.getSizeInBits() >= MaximumLegalStoreInBits)
170 MaximumLegalStoreInBits = VT.getSizeInBits();
173 /// Add to the worklist making sure its instance is at the back (next to be
175 void AddToWorklist(SDNode *N) {
176 assert(N->getOpcode() != ISD::DELETED_NODE &&
177 "Deleted Node added to Worklist");
179 // Skip handle nodes as they can't usefully be combined and confuse the
180 // zero-use deletion strategy.
181 if (N->getOpcode() == ISD::HANDLENODE)
184 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
185 Worklist.push_back(N);
188 /// Remove all instances of N from the worklist.
189 void removeFromWorklist(SDNode *N) {
190 CombinedNodes.erase(N);
192 auto It = WorklistMap.find(N);
193 if (It == WorklistMap.end())
194 return; // Not in the worklist.
196 // Null out the entry rather than erasing it to avoid a linear operation.
197 Worklist[It->second] = nullptr;
198 WorklistMap.erase(It);
201 void deleteAndRecombine(SDNode *N);
202 bool recursivelyDeleteUnusedNodes(SDNode *N);
204 /// Replaces all uses of the results of one DAG node with new values.
205 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
208 /// Replaces all uses of the results of one DAG node with new values.
209 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
210 return CombineTo(N, &Res, 1, AddTo);
213 /// Replaces all uses of the results of one DAG node with new values.
214 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216 SDValue To[] = { Res0, Res1 };
217 return CombineTo(N, To, 2, AddTo);
220 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
223 unsigned MaximumLegalStoreInBits;
225 /// Check the specified integer node value to see if it can be simplified or
226 /// if things it uses can be simplified by bit propagation.
227 /// If so, return true.
228 bool SimplifyDemandedBits(SDValue Op) {
229 unsigned BitWidth = Op.getScalarValueSizeInBits();
230 APInt Demanded = APInt::getAllOnesValue(BitWidth);
231 return SimplifyDemandedBits(Op, Demanded);
234 /// Check the specified vector node value to see if it can be simplified or
235 /// if things it uses can be simplified as it only uses some of the
236 /// elements. If so, return true.
237 bool SimplifyDemandedVectorElts(SDValue Op) {
238 unsigned NumElts = Op.getValueType().getVectorNumElements();
239 APInt Demanded = APInt::getAllOnesValue(NumElts);
240 return SimplifyDemandedVectorElts(Op, Demanded);
243 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
244 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
245 bool AssumeSingleUse = false);
247 bool CombineToPreIndexedLoadStore(SDNode *N);
248 bool CombineToPostIndexedLoadStore(SDNode *N);
249 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
250 bool SliceUpLoad(SDNode *N);
252 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
255 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
256 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
257 /// \param EltNo index of the vector element to load.
258 /// \param OriginalLoad load that EVE came from to be replaced.
259 /// \returns EVE on success SDValue() on failure.
260 SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
261 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
262 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
263 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
264 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
265 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
266 SDValue PromoteIntBinOp(SDValue Op);
267 SDValue PromoteIntShiftOp(SDValue Op);
268 SDValue PromoteExtend(SDValue Op);
269 bool PromoteLoad(SDValue Op);
271 /// Call the node-specific routine that knows how to fold each
272 /// particular type of node. If that doesn't do anything, try the
273 /// target-specific DAG combines.
274 SDValue combine(SDNode *N);
276 // Visitation implementation - Implement dag node combining for different
277 // node types. The semantics are as follows:
279 // SDValue.getNode() == 0 - No change was made
280 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
281 // otherwise - N should be replaced by the returned Operand.
283 SDValue visitTokenFactor(SDNode *N);
284 SDValue visitMERGE_VALUES(SDNode *N);
285 SDValue visitADD(SDNode *N);
286 SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
287 SDValue visitSUB(SDNode *N);
288 SDValue visitADDC(SDNode *N);
289 SDValue visitUADDO(SDNode *N);
290 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
291 SDValue visitSUBC(SDNode *N);
292 SDValue visitUSUBO(SDNode *N);
293 SDValue visitADDE(SDNode *N);
294 SDValue visitADDCARRY(SDNode *N);
295 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
296 SDValue visitSUBE(SDNode *N);
297 SDValue visitSUBCARRY(SDNode *N);
298 SDValue visitMUL(SDNode *N);
299 SDValue useDivRem(SDNode *N);
300 SDValue visitSDIV(SDNode *N);
301 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
302 SDValue visitUDIV(SDNode *N);
303 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
304 SDValue visitREM(SDNode *N);
305 SDValue visitMULHU(SDNode *N);
306 SDValue visitMULHS(SDNode *N);
307 SDValue visitSMUL_LOHI(SDNode *N);
308 SDValue visitUMUL_LOHI(SDNode *N);
309 SDValue visitSMULO(SDNode *N);
310 SDValue visitUMULO(SDNode *N);
311 SDValue visitIMINMAX(SDNode *N);
312 SDValue visitAND(SDNode *N);
313 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
314 SDValue visitOR(SDNode *N);
315 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
316 SDValue visitXOR(SDNode *N);
317 SDValue SimplifyVBinOp(SDNode *N);
318 SDValue visitSHL(SDNode *N);
319 SDValue visitSRA(SDNode *N);
320 SDValue visitSRL(SDNode *N);
321 SDValue visitRotate(SDNode *N);
322 SDValue visitABS(SDNode *N);
323 SDValue visitBSWAP(SDNode *N);
324 SDValue visitBITREVERSE(SDNode *N);
325 SDValue visitCTLZ(SDNode *N);
326 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
327 SDValue visitCTTZ(SDNode *N);
328 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
329 SDValue visitCTPOP(SDNode *N);
330 SDValue visitSELECT(SDNode *N);
331 SDValue visitVSELECT(SDNode *N);
332 SDValue visitSELECT_CC(SDNode *N);
333 SDValue visitSETCC(SDNode *N);
334 SDValue visitSETCCCARRY(SDNode *N);
335 SDValue visitSIGN_EXTEND(SDNode *N);
336 SDValue visitZERO_EXTEND(SDNode *N);
337 SDValue visitANY_EXTEND(SDNode *N);
338 SDValue visitAssertExt(SDNode *N);
339 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
340 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
341 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
342 SDValue visitTRUNCATE(SDNode *N);
343 SDValue visitBITCAST(SDNode *N);
344 SDValue visitBUILD_PAIR(SDNode *N);
345 SDValue visitFADD(SDNode *N);
346 SDValue visitFSUB(SDNode *N);
347 SDValue visitFMUL(SDNode *N);
348 SDValue visitFMA(SDNode *N);
349 SDValue visitFDIV(SDNode *N);
350 SDValue visitFREM(SDNode *N);
351 SDValue visitFSQRT(SDNode *N);
352 SDValue visitFCOPYSIGN(SDNode *N);
353 SDValue visitSINT_TO_FP(SDNode *N);
354 SDValue visitUINT_TO_FP(SDNode *N);
355 SDValue visitFP_TO_SINT(SDNode *N);
356 SDValue visitFP_TO_UINT(SDNode *N);
357 SDValue visitFP_ROUND(SDNode *N);
358 SDValue visitFP_ROUND_INREG(SDNode *N);
359 SDValue visitFP_EXTEND(SDNode *N);
360 SDValue visitFNEG(SDNode *N);
361 SDValue visitFABS(SDNode *N);
362 SDValue visitFCEIL(SDNode *N);
363 SDValue visitFTRUNC(SDNode *N);
364 SDValue visitFFLOOR(SDNode *N);
365 SDValue visitFMINNUM(SDNode *N);
366 SDValue visitFMAXNUM(SDNode *N);
367 SDValue visitBRCOND(SDNode *N);
368 SDValue visitBR_CC(SDNode *N);
369 SDValue visitLOAD(SDNode *N);
371 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
372 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
374 SDValue visitSTORE(SDNode *N);
375 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
376 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
377 SDValue visitBUILD_VECTOR(SDNode *N);
378 SDValue visitCONCAT_VECTORS(SDNode *N);
379 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
380 SDValue visitVECTOR_SHUFFLE(SDNode *N);
381 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
382 SDValue visitINSERT_SUBVECTOR(SDNode *N);
383 SDValue visitMLOAD(SDNode *N);
384 SDValue visitMSTORE(SDNode *N);
385 SDValue visitMGATHER(SDNode *N);
386 SDValue visitMSCATTER(SDNode *N);
387 SDValue visitFP_TO_FP16(SDNode *N);
388 SDValue visitFP16_TO_FP(SDNode *N);
390 SDValue visitFADDForFMACombine(SDNode *N);
391 SDValue visitFSUBForFMACombine(SDNode *N);
392 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
394 SDValue XformToShuffleWithZero(SDNode *N);
395 SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
398 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
400 SDValue foldSelectOfConstants(SDNode *N);
401 SDValue foldVSelectOfConstants(SDNode *N);
402 SDValue foldBinOpIntoSelect(SDNode *BO);
403 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
404 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
405 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
406 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
407 SDValue N2, SDValue N3, ISD::CondCode CC,
408 bool NotExtCompare = false);
409 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
410 SDValue N2, SDValue N3, ISD::CondCode CC);
411 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
413 SDValue unfoldMaskedMerge(SDNode *N);
414 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
415 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
416 const SDLoc &DL, bool foldBooleans);
417 SDValue rebuildSetCC(SDValue N);
419 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
421 bool isOneUseSetCC(SDValue N) const;
423 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
425 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
426 SDValue CombineExtLoad(SDNode *N);
427 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
428 SDValue combineRepeatedFPDivisors(SDNode *N);
429 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
430 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
431 SDValue BuildSDIV(SDNode *N);
432 SDValue BuildSDIVPow2(SDNode *N);
433 SDValue BuildUDIV(SDNode *N);
434 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
435 SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
436 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
437 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
438 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
439 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
440 SDNodeFlags Flags, bool Reciprocal);
441 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
442 SDNodeFlags Flags, bool Reciprocal);
443 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
444 bool DemandHighBits = true);
445 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
446 SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
447 SDValue InnerPos, SDValue InnerNeg,
448 unsigned PosOpcode, unsigned NegOpcode,
450 SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
451 SDValue MatchLoadCombine(SDNode *N);
452 SDValue ReduceLoadWidth(SDNode *N);
453 SDValue ReduceLoadOpStoreWidth(SDNode *N);
454 SDValue splitMergedValStore(StoreSDNode *ST);
455 SDValue TransformFPLoadStorePair(SDNode *N);
456 SDValue convertBuildVecZextToZext(SDNode *N);
457 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
458 SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
459 SDValue reduceBuildVecToShuffle(SDNode *N);
460 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
461 ArrayRef<int> VectorMask, SDValue VecIn1,
462 SDValue VecIn2, unsigned LeftIdx);
463 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
465 /// Walk up chain skipping non-aliasing memory nodes,
466 /// looking for aliasing nodes and adding them to the Aliases vector.
467 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
468 SmallVectorImpl<SDValue> &Aliases);
470 /// Return true if there is any possibility that the two addresses overlap.
471 bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
473 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
474 /// chain (aliasing node.)
475 SDValue FindBetterChain(SDNode *N, SDValue Chain);
477 /// Try to replace a store and any possibly adjacent stores on
478 /// consecutive chains with better chains. Return true only if St is
481 /// Notice that other chains may still be replaced even if the function
483 bool findBetterNeighborChains(StoreSDNode *St);
485 /// Holds a pointer to an LSBaseSDNode as well as information on where it
486 /// is located in a sequence of memory operations connected by a chain.
488 // Ptr to the mem node.
489 LSBaseSDNode *MemNode;
491 // Offset from the base ptr.
492 int64_t OffsetFromBase;
494 MemOpLink(LSBaseSDNode *N, int64_t Offset)
495 : MemNode(N), OffsetFromBase(Offset) {}
498 /// This is a helper function for visitMUL to check the profitability
499 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
500 /// MulNode is the original multiply, AddNode is (add x, c1),
501 /// and ConstNode is c2.
502 bool isMulAddWithConstProfitable(SDNode *MulNode,
506 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
507 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
508 /// the type of the loaded value to be extended.
509 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
510 EVT LoadResultTy, EVT &ExtVT);
512 /// Helper function to calculate whether the given Load/Store can have its
513 /// width reduced to ExtVT.
514 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
515 EVT &MemVT, unsigned ShAmt = 0);
517 /// Used by BackwardsPropagateMask to find suitable loads.
518 bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
519 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
520 ConstantSDNode *Mask, SDNode *&NodeToMask);
521 /// Attempt to propagate a given AND node back to load leaves so that they
522 /// can be combined into narrow loads.
523 bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
525 /// Helper function for MergeConsecutiveStores which merges the
526 /// component store chains.
527 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
530 /// This is a helper function for MergeConsecutiveStores. When the
531 /// source elements of the consecutive stores are all constants or
532 /// all extracted vector elements, try to merge them into one
533 /// larger store introducing bitcasts if necessary. \return True
534 /// if a merged store was created.
535 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
536 EVT MemVT, unsigned NumStores,
537 bool IsConstantSrc, bool UseVector,
540 /// This is a helper function for MergeConsecutiveStores. Stores
541 /// that potentially may be merged with St are placed in
542 /// StoreNodes. RootNode is a chain predecessor to all store
544 void getStoreMergeCandidates(StoreSDNode *St,
545 SmallVectorImpl<MemOpLink> &StoreNodes,
548 /// Helper function for MergeConsecutiveStores. Checks if
549 /// candidate stores have indirect dependency through their
550 /// operands. RootNode is the predecessor to all stores calculated
551 /// by getStoreMergeCandidates and is used to prune the dependency check.
552 /// \return True if safe to merge.
553 bool checkMergeStoreCandidatesForDependencies(
554 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
557 /// Merge consecutive store operations into a wide store.
558 /// This optimization uses wide integers or vectors when possible.
559 /// \return number of stores that were merged into a merged store (the
560 /// affected nodes are stored as a prefix in \p StoreNodes).
561 bool MergeConsecutiveStores(StoreSDNode *St);
563 /// Try to transform a truncation where C is a constant:
564 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
566 /// \p N needs to be a truncation and its first operand an AND. Other
567 /// requirements are checked by the function (e.g. that trunc is
568 /// single-use) and if missed an empty SDValue is returned.
569 SDValue distributeTruncateThroughAnd(SDNode *N);
571 /// Helper function to determine whether the target supports operation
572 /// given by \p Opcode for type \p VT, that is, whether the operation
573 /// is legal or custom before legalizing operations, and whether is
574 /// legal (but not custom) after legalization.
575 bool hasOperation(unsigned Opcode, EVT VT) {
577 return TLI.isOperationLegal(Opcode, VT);
578 return TLI.isOperationLegalOrCustom(Opcode, VT);
582 /// Runs the dag combiner on all nodes in the work list
583 void Run(CombineLevel AtLevel);
585 SelectionDAG &getDAG() const { return DAG; }
587 /// Returns a type large enough to hold any valid shift amount - before type
588 /// legalization these can be huge.
589 EVT getShiftAmountTy(EVT LHSTy) {
590 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
591 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
594 /// This method returns true if we are running before type legalization or
595 /// if the specified VT is legal.
596 bool isTypeLegal(const EVT &VT) {
597 if (!LegalTypes) return true;
598 return TLI.isTypeLegal(VT);
601 /// Convenience wrapper around TargetLowering::getSetCCResultType
602 EVT getSetCCResultType(EVT VT) const {
603 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
606 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
607 SDValue OrigLoad, SDValue ExtLoad,
608 ISD::NodeType ExtType);
611 /// This class is a DAGUpdateListener that removes any deleted
612 /// nodes from the worklist.
613 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
617 explicit WorklistRemover(DAGCombiner &dc)
618 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
620 void NodeDeleted(SDNode *N, SDNode *E) override {
621 DC.removeFromWorklist(N);
625 } // end anonymous namespace
627 //===----------------------------------------------------------------------===//
628 // TargetLowering::DAGCombinerInfo implementation
629 //===----------------------------------------------------------------------===//
631 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
632 ((DAGCombiner*)DC)->AddToWorklist(N);
635 SDValue TargetLowering::DAGCombinerInfo::
636 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
637 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
640 SDValue TargetLowering::DAGCombinerInfo::
641 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
642 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
645 SDValue TargetLowering::DAGCombinerInfo::
646 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
647 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
650 void TargetLowering::DAGCombinerInfo::
651 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
652 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
655 //===----------------------------------------------------------------------===//
657 //===----------------------------------------------------------------------===//
659 void DAGCombiner::deleteAndRecombine(SDNode *N) {
660 removeFromWorklist(N);
662 // If the operands of this node are only used by the node, they will now be
663 // dead. Make sure to re-visit them and recursively delete dead nodes.
664 for (const SDValue &Op : N->ops())
665 // For an operand generating multiple values, one of the values may
666 // become dead allowing further simplification (e.g. split index
667 // arithmetic from an indexed load).
668 if (Op->hasOneUse() || Op->getNumValues() > 1)
669 AddToWorklist(Op.getNode());
674 /// Return 1 if we can compute the negated form of the specified expression for
675 /// the same cost as the expression itself, or 2 if we can compute the negated
676 /// form more cheaply than the expression itself.
677 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
678 const TargetLowering &TLI,
679 const TargetOptions *Options,
680 unsigned Depth = 0) {
681 // fneg is removable even if it has multiple uses.
682 if (Op.getOpcode() == ISD::FNEG) return 2;
684 // Don't allow anything with multiple uses unless we know it is free.
685 EVT VT = Op.getValueType();
686 const SDNodeFlags Flags = Op->getFlags();
688 if (!(Op.getOpcode() == ISD::FP_EXTEND &&
689 TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
692 // Don't recurse exponentially.
693 if (Depth > 6) return 0;
695 switch (Op.getOpcode()) {
696 default: return false;
697 case ISD::ConstantFP: {
698 if (!LegalOperations)
701 // Don't invert constant FP values after legalization unless the target says
702 // the negated constant is legal.
703 return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
704 TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
707 if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
710 // After operation legalization, it might not be legal to create new FSUBs.
711 if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
714 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
715 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
718 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
719 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
722 // We can't turn -(A-B) into B-A when we honor signed zeros.
723 if (!Options->NoSignedZerosFPMath &&
724 !Flags.hasNoSignedZeros())
727 // fold (fneg (fsub A, B)) -> (fsub B, A)
732 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
733 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
737 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
743 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
748 /// If isNegatibleForFree returns true, return the newly negated expression.
749 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
750 bool LegalOperations, unsigned Depth = 0) {
751 const TargetOptions &Options = DAG.getTarget().Options;
752 // fneg is removable even if it has multiple uses.
753 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
755 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
757 const SDNodeFlags Flags = Op.getNode()->getFlags();
759 switch (Op.getOpcode()) {
760 default: llvm_unreachable("Unknown code");
761 case ISD::ConstantFP: {
762 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
764 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
767 assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
769 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
770 if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
771 DAG.getTargetLoweringInfo(), &Options, Depth+1))
772 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
773 GetNegatedExpression(Op.getOperand(0), DAG,
774 LegalOperations, Depth+1),
775 Op.getOperand(1), Flags);
776 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
777 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
778 GetNegatedExpression(Op.getOperand(1), DAG,
779 LegalOperations, Depth+1),
780 Op.getOperand(0), Flags);
782 // fold (fneg (fsub 0, B)) -> B
783 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
785 return Op.getOperand(1);
787 // fold (fneg (fsub A, B)) -> (fsub B, A)
788 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
789 Op.getOperand(1), Op.getOperand(0), Flags);
793 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
794 if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
795 DAG.getTargetLoweringInfo(), &Options, Depth+1))
796 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
797 GetNegatedExpression(Op.getOperand(0), DAG,
798 LegalOperations, Depth+1),
799 Op.getOperand(1), Flags);
801 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
802 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
804 GetNegatedExpression(Op.getOperand(1), DAG,
805 LegalOperations, Depth+1), Flags);
809 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
810 GetNegatedExpression(Op.getOperand(0), DAG,
811 LegalOperations, Depth+1));
813 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
814 GetNegatedExpression(Op.getOperand(0), DAG,
815 LegalOperations, Depth+1),
820 // APInts must be the same size for most operations, this helper
821 // function zero extends the shorter of the pair so that they match.
822 // We provide an Offset so that we can create bitwidths that won't overflow.
823 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
824 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
825 LHS = LHS.zextOrSelf(Bits);
826 RHS = RHS.zextOrSelf(Bits);
829 // Return true if this node is a setcc, or is a select_cc
830 // that selects between the target values used for true and false, making it
831 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
832 // the appropriate nodes based on the type of node we are checking. This
833 // simplifies life a bit for the callers.
834 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
836 if (N.getOpcode() == ISD::SETCC) {
837 LHS = N.getOperand(0);
838 RHS = N.getOperand(1);
839 CC = N.getOperand(2);
843 if (N.getOpcode() != ISD::SELECT_CC ||
844 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
845 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
848 if (TLI.getBooleanContents(N.getValueType()) ==
849 TargetLowering::UndefinedBooleanContent)
852 LHS = N.getOperand(0);
853 RHS = N.getOperand(1);
854 CC = N.getOperand(4);
858 /// Return true if this is a SetCC-equivalent operation with only one use.
859 /// If this is true, it allows the users to invert the operation for free when
860 /// it is profitable to do so.
861 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
863 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
868 static SDValue peekThroughBitcast(SDValue V) {
869 while (V.getOpcode() == ISD::BITCAST)
874 // Returns the SDNode if it is a constant float BuildVector
875 // or constant float.
876 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
877 if (isa<ConstantFPSDNode>(N))
879 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
884 // Determines if it is a constant integer or a build vector of constant
885 // integers (and undefs).
886 // Do not permit build vector implicit truncation.
887 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
888 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
889 return !(Const->isOpaque() && NoOpaques);
890 if (N.getOpcode() != ISD::BUILD_VECTOR)
892 unsigned BitWidth = N.getScalarValueSizeInBits();
893 for (const SDValue &Op : N->op_values()) {
896 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
897 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
898 (Const->isOpaque() && NoOpaques))
904 // Determines if it is a constant null integer or a splatted vector of a
905 // constant null integer (with no undefs).
906 // Build vector implicit truncation is not an issue for null values.
907 static bool isNullConstantOrNullSplatConstant(SDValue N) {
908 // TODO: may want to use peekThroughBitcast() here.
909 if (ConstantSDNode *Splat = isConstOrConstSplat(N))
910 return Splat->isNullValue();
914 // Determines if it is a constant integer of one or a splatted vector of a
915 // constant integer of one (with no undefs).
916 // Do not permit build vector implicit truncation.
917 static bool isOneConstantOrOneSplatConstant(SDValue N) {
918 // TODO: may want to use peekThroughBitcast() here.
919 unsigned BitWidth = N.getScalarValueSizeInBits();
920 if (ConstantSDNode *Splat = isConstOrConstSplat(N))
921 return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
925 // Determines if it is a constant integer of all ones or a splatted vector of a
926 // constant integer of all ones (with no undefs).
927 // Do not permit build vector implicit truncation.
928 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
929 N = peekThroughBitcast(N);
930 unsigned BitWidth = N.getScalarValueSizeInBits();
931 if (ConstantSDNode *Splat = isConstOrConstSplat(N))
932 return Splat->isAllOnesValue() &&
933 Splat->getAPIntValue().getBitWidth() == BitWidth;
937 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
939 static bool isAnyConstantBuildVector(const SDNode *N) {
940 return ISD::isBuildVectorOfConstantSDNodes(N) ||
941 ISD::isBuildVectorOfConstantFPSDNodes(N);
944 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
946 EVT VT = N0.getValueType();
947 if (N0.getOpcode() == Opc) {
948 if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
949 if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
950 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
951 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
952 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
955 if (N0.hasOneUse()) {
956 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
958 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
959 if (!OpNode.getNode())
961 AddToWorklist(OpNode.getNode());
962 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
967 if (N1.getOpcode() == Opc) {
968 if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
969 if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
970 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
971 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
972 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
975 if (N1.hasOneUse()) {
976 // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
978 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
979 if (!OpNode.getNode())
981 AddToWorklist(OpNode.getNode());
982 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
990 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
992 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
994 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
995 To[0].getNode()->dump(&DAG);
996 dbgs() << " and " << NumTo - 1 << " other values\n");
997 for (unsigned i = 0, e = NumTo; i != e; ++i)
998 assert((!To[i].getNode() ||
999 N->getValueType(i) == To[i].getValueType()) &&
1000 "Cannot combine value to value of different type!");
1002 WorklistRemover DeadNodes(*this);
1003 DAG.ReplaceAllUsesWith(N, To);
1005 // Push the new nodes and any users onto the worklist
1006 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1007 if (To[i].getNode()) {
1008 AddToWorklist(To[i].getNode());
1009 AddUsersToWorklist(To[i].getNode());
1014 // Finally, if the node is now dead, remove it from the graph. The node
1015 // may not be dead if the replacement process recursively simplified to
1016 // something else needing this node.
1018 deleteAndRecombine(N);
1019 return SDValue(N, 0);
1023 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1024 // Replace all uses. If any nodes become isomorphic to other nodes and
1025 // are deleted, make sure to remove them from our worklist.
1026 WorklistRemover DeadNodes(*this);
1027 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1029 // Push the new node and any (possibly new) users onto the worklist.
1030 AddToWorklist(TLO.New.getNode());
1031 AddUsersToWorklist(TLO.New.getNode());
1033 // Finally, if the node is now dead, remove it from the graph. The node
1034 // may not be dead if the replacement process recursively simplified to
1035 // something else needing this node.
1036 if (TLO.Old.getNode()->use_empty())
1037 deleteAndRecombine(TLO.Old.getNode());
1040 /// Check the specified integer node value to see if it can be simplified or if
1041 /// things it uses can be simplified by bit propagation. If so, return true.
1042 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1043 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1045 if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1048 // Revisit the node.
1049 AddToWorklist(Op.getNode());
1051 // Replace the old value with the new one.
1053 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1054 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1057 CommitTargetLoweringOpt(TLO);
1061 /// Check the specified vector node value to see if it can be simplified or
1062 /// if things it uses can be simplified as it only uses some of the elements.
1063 /// If so, return true.
1064 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1065 bool AssumeSingleUse) {
1066 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1067 APInt KnownUndef, KnownZero;
1068 if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1069 0, AssumeSingleUse))
1072 // Revisit the node.
1073 AddToWorklist(Op.getNode());
1075 // Replace the old value with the new one.
1077 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1078 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1081 CommitTargetLoweringOpt(TLO);
1085 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1087 EVT VT = Load->getValueType(0);
1088 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1090 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1091 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1092 WorklistRemover DeadNodes(*this);
1093 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1094 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1095 deleteAndRecombine(Load);
1096 AddToWorklist(Trunc.getNode());
1099 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1102 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1103 LoadSDNode *LD = cast<LoadSDNode>(Op);
1104 EVT MemVT = LD->getMemoryVT();
1105 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1106 : LD->getExtensionType();
1108 return DAG.getExtLoad(ExtType, DL, PVT,
1109 LD->getChain(), LD->getBasePtr(),
1110 MemVT, LD->getMemOperand());
1113 unsigned Opc = Op.getOpcode();
1116 case ISD::AssertSext:
1117 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1118 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1120 case ISD::AssertZext:
1121 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1122 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1124 case ISD::Constant: {
1126 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1127 return DAG.getNode(ExtOpc, DL, PVT, Op);
1131 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1133 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1136 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1137 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1139 EVT OldVT = Op.getValueType();
1141 bool Replace = false;
1142 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1143 if (!NewOp.getNode())
1145 AddToWorklist(NewOp.getNode());
1148 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1149 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1150 DAG.getValueType(OldVT));
1153 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1154 EVT OldVT = Op.getValueType();
1156 bool Replace = false;
1157 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1158 if (!NewOp.getNode())
1160 AddToWorklist(NewOp.getNode());
1163 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1164 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1167 /// Promote the specified integer binary operation if the target indicates it is
1168 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1169 /// i32 since i16 instructions are longer.
1170 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1171 if (!LegalOperations)
1174 EVT VT = Op.getValueType();
1175 if (VT.isVector() || !VT.isInteger())
1178 // If operation type is 'undesirable', e.g. i16 on x86, consider
1180 unsigned Opc = Op.getOpcode();
1181 if (TLI.isTypeDesirableForOp(Opc, VT))
1185 // Consult target whether it is a good idea to promote this operation and
1186 // what's the right type to promote it to.
1187 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1188 assert(PVT != VT && "Don't know what type to promote to!");
1190 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1192 bool Replace0 = false;
1193 SDValue N0 = Op.getOperand(0);
1194 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1196 bool Replace1 = false;
1197 SDValue N1 = Op.getOperand(1);
1198 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1202 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1204 // We are always replacing N0/N1's use in N and only need
1205 // additional replacements if there are additional uses.
1206 Replace0 &= !N0->hasOneUse();
1207 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1209 // Combine Op here so it is preserved past replacements.
1210 CombineTo(Op.getNode(), RV);
1212 // If operands have a use ordering, make sure we deal with
1213 // predecessor first.
1214 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1216 std::swap(NN0, NN1);
1220 AddToWorklist(NN0.getNode());
1221 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1224 AddToWorklist(NN1.getNode());
1225 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1232 /// Promote the specified integer shift operation if the target indicates it is
1233 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1234 /// i32 since i16 instructions are longer.
1235 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1236 if (!LegalOperations)
1239 EVT VT = Op.getValueType();
1240 if (VT.isVector() || !VT.isInteger())
1243 // If operation type is 'undesirable', e.g. i16 on x86, consider
1245 unsigned Opc = Op.getOpcode();
1246 if (TLI.isTypeDesirableForOp(Opc, VT))
1250 // Consult target whether it is a good idea to promote this operation and
1251 // what's the right type to promote it to.
1252 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1253 assert(PVT != VT && "Don't know what type to promote to!");
1255 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1257 bool Replace = false;
1258 SDValue N0 = Op.getOperand(0);
1259 SDValue N1 = Op.getOperand(1);
1260 if (Opc == ISD::SRA)
1261 N0 = SExtPromoteOperand(N0, PVT);
1262 else if (Opc == ISD::SRL)
1263 N0 = ZExtPromoteOperand(N0, PVT);
1265 N0 = PromoteOperand(N0, PVT, Replace);
1272 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1274 AddToWorklist(N0.getNode());
1276 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1278 // Deal with Op being deleted.
1279 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1285 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1286 if (!LegalOperations)
1289 EVT VT = Op.getValueType();
1290 if (VT.isVector() || !VT.isInteger())
1293 // If operation type is 'undesirable', e.g. i16 on x86, consider
1295 unsigned Opc = Op.getOpcode();
1296 if (TLI.isTypeDesirableForOp(Opc, VT))
1300 // Consult target whether it is a good idea to promote this operation and
1301 // what's the right type to promote it to.
1302 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1303 assert(PVT != VT && "Don't know what type to promote to!");
1304 // fold (aext (aext x)) -> (aext x)
1305 // fold (aext (zext x)) -> (zext x)
1306 // fold (aext (sext x)) -> (sext x)
1307 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1308 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1313 bool DAGCombiner::PromoteLoad(SDValue Op) {
1314 if (!LegalOperations)
1317 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1320 EVT VT = Op.getValueType();
1321 if (VT.isVector() || !VT.isInteger())
1324 // If operation type is 'undesirable', e.g. i16 on x86, consider
1326 unsigned Opc = Op.getOpcode();
1327 if (TLI.isTypeDesirableForOp(Opc, VT))
1331 // Consult target whether it is a good idea to promote this operation and
1332 // what's the right type to promote it to.
1333 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1334 assert(PVT != VT && "Don't know what type to promote to!");
1337 SDNode *N = Op.getNode();
1338 LoadSDNode *LD = cast<LoadSDNode>(N);
1339 EVT MemVT = LD->getMemoryVT();
1340 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1341 : LD->getExtensionType();
1342 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1343 LD->getChain(), LD->getBasePtr(),
1344 MemVT, LD->getMemOperand());
1345 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1347 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1348 Result.getNode()->dump(&DAG); dbgs() << '\n');
1349 WorklistRemover DeadNodes(*this);
1350 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1351 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1352 deleteAndRecombine(N);
1353 AddToWorklist(Result.getNode());
1359 /// Recursively delete a node which has no uses and any operands for
1360 /// which it is the only use.
1362 /// Note that this both deletes the nodes and removes them from the worklist.
1363 /// It also adds any nodes who have had a user deleted to the worklist as they
1364 /// may now have only one use and subject to other combines.
1365 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1366 if (!N->use_empty())
1369 SmallSetVector<SDNode *, 16> Nodes;
1372 N = Nodes.pop_back_val();
1376 if (N->use_empty()) {
1377 for (const SDValue &ChildN : N->op_values())
1378 Nodes.insert(ChildN.getNode());
1380 removeFromWorklist(N);
1385 } while (!Nodes.empty());
1389 //===----------------------------------------------------------------------===//
1390 // Main DAG Combiner implementation
1391 //===----------------------------------------------------------------------===//
1393 void DAGCombiner::Run(CombineLevel AtLevel) {
1394 // set the instance variables, so that the various visit routines may use it.
1396 LegalOperations = Level >= AfterLegalizeVectorOps;
1397 LegalTypes = Level >= AfterLegalizeTypes;
1399 // Add all the dag nodes to the worklist.
1400 for (SDNode &Node : DAG.allnodes())
1401 AddToWorklist(&Node);
1403 // Create a dummy node (which is not added to allnodes), that adds a reference
1404 // to the root node, preventing it from being deleted, and tracking any
1405 // changes of the root.
1406 HandleSDNode Dummy(DAG.getRoot());
1408 // While the worklist isn't empty, find a node and try to combine it.
1409 while (!WorklistMap.empty()) {
1411 // The Worklist holds the SDNodes in order, but it may contain null entries.
1413 N = Worklist.pop_back_val();
1416 bool GoodWorklistEntry = WorklistMap.erase(N);
1417 (void)GoodWorklistEntry;
1418 assert(GoodWorklistEntry &&
1419 "Found a worklist entry without a corresponding map entry!");
1421 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1422 // N is deleted from the DAG, since they too may now be dead or may have a
1423 // reduced number of uses, allowing other xforms.
1424 if (recursivelyDeleteUnusedNodes(N))
1427 WorklistRemover DeadNodes(*this);
1429 // If this combine is running after legalizing the DAG, re-legalize any
1430 // nodes pulled off the worklist.
1431 if (Level == AfterLegalizeDAG) {
1432 SmallSetVector<SDNode *, 16> UpdatedNodes;
1433 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1435 for (SDNode *LN : UpdatedNodes) {
1437 AddUsersToWorklist(LN);
1443 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1445 // Add any operands of the new node which have not yet been combined to the
1446 // worklist as well. Because the worklist uniques things already, this
1447 // won't repeatedly process the same operand.
1448 CombinedNodes.insert(N);
1449 for (const SDValue &ChildN : N->op_values())
1450 if (!CombinedNodes.count(ChildN.getNode()))
1451 AddToWorklist(ChildN.getNode());
1453 SDValue RV = combine(N);
1460 // If we get back the same node we passed in, rather than a new node or
1461 // zero, we know that the node must have defined multiple values and
1462 // CombineTo was used. Since CombineTo takes care of the worklist
1463 // mechanics for us, we have no work to do in this case.
1464 if (RV.getNode() == N)
1467 assert(N->getOpcode() != ISD::DELETED_NODE &&
1468 RV.getOpcode() != ISD::DELETED_NODE &&
1469 "Node was deleted but visit returned new node!");
1471 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1473 if (N->getNumValues() == RV.getNode()->getNumValues())
1474 DAG.ReplaceAllUsesWith(N, RV.getNode());
1476 assert(N->getValueType(0) == RV.getValueType() &&
1477 N->getNumValues() == 1 && "Type mismatch");
1478 DAG.ReplaceAllUsesWith(N, &RV);
1481 // Push the new node and any users onto the worklist
1482 AddToWorklist(RV.getNode());
1483 AddUsersToWorklist(RV.getNode());
1485 // Finally, if the node is now dead, remove it from the graph. The node
1486 // may not be dead if the replacement process recursively simplified to
1487 // something else needing this node. This will also take care of adding any
1488 // operands which have lost a user to the worklist.
1489 recursivelyDeleteUnusedNodes(N);
1492 // If the root changed (e.g. it was a dead load, update the root).
1493 DAG.setRoot(Dummy.getValue());
1494 DAG.RemoveDeadNodes();
1497 SDValue DAGCombiner::visit(SDNode *N) {
1498 switch (N->getOpcode()) {
1500 case ISD::TokenFactor: return visitTokenFactor(N);
1501 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1502 case ISD::ADD: return visitADD(N);
1503 case ISD::SUB: return visitSUB(N);
1504 case ISD::ADDC: return visitADDC(N);
1505 case ISD::UADDO: return visitUADDO(N);
1506 case ISD::SUBC: return visitSUBC(N);
1507 case ISD::USUBO: return visitUSUBO(N);
1508 case ISD::ADDE: return visitADDE(N);
1509 case ISD::ADDCARRY: return visitADDCARRY(N);
1510 case ISD::SUBE: return visitSUBE(N);
1511 case ISD::SUBCARRY: return visitSUBCARRY(N);
1512 case ISD::MUL: return visitMUL(N);
1513 case ISD::SDIV: return visitSDIV(N);
1514 case ISD::UDIV: return visitUDIV(N);
1516 case ISD::UREM: return visitREM(N);
1517 case ISD::MULHU: return visitMULHU(N);
1518 case ISD::MULHS: return visitMULHS(N);
1519 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1520 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1521 case ISD::SMULO: return visitSMULO(N);
1522 case ISD::UMULO: return visitUMULO(N);
1526 case ISD::UMAX: return visitIMINMAX(N);
1527 case ISD::AND: return visitAND(N);
1528 case ISD::OR: return visitOR(N);
1529 case ISD::XOR: return visitXOR(N);
1530 case ISD::SHL: return visitSHL(N);
1531 case ISD::SRA: return visitSRA(N);
1532 case ISD::SRL: return visitSRL(N);
1534 case ISD::ROTL: return visitRotate(N);
1535 case ISD::ABS: return visitABS(N);
1536 case ISD::BSWAP: return visitBSWAP(N);
1537 case ISD::BITREVERSE: return visitBITREVERSE(N);
1538 case ISD::CTLZ: return visitCTLZ(N);
1539 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1540 case ISD::CTTZ: return visitCTTZ(N);
1541 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1542 case ISD::CTPOP: return visitCTPOP(N);
1543 case ISD::SELECT: return visitSELECT(N);
1544 case ISD::VSELECT: return visitVSELECT(N);
1545 case ISD::SELECT_CC: return visitSELECT_CC(N);
1546 case ISD::SETCC: return visitSETCC(N);
1547 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1548 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1549 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1550 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1551 case ISD::AssertSext:
1552 case ISD::AssertZext: return visitAssertExt(N);
1553 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1554 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1555 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1556 case ISD::TRUNCATE: return visitTRUNCATE(N);
1557 case ISD::BITCAST: return visitBITCAST(N);
1558 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1559 case ISD::FADD: return visitFADD(N);
1560 case ISD::FSUB: return visitFSUB(N);
1561 case ISD::FMUL: return visitFMUL(N);
1562 case ISD::FMA: return visitFMA(N);
1563 case ISD::FDIV: return visitFDIV(N);
1564 case ISD::FREM: return visitFREM(N);
1565 case ISD::FSQRT: return visitFSQRT(N);
1566 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1567 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1568 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1569 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1570 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1571 case ISD::FP_ROUND: return visitFP_ROUND(N);
1572 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1573 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1574 case ISD::FNEG: return visitFNEG(N);
1575 case ISD::FABS: return visitFABS(N);
1576 case ISD::FFLOOR: return visitFFLOOR(N);
1577 case ISD::FMINNUM: return visitFMINNUM(N);
1578 case ISD::FMAXNUM: return visitFMAXNUM(N);
1579 case ISD::FCEIL: return visitFCEIL(N);
1580 case ISD::FTRUNC: return visitFTRUNC(N);
1581 case ISD::BRCOND: return visitBRCOND(N);
1582 case ISD::BR_CC: return visitBR_CC(N);
1583 case ISD::LOAD: return visitLOAD(N);
1584 case ISD::STORE: return visitSTORE(N);
1585 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1586 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1587 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1588 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1589 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1590 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1591 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1592 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1593 case ISD::MGATHER: return visitMGATHER(N);
1594 case ISD::MLOAD: return visitMLOAD(N);
1595 case ISD::MSCATTER: return visitMSCATTER(N);
1596 case ISD::MSTORE: return visitMSTORE(N);
1597 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1598 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1603 SDValue DAGCombiner::combine(SDNode *N) {
1604 SDValue RV = visit(N);
1606 // If nothing happened, try a target-specific DAG combine.
1607 if (!RV.getNode()) {
1608 assert(N->getOpcode() != ISD::DELETED_NODE &&
1609 "Node was deleted but visit returned NULL!");
1611 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1612 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1614 // Expose the DAG combiner to the target combiner impls.
1615 TargetLowering::DAGCombinerInfo
1616 DagCombineInfo(DAG, Level, false, this);
1618 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1622 // If nothing happened still, try promoting the operation.
1623 if (!RV.getNode()) {
1624 switch (N->getOpcode()) {
1632 RV = PromoteIntBinOp(SDValue(N, 0));
1637 RV = PromoteIntShiftOp(SDValue(N, 0));
1639 case ISD::SIGN_EXTEND:
1640 case ISD::ZERO_EXTEND:
1641 case ISD::ANY_EXTEND:
1642 RV = PromoteExtend(SDValue(N, 0));
1645 if (PromoteLoad(SDValue(N, 0)))
1651 // If N is a commutative binary node, try eliminate it if the commuted
1652 // version is already present in the DAG.
1653 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1654 N->getNumValues() == 1) {
1655 SDValue N0 = N->getOperand(0);
1656 SDValue N1 = N->getOperand(1);
1658 // Constant operands are canonicalized to RHS.
1659 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1660 SDValue Ops[] = {N1, N0};
1661 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1664 return SDValue(CSENode, 0);
1671 /// Given a node, return its input chain if it has one, otherwise return a null
1673 static SDValue getInputChainForNode(SDNode *N) {
1674 if (unsigned NumOps = N->getNumOperands()) {
1675 if (N->getOperand(0).getValueType() == MVT::Other)
1676 return N->getOperand(0);
1677 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1678 return N->getOperand(NumOps-1);
1679 for (unsigned i = 1; i < NumOps-1; ++i)
1680 if (N->getOperand(i).getValueType() == MVT::Other)
1681 return N->getOperand(i);
1686 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1687 // If N has two operands, where one has an input chain equal to the other,
1688 // the 'other' chain is redundant.
1689 if (N->getNumOperands() == 2) {
1690 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1691 return N->getOperand(0);
1692 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1693 return N->getOperand(1);
1696 // Don't simplify token factors if optnone.
1697 if (OptLevel == CodeGenOpt::None)
1700 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1701 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1702 SmallPtrSet<SDNode*, 16> SeenOps;
1703 bool Changed = false; // If we should replace this token factor.
1705 // Start out with this token factor.
1708 // Iterate through token factors. The TFs grows when new token factors are
1710 for (unsigned i = 0; i < TFs.size(); ++i) {
1711 SDNode *TF = TFs[i];
1713 // Check each of the operands.
1714 for (const SDValue &Op : TF->op_values()) {
1715 switch (Op.getOpcode()) {
1716 case ISD::EntryToken:
1717 // Entry tokens don't need to be added to the list. They are
1722 case ISD::TokenFactor:
1723 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1724 // Queue up for processing.
1725 TFs.push_back(Op.getNode());
1726 // Clean up in case the token factor is removed.
1727 AddToWorklist(Op.getNode());
1734 // Only add if it isn't already in the list.
1735 if (SeenOps.insert(Op.getNode()).second)
1744 // Remove Nodes that are chained to another node in the list. Do so
1745 // by walking up chains breath-first stopping when we've seen
1746 // another operand. In general we must climb to the EntryNode, but we can exit
1747 // early if we find all remaining work is associated with just one operand as
1748 // no further pruning is possible.
1750 // List of nodes to search through and original Ops from which they originate.
1751 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1752 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1753 SmallPtrSet<SDNode *, 16> SeenChains;
1754 bool DidPruneOps = false;
1756 unsigned NumLeftToConsider = 0;
1757 for (const SDValue &Op : Ops) {
1758 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1759 OpWorkCount.push_back(1);
1762 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1763 // If this is an Op, we can remove the op from the list. Remark any
1764 // search associated with it as from the current OpNumber.
1765 if (SeenOps.count(Op) != 0) {
1768 unsigned OrigOpNumber = 0;
1769 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1771 assert((OrigOpNumber != Ops.size()) &&
1772 "expected to find TokenFactor Operand");
1773 // Re-mark worklist from OrigOpNumber to OpNumber
1774 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1775 if (Worklist[i].second == OrigOpNumber) {
1776 Worklist[i].second = OpNumber;
1779 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1780 OpWorkCount[OrigOpNumber] = 0;
1781 NumLeftToConsider--;
1783 // Add if it's a new chain
1784 if (SeenChains.insert(Op).second) {
1785 OpWorkCount[OpNumber]++;
1786 Worklist.push_back(std::make_pair(Op, OpNumber));
1790 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1791 // We need at least be consider at least 2 Ops to prune.
1792 if (NumLeftToConsider <= 1)
1794 auto CurNode = Worklist[i].first;
1795 auto CurOpNumber = Worklist[i].second;
1796 assert((OpWorkCount[CurOpNumber] > 0) &&
1797 "Node should not appear in worklist");
1798 switch (CurNode->getOpcode()) {
1799 case ISD::EntryToken:
1800 // Hitting EntryToken is the only way for the search to terminate without
1802 // another operand's search. Prevent us from marking this operand
1804 NumLeftToConsider++;
1806 case ISD::TokenFactor:
1807 for (const SDValue &Op : CurNode->op_values())
1808 AddToWorklist(i, Op.getNode(), CurOpNumber);
1810 case ISD::CopyFromReg:
1811 case ISD::CopyToReg:
1812 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1815 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1816 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1819 OpWorkCount[CurOpNumber]--;
1820 if (OpWorkCount[CurOpNumber] == 0)
1821 NumLeftToConsider--;
1824 // If we've changed things around then replace token factor.
1828 // The entry token is the only possible outcome.
1829 Result = DAG.getEntryNode();
1832 SmallVector<SDValue, 8> PrunedOps;
1834 for (const SDValue &Op : Ops) {
1835 if (SeenChains.count(Op.getNode()) == 0)
1836 PrunedOps.push_back(Op);
1838 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1840 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1848 /// MERGE_VALUES can always be eliminated.
1849 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1850 WorklistRemover DeadNodes(*this);
1851 // Replacing results may cause a different MERGE_VALUES to suddenly
1852 // be CSE'd with N, and carry its uses with it. Iterate until no
1853 // uses remain, to ensure that the node can be safely deleted.
1854 // First add the users of this node to the work list so that they
1855 // can be tried again once they have new operands.
1856 AddUsersToWorklist(N);
1858 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1859 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1860 } while (!N->use_empty());
1861 deleteAndRecombine(N);
1862 return SDValue(N, 0); // Return N so it doesn't get rechecked!
1865 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1866 /// ConstantSDNode pointer else nullptr.
1867 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1868 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1869 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1872 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1873 auto BinOpcode = BO->getOpcode();
1874 assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1875 BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1876 BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1877 BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1878 BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1879 BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1880 BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1881 BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1882 BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1883 "Unexpected binary operator");
1885 // Don't do this unless the old select is going away. We want to eliminate the
1886 // binary operator, not replace a binop with a select.
1887 // TODO: Handle ISD::SELECT_CC.
1888 unsigned SelOpNo = 0;
1889 SDValue Sel = BO->getOperand(0);
1890 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1892 Sel = BO->getOperand(1);
1895 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1898 SDValue CT = Sel.getOperand(1);
1899 if (!isConstantOrConstantVector(CT, true) &&
1900 !isConstantFPBuildVectorOrConstantFP(CT))
1903 SDValue CF = Sel.getOperand(2);
1904 if (!isConstantOrConstantVector(CF, true) &&
1905 !isConstantFPBuildVectorOrConstantFP(CF))
1908 // Bail out if any constants are opaque because we can't constant fold those.
1909 // The exception is "and" and "or" with either 0 or -1 in which case we can
1910 // propagate non constant operands into select. I.e.:
1911 // and (select Cond, 0, -1), X --> select Cond, 0, X
1912 // or X, (select Cond, -1, 0) --> select Cond, -1, X
1913 bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1914 (isNullConstantOrNullSplatConstant(CT) ||
1915 isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
1916 (isNullConstantOrNullSplatConstant(CF) ||
1917 isAllOnesConstantOrAllOnesSplatConstant(CF));
1919 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1920 if (!CanFoldNonConst &&
1921 !isConstantOrConstantVector(CBO, true) &&
1922 !isConstantFPBuildVectorOrConstantFP(CBO))
1925 EVT VT = Sel.getValueType();
1927 // In case of shift value and shift amount may have different VT. For instance
1928 // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
1929 // swapped operands and value types do not match. NB: x86 is fine if operands
1930 // are not swapped with shift amount VT being not bigger than shifted value.
1931 // TODO: that is possible to check for a shift operation, correct VTs and
1932 // still perform optimization on x86 if needed.
1933 if (SelOpNo && VT != CBO.getValueType())
1936 // We have a select-of-constants followed by a binary operator with a
1937 // constant. Eliminate the binop by pulling the constant math into the select.
1938 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1940 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1941 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1942 if (!CanFoldNonConst && !NewCT.isUndef() &&
1943 !isConstantOrConstantVector(NewCT, true) &&
1944 !isConstantFPBuildVectorOrConstantFP(NewCT))
1947 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1948 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1949 if (!CanFoldNonConst && !NewCF.isUndef() &&
1950 !isConstantOrConstantVector(NewCF, true) &&
1951 !isConstantFPBuildVectorOrConstantFP(NewCF))
1954 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1957 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1958 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1959 "Expecting add or sub");
1961 // Match a constant operand and a zext operand for the math instruction:
1964 bool IsAdd = N->getOpcode() == ISD::ADD;
1965 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1966 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1967 auto *CN = dyn_cast<ConstantSDNode>(C);
1968 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1971 // Match the zext operand as a setcc of a boolean.
1972 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1973 Z.getOperand(0).getValueType() != MVT::i1)
1976 // Match the compare as: setcc (X & 1), 0, eq.
1977 SDValue SetCC = Z.getOperand(0);
1978 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1979 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1980 SetCC.getOperand(0).getOpcode() != ISD::AND ||
1981 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1984 // We are adding/subtracting a constant and an inverted low bit. Turn that
1985 // into a subtract/add of the low bit with incremented/decremented constant:
1986 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1987 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1988 EVT VT = C.getValueType();
1990 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1991 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1992 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1993 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1996 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1997 /// a shift and add with a different constant.
1998 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1999 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2000 "Expecting add or sub");
2002 // We need a constant operand for the add/sub, and the other operand is a
2003 // logical shift right: add (srl), C or sub C, (srl).
2004 bool IsAdd = N->getOpcode() == ISD::ADD;
2005 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2006 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2007 ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2008 if (!C || ShiftOp.getOpcode() != ISD::SRL)
2011 // The shift must be of a 'not' value.
2012 // TODO: Use isBitwiseNot() if it works with vectors.
2013 SDValue Not = ShiftOp.getOperand(0);
2014 if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
2015 !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
2018 // The shift must be moving the sign bit to the least-significant-bit.
2019 EVT VT = ShiftOp.getValueType();
2020 SDValue ShAmt = ShiftOp.getOperand(1);
2021 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2022 if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2025 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2026 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2027 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2029 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2030 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2031 APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2032 return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2035 SDValue DAGCombiner::visitADD(SDNode *N) {
2036 SDValue N0 = N->getOperand(0);
2037 SDValue N1 = N->getOperand(1);
2038 EVT VT = N0.getValueType();
2042 if (VT.isVector()) {
2043 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2046 // fold (add x, 0) -> x, vector edition
2047 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2049 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2053 // fold (add x, undef) -> undef
2060 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2061 // canonicalize constant to RHS
2062 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2063 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2064 // fold (add c1, c2) -> c1+c2
2065 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2069 // fold (add x, 0) -> x
2070 if (isNullConstant(N1))
2073 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2074 // fold ((c1-A)+c2) -> (c1+c2)-A
2075 if (N0.getOpcode() == ISD::SUB &&
2076 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2077 // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2078 return DAG.getNode(ISD::SUB, DL, VT,
2079 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2083 // add (sext i1 X), 1 -> zext (not i1 X)
2084 // We don't transform this pattern:
2085 // add (zext i1 X), -1 -> sext (not i1 X)
2086 // because most (?) targets generate better code for the zext form.
2087 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2088 isOneConstantOrOneSplatConstant(N1)) {
2089 SDValue X = N0.getOperand(0);
2090 if ((!LegalOperations ||
2091 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2092 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2093 X.getScalarValueSizeInBits() == 1) {
2094 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2095 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2099 // Undo the add -> or combine to merge constant offsets from a frame index.
2100 if (N0.getOpcode() == ISD::OR &&
2101 isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2102 isa<ConstantSDNode>(N0.getOperand(1)) &&
2103 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2104 SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2105 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2109 if (SDValue NewSel = foldBinOpIntoSelect(N))
2113 if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
2116 // fold ((0-A) + B) -> B-A
2117 if (N0.getOpcode() == ISD::SUB &&
2118 isNullConstantOrNullSplatConstant(N0.getOperand(0)))
2119 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2121 // fold (A + (0-B)) -> A-B
2122 if (N1.getOpcode() == ISD::SUB &&
2123 isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2124 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2126 // fold (A+(B-A)) -> B
2127 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2128 return N1.getOperand(0);
2130 // fold ((B-A)+A) -> B
2131 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2132 return N0.getOperand(0);
2134 // fold (A+(B-(A+C))) to (B-C)
2135 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2136 N0 == N1.getOperand(1).getOperand(0))
2137 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2138 N1.getOperand(1).getOperand(1));
2140 // fold (A+(B-(C+A))) to (B-C)
2141 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2142 N0 == N1.getOperand(1).getOperand(1))
2143 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2144 N1.getOperand(1).getOperand(0));
2146 // fold (A+((B-A)+or-C)) to (B+or-C)
2147 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2148 N1.getOperand(0).getOpcode() == ISD::SUB &&
2149 N0 == N1.getOperand(0).getOperand(1))
2150 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2153 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2154 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2155 SDValue N00 = N0.getOperand(0);
2156 SDValue N01 = N0.getOperand(1);
2157 SDValue N10 = N1.getOperand(0);
2158 SDValue N11 = N1.getOperand(1);
2160 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2161 return DAG.getNode(ISD::SUB, DL, VT,
2162 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2163 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2166 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2169 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2172 if (SimplifyDemandedBits(SDValue(N, 0)))
2173 return SDValue(N, 0);
2175 // fold (a+b) -> (a|b) iff a and b share no bits.
2176 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2177 DAG.haveNoCommonBitsSet(N0, N1))
2178 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2180 // fold (add (xor a, -1), 1) -> (sub 0, a)
2181 if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
2182 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2185 if (SDValue Combined = visitADDLike(N0, N1, N))
2188 if (SDValue Combined = visitADDLike(N1, N0, N))
2194 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2195 bool Masked = false;
2197 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2199 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2200 V = V.getOperand(0);
2204 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2206 V = V.getOperand(0);
2213 // If this is not a carry, return.
2214 if (V.getResNo() != 1)
2217 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2218 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2221 // If the result is masked, then no matter what kind of bool it is we can
2222 // return. If it isn't, then we need to make sure the bool type is either 0 or
2223 // 1 and not other values.
2225 TLI.getBooleanContents(V.getValueType()) ==
2226 TargetLoweringBase::ZeroOrOneBooleanContent)
2232 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2233 EVT VT = N0.getValueType();
2234 SDLoc DL(LocReference);
2236 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2237 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2238 isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
2239 return DAG.getNode(ISD::SUB, DL, VT, N0,
2240 DAG.getNode(ISD::SHL, DL, VT,
2241 N1.getOperand(0).getOperand(1),
2244 if (N1.getOpcode() == ISD::AND) {
2245 SDValue AndOp0 = N1.getOperand(0);
2246 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2247 unsigned DestBits = VT.getScalarSizeInBits();
2249 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2250 // and similar xforms where the inner op is either ~0 or 0.
2251 if (NumSignBits == DestBits &&
2252 isOneConstantOrOneSplatConstant(N1->getOperand(1)))
2253 return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2256 // add (sext i1), X -> sub X, (zext i1)
2257 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2258 N0.getOperand(0).getValueType() == MVT::i1 &&
2259 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
2260 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2261 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2264 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2265 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2266 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2267 if (TN->getVT() == MVT::i1) {
2268 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2269 DAG.getConstant(1, DL, VT));
2270 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2274 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2275 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2277 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2278 N0, N1.getOperand(0), N1.getOperand(2));
2280 // (add X, Carry) -> (addcarry X, 0, Carry)
2281 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2282 if (SDValue Carry = getAsCarry(TLI, N1))
2283 return DAG.getNode(ISD::ADDCARRY, DL,
2284 DAG.getVTList(VT, Carry.getValueType()), N0,
2285 DAG.getConstant(0, DL, VT), Carry);
2290 SDValue DAGCombiner::visitADDC(SDNode *N) {
2291 SDValue N0 = N->getOperand(0);
2292 SDValue N1 = N->getOperand(1);
2293 EVT VT = N0.getValueType();
2296 // If the flag result is dead, turn this into an ADD.
2297 if (!N->hasAnyUseOfValue(1))
2298 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2299 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2301 // canonicalize constant to RHS.
2302 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2303 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2305 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2307 // fold (addc x, 0) -> x + no carry out
2308 if (isNullConstant(N1))
2309 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2312 // If it cannot overflow, transform into an add.
2313 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2314 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2315 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2320 static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
2321 SelectionDAG &DAG, const TargetLowering &TLI) {
2323 switch (TLI.getBooleanContents(VT)) {
2324 case TargetLowering::ZeroOrOneBooleanContent:
2325 case TargetLowering::UndefinedBooleanContent:
2326 Cst = DAG.getConstant(1, DL, VT);
2328 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2329 Cst = DAG.getConstant(-1, DL, VT);
2333 return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2336 static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
2337 if (V.getOpcode() != ISD::XOR) return false;
2338 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
2339 if (!Const) return false;
2341 switch(TLI.getBooleanContents(VT)) {
2342 case TargetLowering::ZeroOrOneBooleanContent:
2343 return Const->isOne();
2344 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2345 return Const->isAllOnesValue();
2346 case TargetLowering::UndefinedBooleanContent:
2347 return (Const->getAPIntValue() & 0x01) == 1;
2349 llvm_unreachable("Unsupported boolean content");
2352 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2353 SDValue N0 = N->getOperand(0);
2354 SDValue N1 = N->getOperand(1);
2355 EVT VT = N0.getValueType();
2359 EVT CarryVT = N->getValueType(1);
2362 // If the flag result is dead, turn this into an ADD.
2363 if (!N->hasAnyUseOfValue(1))
2364 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2365 DAG.getUNDEF(CarryVT));
2367 // canonicalize constant to RHS.
2368 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2369 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2371 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2373 // fold (uaddo x, 0) -> x + no carry out
2374 if (isNullConstant(N1))
2375 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2377 // If it cannot overflow, transform into an add.
2378 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2379 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2380 DAG.getConstant(0, DL, CarryVT));
2382 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2383 if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
2384 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2385 DAG.getConstant(0, DL, VT),
2387 return CombineTo(N, Sub,
2388 flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2391 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2394 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2400 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2401 auto VT = N0.getValueType();
2403 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2404 // If Y + 1 cannot overflow.
2405 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2406 SDValue Y = N1.getOperand(0);
2407 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2408 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2409 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2413 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2414 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2415 if (SDValue Carry = getAsCarry(TLI, N1))
2416 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2417 DAG.getConstant(0, SDLoc(N), VT), Carry);
2422 SDValue DAGCombiner::visitADDE(SDNode *N) {
2423 SDValue N0 = N->getOperand(0);
2424 SDValue N1 = N->getOperand(1);
2425 SDValue CarryIn = N->getOperand(2);
2427 // canonicalize constant to RHS
2428 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2429 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2431 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2434 // fold (adde x, y, false) -> (addc x, y)
2435 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2436 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2441 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2442 SDValue N0 = N->getOperand(0);
2443 SDValue N1 = N->getOperand(1);
2444 SDValue CarryIn = N->getOperand(2);
2447 // canonicalize constant to RHS
2448 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2449 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2451 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2453 // fold (addcarry x, y, false) -> (uaddo x, y)
2454 if (isNullConstant(CarryIn)) {
2455 if (!LegalOperations ||
2456 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2457 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2460 EVT CarryVT = CarryIn.getValueType();
2462 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2463 if (isNullConstant(N0) && isNullConstant(N1)) {
2464 EVT VT = N0.getValueType();
2465 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2466 AddToWorklist(CarryExt.getNode());
2467 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2468 DAG.getConstant(1, DL, VT)),
2469 DAG.getConstant(0, DL, CarryVT));
2472 // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2473 if (isBitwiseNot(N0) && isNullConstant(N1) &&
2474 isBooleanFlip(CarryIn, CarryVT, TLI)) {
2475 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2476 DAG.getConstant(0, DL, N0.getValueType()),
2477 N0.getOperand(0), CarryIn.getOperand(0));
2478 return CombineTo(N, Sub,
2479 flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
2482 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2485 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2491 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2493 // Iff the flag result is dead:
2494 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2495 if ((N0.getOpcode() == ISD::ADD ||
2496 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2497 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2498 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2499 N0.getOperand(0), N0.getOperand(1), CarryIn);
2502 * When one of the addcarry argument is itself a carry, we may be facing
2503 * a diamond carry propagation. In which case we try to transform the DAG
2504 * to ensure linear carry propagation if that is possible.
2506 * We are trying to get:
2507 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2509 if (auto Y = getAsCarry(TLI, N1)) {
2515 * | (addcarry *, 0, Z)
2519 * (addcarry X, *, *)
2521 if (Y.getOpcode() == ISD::UADDO &&
2522 CarryIn.getResNo() == 1 &&
2523 CarryIn.getOpcode() == ISD::ADDCARRY &&
2524 isNullConstant(CarryIn.getOperand(1)) &&
2525 CarryIn.getOperand(0) == Y.getValue(0)) {
2526 auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2527 Y.getOperand(0), Y.getOperand(1),
2528 CarryIn.getOperand(2));
2529 AddToWorklist(NewY.getNode());
2530 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2531 DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2539 // Since it may not be valid to emit a fold to zero for vector initializers
2540 // check if we can before folding.
2541 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2542 SelectionDAG &DAG, bool LegalOperations,
2545 return DAG.getConstant(0, DL, VT);
2546 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2547 return DAG.getConstant(0, DL, VT);
2551 SDValue DAGCombiner::visitSUB(SDNode *N) {
2552 SDValue N0 = N->getOperand(0);
2553 SDValue N1 = N->getOperand(1);
2554 EVT VT = N0.getValueType();
2558 if (VT.isVector()) {
2559 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2562 // fold (sub x, 0) -> x, vector edition
2563 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2567 // fold (sub x, x) -> 0
2568 // FIXME: Refactor this and xor and other similar operations together.
2570 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
2571 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2572 DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2573 // fold (sub c1, c2) -> c1-c2
2574 return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2578 if (SDValue NewSel = foldBinOpIntoSelect(N))
2581 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2583 // fold (sub x, c) -> (add x, -c)
2585 return DAG.getNode(ISD::ADD, DL, VT, N0,
2586 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2589 if (isNullConstantOrNullSplatConstant(N0)) {
2590 unsigned BitWidth = VT.getScalarSizeInBits();
2591 // Right-shifting everything out but the sign bit followed by negation is
2592 // the same as flipping arithmetic/logical shift type without the negation:
2593 // -(X >>u 31) -> (X >>s 31)
2594 // -(X >>s 31) -> (X >>u 31)
2595 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2596 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2597 if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2598 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2599 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2600 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2604 // 0 - X --> 0 if the sub is NUW.
2605 if (N->getFlags().hasNoUnsignedWrap())
2608 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2609 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2610 // N1 must be 0 because negating the minimum signed value is undefined.
2611 if (N->getFlags().hasNoSignedWrap())
2614 // 0 - X --> X if X is 0 or the minimum signed value.
2619 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2620 if (isAllOnesConstantOrAllOnesSplatConstant(N0))
2621 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2623 // fold (A - (0-B)) -> A+B
2624 if (N1.getOpcode() == ISD::SUB &&
2625 isNullConstantOrNullSplatConstant(N1.getOperand(0)))
2626 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2628 // fold A-(A-B) -> B
2629 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2630 return N1.getOperand(1);
2632 // fold (A+B)-A -> B
2633 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2634 return N0.getOperand(1);
2636 // fold (A+B)-B -> A
2637 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2638 return N0.getOperand(0);
2640 // fold C2-(A+C1) -> (C2-C1)-A
2641 if (N1.getOpcode() == ISD::ADD) {
2642 SDValue N11 = N1.getOperand(1);
2643 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2644 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2645 SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2646 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2650 // fold ((A+(B+or-C))-B) -> A+or-C
2651 if (N0.getOpcode() == ISD::ADD &&
2652 (N0.getOperand(1).getOpcode() == ISD::SUB ||
2653 N0.getOperand(1).getOpcode() == ISD::ADD) &&
2654 N0.getOperand(1).getOperand(0) == N1)
2655 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2656 N0.getOperand(1).getOperand(1));
2658 // fold ((A+(C+B))-B) -> A+C
2659 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2660 N0.getOperand(1).getOperand(1) == N1)
2661 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2662 N0.getOperand(1).getOperand(0));
2664 // fold ((A-(B-C))-C) -> A-B
2665 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2666 N0.getOperand(1).getOperand(1) == N1)
2667 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2668 N0.getOperand(1).getOperand(0));
2670 // fold (A-(B-C)) -> A+(C-B)
2671 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2672 return DAG.getNode(ISD::ADD, DL, VT, N0,
2673 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2676 // fold (X - (-Y * Z)) -> (X + (Y * Z))
2677 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2678 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2679 isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
2680 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2681 N1.getOperand(0).getOperand(1),
2683 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2685 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2686 isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
2687 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2689 N1.getOperand(1).getOperand(1));
2690 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2694 // If either operand of a sub is undef, the result is undef
2700 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2703 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2706 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2707 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2708 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2709 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2710 SDValue S0 = N1.getOperand(0);
2711 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2712 unsigned OpSizeInBits = VT.getScalarSizeInBits();
2713 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2714 if (C->getAPIntValue() == (OpSizeInBits - 1))
2715 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2720 // If the relocation model supports it, consider symbol offsets.
2721 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2722 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2723 // fold (sub Sym, c) -> Sym-c
2724 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2725 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2727 (uint64_t)N1C->getSExtValue());
2728 // fold (sub Sym+c1, Sym+c2) -> c1-c2
2729 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2730 if (GA->getGlobal() == GB->getGlobal())
2731 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2735 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2736 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2737 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2738 if (TN->getVT() == MVT::i1) {
2739 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2740 DAG.getConstant(1, DL, VT));
2741 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2745 // Prefer an add for more folding potential and possibly better codegen:
2746 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2747 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2748 SDValue ShAmt = N1.getOperand(1);
2749 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2750 if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2751 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2752 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2759 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2760 SDValue N0 = N->getOperand(0);
2761 SDValue N1 = N->getOperand(1);
2762 EVT VT = N0.getValueType();
2765 // If the flag result is dead, turn this into an SUB.
2766 if (!N->hasAnyUseOfValue(1))
2767 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2768 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2770 // fold (subc x, x) -> 0 + no borrow
2772 return CombineTo(N, DAG.getConstant(0, DL, VT),
2773 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2775 // fold (subc x, 0) -> x + no borrow
2776 if (isNullConstant(N1))
2777 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2779 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2780 if (isAllOnesConstant(N0))
2781 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2782 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2787 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2788 SDValue N0 = N->getOperand(0);
2789 SDValue N1 = N->getOperand(1);
2790 EVT VT = N0.getValueType();
2794 EVT CarryVT = N->getValueType(1);
2797 // If the flag result is dead, turn this into an SUB.
2798 if (!N->hasAnyUseOfValue(1))
2799 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2800 DAG.getUNDEF(CarryVT));
2802 // fold (usubo x, x) -> 0 + no borrow
2804 return CombineTo(N, DAG.getConstant(0, DL, VT),
2805 DAG.getConstant(0, DL, CarryVT));
2807 // fold (usubo x, 0) -> x + no borrow
2808 if (isNullConstant(N1))
2809 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2811 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2812 if (isAllOnesConstant(N0))
2813 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2814 DAG.getConstant(0, DL, CarryVT));
2819 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2820 SDValue N0 = N->getOperand(0);
2821 SDValue N1 = N->getOperand(1);
2822 SDValue CarryIn = N->getOperand(2);
2824 // fold (sube x, y, false) -> (subc x, y)
2825 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2826 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2831 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2832 SDValue N0 = N->getOperand(0);
2833 SDValue N1 = N->getOperand(1);
2834 SDValue CarryIn = N->getOperand(2);
2836 // fold (subcarry x, y, false) -> (usubo x, y)
2837 if (isNullConstant(CarryIn)) {
2838 if (!LegalOperations ||
2839 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2840 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2846 SDValue DAGCombiner::visitMUL(SDNode *N) {
2847 SDValue N0 = N->getOperand(0);
2848 SDValue N1 = N->getOperand(1);
2849 EVT VT = N0.getValueType();
2851 // fold (mul x, undef) -> 0
2852 if (N0.isUndef() || N1.isUndef())
2853 return DAG.getConstant(0, SDLoc(N), VT);
2855 bool N0IsConst = false;
2856 bool N1IsConst = false;
2857 bool N1IsOpaqueConst = false;
2858 bool N0IsOpaqueConst = false;
2859 APInt ConstValue0, ConstValue1;
2861 if (VT.isVector()) {
2862 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2865 N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2866 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2867 assert((!N0IsConst ||
2868 ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2869 "Splat APInt should be element width");
2870 assert((!N1IsConst ||
2871 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2872 "Splat APInt should be element width");
2874 N0IsConst = isa<ConstantSDNode>(N0);
2876 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2877 N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2879 N1IsConst = isa<ConstantSDNode>(N1);
2881 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2882 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2886 // fold (mul c1, c2) -> c1*c2
2887 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2888 return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2889 N0.getNode(), N1.getNode());
2891 // canonicalize constant to RHS (vector doesn't have to splat)
2892 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2893 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2894 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2895 // fold (mul x, 0) -> 0
2896 if (N1IsConst && ConstValue1.isNullValue())
2898 // fold (mul x, 1) -> x
2899 if (N1IsConst && ConstValue1.isOneValue())
2902 if (SDValue NewSel = foldBinOpIntoSelect(N))
2905 // fold (mul x, -1) -> 0-x
2906 if (N1IsConst && ConstValue1.isAllOnesValue()) {
2908 return DAG.getNode(ISD::SUB, DL, VT,
2909 DAG.getConstant(0, DL, VT), N0);
2911 // fold (mul x, (1 << c)) -> x << c
2912 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2913 DAG.isKnownToBeAPowerOfTwo(N1) &&
2914 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2916 SDValue LogBase2 = BuildLogBase2(N1, DL);
2917 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2918 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2919 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2921 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2922 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2923 unsigned Log2Val = (-ConstValue1).logBase2();
2925 // FIXME: If the input is something that is easily negated (e.g. a
2926 // single-use add), we should put the negate there.
2927 return DAG.getNode(ISD::SUB, DL, VT,
2928 DAG.getConstant(0, DL, VT),
2929 DAG.getNode(ISD::SHL, DL, VT, N0,
2930 DAG.getConstant(Log2Val, DL,
2931 getShiftAmountTy(N0.getValueType()))));
2934 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2935 if (N0.getOpcode() == ISD::SHL &&
2936 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2937 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2938 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2939 if (isConstantOrConstantVector(C3))
2940 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2943 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2946 SDValue Sh(nullptr, 0), Y(nullptr, 0);
2948 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2949 if (N0.getOpcode() == ISD::SHL &&
2950 isConstantOrConstantVector(N0.getOperand(1)) &&
2951 N0.getNode()->hasOneUse()) {
2953 } else if (N1.getOpcode() == ISD::SHL &&
2954 isConstantOrConstantVector(N1.getOperand(1)) &&
2955 N1.getNode()->hasOneUse()) {
2960 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2961 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2965 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2966 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2967 N0.getOpcode() == ISD::ADD &&
2968 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2969 isMulAddWithConstProfitable(N, N0, N1))
2970 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2971 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2972 N0.getOperand(0), N1),
2973 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2974 N0.getOperand(1), N1));
2977 if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2983 /// Return true if divmod libcall is available.
2984 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2985 const TargetLowering &TLI) {
2987 EVT NodeType = Node->getValueType(0);
2988 if (!NodeType.isSimple())
2990 switch (NodeType.getSimpleVT().SimpleTy) {
2991 default: return false; // No libcall for vector types.
2992 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
2993 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2994 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2995 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2996 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2999 return TLI.getLibcallName(LC) != nullptr;
3002 /// Issue divrem if both quotient and remainder are needed.
3003 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3004 if (Node->use_empty())
3005 return SDValue(); // This is a dead node, leave it alone.
3007 unsigned Opcode = Node->getOpcode();
3008 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3009 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3011 // DivMod lib calls can still work on non-legal types if using lib-calls.
3012 EVT VT = Node->getValueType(0);
3013 if (VT.isVector() || !VT.isInteger())
3016 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3019 // If DIVREM is going to get expanded into a libcall,
3020 // but there is no libcall available, then don't combine.
3021 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3022 !isDivRemLibcallAvailable(Node, isSigned, TLI))
3025 // If div is legal, it's better to do the normal expansion
3026 unsigned OtherOpcode = 0;
3027 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3028 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3029 if (TLI.isOperationLegalOrCustom(Opcode, VT))
3032 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3033 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3037 SDValue Op0 = Node->getOperand(0);
3038 SDValue Op1 = Node->getOperand(1);
3040 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3041 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3043 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3046 // Convert the other matching node(s), too;
3047 // otherwise, the DIVREM may get target-legalized into something
3048 // target-specific that we won't be able to recognize.
3049 unsigned UserOpc = User->getOpcode();
3050 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3051 User->getOperand(0) == Op0 &&
3052 User->getOperand(1) == Op1) {
3054 if (UserOpc == OtherOpcode) {
3055 SDVTList VTs = DAG.getVTList(VT, VT);
3056 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3057 } else if (UserOpc == DivRemOpc) {
3058 combined = SDValue(User, 0);
3060 assert(UserOpc == Opcode);
3064 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3065 CombineTo(User, combined);
3066 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3067 CombineTo(User, combined.getValue(1));
3073 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3074 SDValue N0 = N->getOperand(0);
3075 SDValue N1 = N->getOperand(1);
3076 EVT VT = N->getValueType(0);
3079 if (DAG.isUndef(N->getOpcode(), {N0, N1}))
3080 return DAG.getUNDEF(VT);
3085 return DAG.getConstant(0, DL, VT);
3090 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3091 SDValue N0 = N->getOperand(0);
3092 SDValue N1 = N->getOperand(1);
3093 EVT VT = N->getValueType(0);
3094 EVT CCVT = getSetCCResultType(VT);
3098 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3103 // fold (sdiv c1, c2) -> c1/c2
3104 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3105 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3106 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3107 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3108 // fold (sdiv X, 1) -> X
3109 if (N1C && N1C->isOne())
3111 // fold (sdiv X, -1) -> 0-X
3112 if (N1C && N1C->isAllOnesValue())
3113 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3114 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3115 if (N1C && N1C->getAPIntValue().isMinSignedValue())
3116 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3117 DAG.getConstant(1, DL, VT),
3118 DAG.getConstant(0, DL, VT));
3120 if (SDValue V = simplifyDivRem(N, DAG))
3123 if (SDValue NewSel = foldBinOpIntoSelect(N))
3126 // If we know the sign bits of both operands are zero, strength reduce to a
3127 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3128 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3129 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3131 if (SDValue V = visitSDIVLike(N0, N1, N))
3134 // sdiv, srem -> sdivrem
3135 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3136 // true. Otherwise, we break the simplification logic in visitREM().
3137 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3138 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3139 if (SDValue DivRem = useDivRem(N))
3145 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3147 EVT VT = N->getValueType(0);
3148 EVT CCVT = getSetCCResultType(VT);
3149 unsigned BitWidth = VT.getScalarSizeInBits();
3151 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3153 // Helper for determining whether a value is a power-2 constant scalar or a
3154 // vector of such elements.
3155 auto IsPowerOfTwo = [](ConstantSDNode *C) {
3156 if (C->isNullValue() || C->isOpaque())
3158 if (C->getAPIntValue().isPowerOf2())
3160 if ((-C->getAPIntValue()).isPowerOf2())
3165 // fold (sdiv X, pow2) -> simple ops after legalize
3166 // FIXME: We check for the exact bit here because the generic lowering gives
3167 // better results in that case. The target-specific lowering should learn how
3168 // to handle exact sdivs efficiently.
3169 if (!N->getFlags().hasExact() &&
3170 ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
3171 // Target-specific implementation of sdiv x, pow2.
3172 if (SDValue Res = BuildSDIVPow2(N))
3175 // Create constants that are functions of the shift amount value.
3176 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3177 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3178 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3179 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3180 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3181 if (!isConstantOrConstantVector(Inexact))
3184 // Splat the sign bit into the register
3185 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3186 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3187 AddToWorklist(Sign.getNode());
3189 // Add (N0 < 0) ? abs2 - 1 : 0;
3190 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3191 AddToWorklist(Srl.getNode());
3192 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3193 AddToWorklist(Add.getNode());
3194 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3195 AddToWorklist(Sra.getNode());
3197 // Special case: (sdiv X, 1) -> X
3198 // Special Case: (sdiv X, -1) -> 0-X
3199 SDValue One = DAG.getConstant(1, DL, VT);
3200 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3201 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3202 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3203 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3204 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3206 // If dividing by a positive value, we're done. Otherwise, the result must
3208 SDValue Zero = DAG.getConstant(0, DL, VT);
3209 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3211 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3212 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3213 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3217 // If integer divide is expensive and we satisfy the requirements, emit an
3218 // alternate sequence. Targets may check function attributes for size/speed
3220 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3221 if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
3222 if (SDValue Op = BuildSDIV(N))
3228 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3229 SDValue N0 = N->getOperand(0);
3230 SDValue N1 = N->getOperand(1);
3231 EVT VT = N->getValueType(0);
3232 EVT CCVT = getSetCCResultType(VT);
3236 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3241 // fold (udiv c1, c2) -> c1/c2
3242 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3243 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3245 if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3248 // fold (udiv X, 1) -> X
3249 if (N1C && N1C->isOne())
3251 // fold (udiv X, -1) -> select(X == -1, 1, 0)
3252 if (N1C && N1C->getAPIntValue().isAllOnesValue())
3253 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3254 DAG.getConstant(1, DL, VT),
3255 DAG.getConstant(0, DL, VT));
3257 if (SDValue V = simplifyDivRem(N, DAG))
3260 if (SDValue NewSel = foldBinOpIntoSelect(N))
3263 if (SDValue V = visitUDIVLike(N0, N1, N))
3266 // sdiv, srem -> sdivrem
3267 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3268 // true. Otherwise, we break the simplification logic in visitREM().
3269 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3270 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3271 if (SDValue DivRem = useDivRem(N))
3277 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3279 EVT VT = N->getValueType(0);
3281 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3283 // fold (udiv x, (1 << c)) -> x >>u c
3284 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3285 DAG.isKnownToBeAPowerOfTwo(N1)) {
3286 SDValue LogBase2 = BuildLogBase2(N1, DL);
3287 AddToWorklist(LogBase2.getNode());
3289 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3290 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3291 AddToWorklist(Trunc.getNode());
3292 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3295 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3296 if (N1.getOpcode() == ISD::SHL) {
3297 SDValue N10 = N1.getOperand(0);
3298 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3299 DAG.isKnownToBeAPowerOfTwo(N10)) {
3300 SDValue LogBase2 = BuildLogBase2(N10, DL);
3301 AddToWorklist(LogBase2.getNode());
3303 EVT ADDVT = N1.getOperand(1).getValueType();
3304 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3305 AddToWorklist(Trunc.getNode());
3306 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3307 AddToWorklist(Add.getNode());
3308 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3312 // fold (udiv x, c) -> alternate
3313 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3314 if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
3315 if (SDValue Op = BuildUDIV(N))
3321 // handles ISD::SREM and ISD::UREM
3322 SDValue DAGCombiner::visitREM(SDNode *N) {
3323 unsigned Opcode = N->getOpcode();
3324 SDValue N0 = N->getOperand(0);
3325 SDValue N1 = N->getOperand(1);
3326 EVT VT = N->getValueType(0);
3327 EVT CCVT = getSetCCResultType(VT);
3329 bool isSigned = (Opcode == ISD::SREM);
3332 // fold (rem c1, c2) -> c1%c2
3333 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3334 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3336 if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3338 // fold (urem X, -1) -> select(X == -1, 0, x)
3339 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3340 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3341 DAG.getConstant(0, DL, VT), N0);
3343 if (SDValue V = simplifyDivRem(N, DAG))
3346 if (SDValue NewSel = foldBinOpIntoSelect(N))
3350 // If we know the sign bits of both operands are zero, strength reduce to a
3351 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3352 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3353 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3355 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3356 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3357 // fold (urem x, pow2) -> (and x, pow2-1)
3358 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3359 AddToWorklist(Add.getNode());
3360 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3362 if (N1.getOpcode() == ISD::SHL &&
3363 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3364 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3365 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3366 AddToWorklist(Add.getNode());
3367 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3371 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3373 // If X/C can be simplified by the division-by-constant logic, lower
3374 // X%C to the equivalent of X-X/C*C.
3375 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3376 // speculative DIV must not cause a DIVREM conversion. We guard against this
3377 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3378 // combine will not return a DIVREM. Regardless, checking cheapness here
3379 // makes sense since the simplification results in fatter code.
3380 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3381 SDValue OptimizedDiv =
3382 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3383 if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
3384 OptimizedDiv.getOpcode() != ISD::SDIVREM) {
3385 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3386 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3387 AddToWorklist(OptimizedDiv.getNode());
3388 AddToWorklist(Mul.getNode());
3393 // sdiv, srem -> sdivrem
3394 if (SDValue DivRem = useDivRem(N))
3395 return DivRem.getValue(1);
3400 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3401 SDValue N0 = N->getOperand(0);
3402 SDValue N1 = N->getOperand(1);
3403 EVT VT = N->getValueType(0);
3406 if (VT.isVector()) {
3407 // fold (mulhs x, 0) -> 0
3408 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3410 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3414 // fold (mulhs x, 0) -> 0
3415 if (isNullConstant(N1))
3417 // fold (mulhs x, 1) -> (sra x, size(x)-1)
3418 if (isOneConstant(N1))
3419 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3420 DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3421 getShiftAmountTy(N0.getValueType())));
3423 // fold (mulhs x, undef) -> 0
3424 if (N0.isUndef() || N1.isUndef())
3425 return DAG.getConstant(0, DL, VT);
3427 // If the type twice as wide is legal, transform the mulhs to a wider multiply
3429 if (VT.isSimple() && !VT.isVector()) {
3430 MVT Simple = VT.getSimpleVT();
3431 unsigned SimpleSize = Simple.getSizeInBits();
3432 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3433 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3434 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3435 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3436 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3437 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3438 DAG.getConstant(SimpleSize, DL,
3439 getShiftAmountTy(N1.getValueType())));
3440 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3447 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3448 SDValue N0 = N->getOperand(0);
3449 SDValue N1 = N->getOperand(1);
3450 EVT VT = N->getValueType(0);
3453 if (VT.isVector()) {
3454 // fold (mulhu x, 0) -> 0
3455 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3457 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3461 // fold (mulhu x, 0) -> 0
3462 if (isNullConstant(N1))
3464 // fold (mulhu x, 1) -> 0
3465 if (isOneConstant(N1))
3466 return DAG.getConstant(0, DL, N0.getValueType());
3467 // fold (mulhu x, undef) -> 0
3468 if (N0.isUndef() || N1.isUndef())
3469 return DAG.getConstant(0, DL, VT);
3471 // If the type twice as wide is legal, transform the mulhu to a wider multiply
3473 if (VT.isSimple() && !VT.isVector()) {
3474 MVT Simple = VT.getSimpleVT();
3475 unsigned SimpleSize = Simple.getSizeInBits();
3476 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3477 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3478 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3479 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3480 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3481 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3482 DAG.getConstant(SimpleSize, DL,
3483 getShiftAmountTy(N1.getValueType())));
3484 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3491 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3492 /// give the opcodes for the two computations that are being performed. Return
3493 /// true if a simplification was made.
3494 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3496 // If the high half is not needed, just compute the low half.
3497 bool HiExists = N->hasAnyUseOfValue(1);
3499 (!LegalOperations ||
3500 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3501 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3502 return CombineTo(N, Res, Res);
3505 // If the low half is not needed, just compute the high half.
3506 bool LoExists = N->hasAnyUseOfValue(0);
3508 (!LegalOperations ||
3509 TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3510 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3511 return CombineTo(N, Res, Res);
3514 // If both halves are used, return as it is.
3515 if (LoExists && HiExists)
3518 // If the two computed results can be simplified separately, separate them.
3520 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3521 AddToWorklist(Lo.getNode());
3522 SDValue LoOpt = combine(Lo.getNode());
3523 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3524 (!LegalOperations ||
3525 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3526 return CombineTo(N, LoOpt, LoOpt);
3530 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3531 AddToWorklist(Hi.getNode());
3532 SDValue HiOpt = combine(Hi.getNode());
3533 if (HiOpt.getNode() && HiOpt != Hi &&
3534 (!LegalOperations ||
3535 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3536 return CombineTo(N, HiOpt, HiOpt);
3542 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3543 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3546 EVT VT = N->getValueType(0);
3549 // If the type is twice as wide is legal, transform the mulhu to a wider
3550 // multiply plus a shift.
3551 if (VT.isSimple() && !VT.isVector()) {
3552 MVT Simple = VT.getSimpleVT();
3553 unsigned SimpleSize = Simple.getSizeInBits();
3554 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3555 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3556 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3557 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3558 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3559 // Compute the high part as N1.
3560 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3561 DAG.getConstant(SimpleSize, DL,
3562 getShiftAmountTy(Lo.getValueType())));
3563 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3564 // Compute the low part as N0.
3565 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3566 return CombineTo(N, Lo, Hi);
3573 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3574 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3577 EVT VT = N->getValueType(0);
3580 // If the type is twice as wide is legal, transform the mulhu to a wider
3581 // multiply plus a shift.
3582 if (VT.isSimple() && !VT.isVector()) {
3583 MVT Simple = VT.getSimpleVT();
3584 unsigned SimpleSize = Simple.getSizeInBits();
3585 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3586 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3587 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3588 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3589 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3590 // Compute the high part as N1.
3591 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3592 DAG.getConstant(SimpleSize, DL,
3593 getShiftAmountTy(Lo.getValueType())));
3594 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3595 // Compute the low part as N0.
3596 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3597 return CombineTo(N, Lo, Hi);
3604 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3605 // (smulo x, 2) -> (saddo x, x)
3606 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3607 if (C2->getAPIntValue() == 2)
3608 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3609 N->getOperand(0), N->getOperand(0));
3614 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3615 // (umulo x, 2) -> (uaddo x, x)
3616 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3617 if (C2->getAPIntValue() == 2)
3618 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3619 N->getOperand(0), N->getOperand(0));
3624 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3625 SDValue N0 = N->getOperand(0);
3626 SDValue N1 = N->getOperand(1);
3627 EVT VT = N0.getValueType();
3631 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3634 // fold operation with constant operands.
3635 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3636 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3638 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3640 // canonicalize constant to RHS
3641 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3642 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3643 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3645 // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3646 // Only do this if the current op isn't legal and the flipped is.
3647 unsigned Opcode = N->getOpcode();
3648 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3649 if (!TLI.isOperationLegal(Opcode, VT) &&
3650 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3651 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3654 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3655 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3656 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3657 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3658 default: llvm_unreachable("Unknown MINMAX opcode");
3660 if (TLI.isOperationLegal(AltOpcode, VT))
3661 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3667 /// If this is a binary operator with two operands of the same opcode, try to
3669 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3670 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3671 EVT VT = N0.getValueType();
3672 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3674 // Bail early if none of these transforms apply.
3675 if (N0.getNumOperands() == 0) return SDValue();
3677 // For each of OP in AND/OR/XOR:
3678 // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3679 // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3680 // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3681 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3682 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3684 // do not sink logical op inside of a vector extend, since it may combine
3686 EVT Op0VT = N0.getOperand(0).getValueType();
3687 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3688 N0.getOpcode() == ISD::SIGN_EXTEND ||
3689 N0.getOpcode() == ISD::BSWAP ||
3690 // Avoid infinite looping with PromoteIntBinOp.
3691 (N0.getOpcode() == ISD::ANY_EXTEND &&
3692 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3693 (N0.getOpcode() == ISD::TRUNCATE &&
3694 (!TLI.isZExtFree(VT, Op0VT) ||
3695 !TLI.isTruncateFree(Op0VT, VT)) &&
3696 TLI.isTypeLegal(Op0VT))) &&
3698 Op0VT == N1.getOperand(0).getValueType() &&
3699 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3700 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3701 N0.getOperand(0).getValueType(),
3702 N0.getOperand(0), N1.getOperand(0));
3703 AddToWorklist(ORNode.getNode());
3704 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3707 // For each of OP in SHL/SRL/SRA/AND...
3708 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3709 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
3710 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3711 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3712 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3713 N0.getOperand(1) == N1.getOperand(1)) {
3714 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3715 N0.getOperand(0).getValueType(),
3716 N0.getOperand(0), N1.getOperand(0));
3717 AddToWorklist(ORNode.getNode());
3718 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3719 ORNode, N0.getOperand(1));
3722 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3723 // Only perform this optimization up until type legalization, before
3724 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3725 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3726 // we don't want to undo this promotion.
3727 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3729 if ((N0.getOpcode() == ISD::BITCAST ||
3730 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3731 Level <= AfterLegalizeTypes) {
3732 SDValue In0 = N0.getOperand(0);
3733 SDValue In1 = N1.getOperand(0);
3734 EVT In0Ty = In0.getValueType();
3735 EVT In1Ty = In1.getValueType();
3737 // If both incoming values are integers, and the original types are the
3739 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3740 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3741 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3742 AddToWorklist(Op.getNode());
3747 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3748 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3749 // If both shuffles use the same mask, and both shuffle within a single
3750 // vector, then it is worthwhile to move the swizzle after the operation.
3751 // The type-legalizer generates this pattern when loading illegal
3752 // vector types from memory. In many cases this allows additional shuffle
3754 // There are other cases where moving the shuffle after the xor/and/or
3755 // is profitable even if shuffles don't perform a swizzle.
3756 // If both shuffles use the same mask, and both shuffles have the same first
3757 // or second operand, then it might still be profitable to move the shuffle
3758 // after the xor/and/or operation.
3759 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3760 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3761 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3763 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3764 "Inputs to shuffles are not the same type");
3766 // Check that both shuffles use the same mask. The masks are known to be of
3767 // the same length because the result vector type is the same.
3768 // Check also that shuffles have only one use to avoid introducing extra
3770 if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3771 SVN0->getMask().equals(SVN1->getMask())) {
3772 SDValue ShOp = N0->getOperand(1);
3774 // Don't try to fold this node if it requires introducing a
3775 // build vector of all zeros that might be illegal at this stage.
3776 if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3778 ShOp = DAG.getConstant(0, SDLoc(N), VT);
3783 // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
3784 // (OR (shuf (A, C), shuf (B, C))) -> shuf (OR (A, B), C)
3785 // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
3786 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3787 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3788 N0->getOperand(0), N1->getOperand(0));
3789 AddToWorklist(NewNode.getNode());
3790 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3794 // Don't try to fold this node if it requires introducing a
3795 // build vector of all zeros that might be illegal at this stage.
3796 ShOp = N0->getOperand(0);
3797 if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3799 ShOp = DAG.getConstant(0, SDLoc(N), VT);
3804 // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
3805 // (OR (shuf (C, A), shuf (C, B))) -> shuf (C, OR (A, B))
3806 // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
3807 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3808 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3809 N0->getOperand(1), N1->getOperand(1));
3810 AddToWorklist(NewNode.getNode());
3811 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3820 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3821 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3823 SDValue LL, LR, RL, RR, N0CC, N1CC;
3824 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3825 !isSetCCEquivalent(N1, RL, RR, N1CC))
3828 assert(N0.getValueType() == N1.getValueType() &&
3829 "Unexpected operand types for bitwise logic op");
3830 assert(LL.getValueType() == LR.getValueType() &&
3831 RL.getValueType() == RR.getValueType() &&
3832 "Unexpected operand types for setcc");
3834 // If we're here post-legalization or the logic op type is not i1, the logic
3835 // op type must match a setcc result type. Also, all folds require new
3836 // operations on the left and right operands, so those types must match.
3837 EVT VT = N0.getValueType();
3838 EVT OpVT = LL.getValueType();
3839 if (LegalOperations || VT.getScalarType() != MVT::i1)
3840 if (VT != getSetCCResultType(OpVT))
3842 if (OpVT != RL.getValueType())
3845 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3846 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3847 bool IsInteger = OpVT.isInteger();
3848 if (LR == RR && CC0 == CC1 && IsInteger) {
3849 bool IsZero = isNullConstantOrNullSplatConstant(LR);
3850 bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3853 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3854 // All sign bits clear?
3855 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3857 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3858 // Any sign bits set?
3859 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3861 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
3862 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3863 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
3864 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
3865 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3866 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3867 AddToWorklist(Or.getNode());
3868 return DAG.getSetCC(DL, VT, Or, LR, CC1);
3872 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3873 // All sign bits set?
3874 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3876 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3877 // Any sign bits clear?
3878 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3880 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3881 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
3882 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3883 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
3884 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3885 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3886 AddToWorklist(And.getNode());
3887 return DAG.getSetCC(DL, VT, And, LR, CC1);
3891 // TODO: What is the 'or' equivalent of this fold?
3892 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3893 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
3894 IsInteger && CC0 == ISD::SETNE &&
3895 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3896 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3897 SDValue One = DAG.getConstant(1, DL, OpVT);
3898 SDValue Two = DAG.getConstant(2, DL, OpVT);
3899 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3900 AddToWorklist(Add.getNode());
3901 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3904 // Try more general transforms if the predicates match and the only user of
3905 // the compares is the 'and' or 'or'.
3906 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3907 N0.hasOneUse() && N1.hasOneUse()) {
3908 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3909 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3910 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3911 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3912 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3913 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3914 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3915 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3919 // Canonicalize equivalent operands to LL == RL.
3920 if (LL == RR && LR == RL) {
3921 CC1 = ISD::getSetCCSwappedOperands(CC1);
3925 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3926 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3927 if (LL == RL && LR == RR) {
3928 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3929 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3930 if (NewCC != ISD::SETCC_INVALID &&
3931 (!LegalOperations ||
3932 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3933 TLI.isOperationLegal(ISD::SETCC, OpVT))))
3934 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3940 /// This contains all DAGCombine rules which reduce two values combined by
3941 /// an And operation to a single value. This makes them reusable in the context
3942 /// of visitSELECT(). Rules involving constants are not included as
3943 /// visitSELECT() already handles those cases.
3944 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3945 EVT VT = N1.getValueType();
3948 // fold (and x, undef) -> 0
3949 if (N0.isUndef() || N1.isUndef())
3950 return DAG.getConstant(0, DL, VT);
3952 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3955 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3956 VT.getSizeInBits() <= 64) {
3957 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3958 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3959 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3960 // immediate for an add, but it is legal if its top c2 bits are set,
3961 // transform the ADD so the immediate doesn't need to be materialized
3963 APInt ADDC = ADDI->getAPIntValue();
3964 APInt SRLC = SRLI->getAPIntValue();
3965 if (ADDC.getMinSignedBits() <= 64 &&
3966 SRLC.ult(VT.getSizeInBits()) &&
3967 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3968 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3969 SRLC.getZExtValue());
3970 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3972 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3975 DAG.getNode(ISD::ADD, DL0, VT,
3976 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3977 CombineTo(N0.getNode(), NewAdd);
3978 // Return N so it doesn't get rechecked!
3979 return SDValue(N, 0);
3987 // Reduce bit extract of low half of an integer to the narrower type.
3988 // (and (srl i64:x, K), KMask) ->
3989 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3990 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3991 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3992 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3993 unsigned Size = VT.getSizeInBits();
3994 const APInt &AndMask = CAnd->getAPIntValue();
3995 unsigned ShiftBits = CShift->getZExtValue();
3997 // Bail out, this node will probably disappear anyway.
4001 unsigned MaskBits = AndMask.countTrailingOnes();
4002 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4004 if (AndMask.isMask() &&
4005 // Required bits must not span the two halves of the integer and
4006 // must fit in the half size type.
4007 (ShiftBits + MaskBits <= Size / 2) &&
4008 TLI.isNarrowingProfitable(VT, HalfVT) &&
4009 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4010 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4011 TLI.isTruncateFree(VT, HalfVT) &&
4012 TLI.isZExtFree(HalfVT, VT)) {
4013 // The isNarrowingProfitable is to avoid regressions on PPC and
4014 // AArch64 which match a few 64-bit bit insert / bit extract patterns
4015 // on downstream users of this. Those patterns could probably be
4016 // extended to handle extensions mixed in.
4019 assert(MaskBits <= Size);
4021 // Extracting the highest bit of the low half.
4022 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4023 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4026 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4027 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4028 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4029 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4030 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4039 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4040 EVT LoadResultTy, EVT &ExtVT) {
4041 if (!AndC->getAPIntValue().isMask())
4044 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4046 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4047 EVT LoadedVT = LoadN->getMemoryVT();
4049 if (ExtVT == LoadedVT &&
4050 (!LegalOperations ||
4051 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4052 // ZEXTLOAD will match without needing to change the size of the value being
4057 // Do not change the width of a volatile load.
4058 if (LoadN->isVolatile())
4061 // Do not generate loads of non-round integer types since these can
4062 // be expensive (and would be wrong if the type is not byte sized).
4063 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4066 if (LegalOperations &&
4067 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4070 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4076 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4077 ISD::LoadExtType ExtType, EVT &MemVT,
4081 // Only allow byte offsets.
4085 // Do not generate loads of non-round integer types since these can
4086 // be expensive (and would be wrong if the type is not byte sized).
4087 if (!MemVT.isRound())
4090 // Don't change the width of a volatile load.
4091 if (LDST->isVolatile())
4094 // Verify that we are actually reducing a load width here.
4095 if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4098 // Ensure that this isn't going to produce an unsupported unaligned access.
4100 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4101 LDST->getAddressSpace(), ShAmt / 8))
4104 // It's not possible to generate a constant of extended or untyped type.
4105 EVT PtrType = LDST->getBasePtr().getValueType();
4106 if (PtrType == MVT::Untyped || PtrType.isExtended())
4109 if (isa<LoadSDNode>(LDST)) {
4110 LoadSDNode *Load = cast<LoadSDNode>(LDST);
4111 // Don't transform one with multiple uses, this would require adding a new
4113 if (!SDValue(Load, 0).hasOneUse())
4116 if (LegalOperations &&
4117 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4120 // For the transform to be legal, the load must produce only two values
4121 // (the value loaded and the chain). Don't transform a pre-increment
4122 // load, for example, which produces an extra value. Otherwise the
4123 // transformation is not equivalent, and the downstream logic to replace
4124 // uses gets things wrong.
4125 if (Load->getNumValues() > 2)
4128 // If the load that we're shrinking is an extload and we're not just
4129 // discarding the extension we can't simply shrink the load. Bail.
4130 // TODO: It would be possible to merge the extensions in some cases.
4131 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4132 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4135 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4138 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4139 StoreSDNode *Store = cast<StoreSDNode>(LDST);
4140 // Can't write outside the original store
4141 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4144 if (LegalOperations &&
4145 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4151 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4152 SmallPtrSetImpl<LoadSDNode*> &Loads,
4153 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4154 ConstantSDNode *Mask,
4155 SDNode *&NodeToMask) {
4156 // Recursively search for the operands, looking for loads which can be
4158 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4159 SDValue Op = N->getOperand(i);
4161 if (Op.getValueType().isVector())
4164 // Some constants may need fixing up later if they are too large.
4165 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4166 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4167 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4168 NodesWithConsts.insert(N);
4172 if (!Op.hasOneUse())
4175 switch(Op.getOpcode()) {
4177 auto *Load = cast<LoadSDNode>(Op);
4179 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4180 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4182 // ZEXTLOAD is already small enough.
4183 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4184 ExtVT.bitsGE(Load->getMemoryVT()))
4187 // Use LE to convert equal sized loads to zext.
4188 if (ExtVT.bitsLE(Load->getMemoryVT()))
4195 case ISD::ZERO_EXTEND:
4196 case ISD::AssertZext: {
4197 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4198 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4199 EVT VT = Op.getOpcode() == ISD::AssertZext ?
4200 cast<VTSDNode>(Op.getOperand(1))->getVT() :
4201 Op.getOperand(0).getValueType();
4203 // We can accept extending nodes if the mask is wider or an equal
4204 // width to the original type.
4205 if (ExtVT.bitsGE(VT))
4212 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4218 // Allow one node which will masked along with any loads found.
4222 // Also ensure that the node to be masked only produces one data result.
4223 NodeToMask = Op.getNode();
4224 if (NodeToMask->getNumValues() > 1) {
4225 bool HasValue = false;
4226 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4227 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4228 if (VT != MVT::Glue && VT != MVT::Other) {
4230 NodeToMask = nullptr;
4236 assert(HasValue && "Node to be masked has no data result?");
4242 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4243 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4247 if (!Mask->getAPIntValue().isMask())
4250 // No need to do anything if the and directly uses a load.
4251 if (isa<LoadSDNode>(N->getOperand(0)))
4254 SmallPtrSet<LoadSDNode*, 8> Loads;
4255 SmallPtrSet<SDNode*, 2> NodesWithConsts;
4256 SDNode *FixupNode = nullptr;
4257 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4258 if (Loads.size() == 0)
4261 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4262 SDValue MaskOp = N->getOperand(1);
4264 // If it exists, fixup the single node we allow in the tree that needs
4267 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4268 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4269 FixupNode->getValueType(0),
4270 SDValue(FixupNode, 0), MaskOp);
4271 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4272 if (And.getOpcode() == ISD ::AND)
4273 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4276 // Narrow any constants that need it.
4277 for (auto *LogicN : NodesWithConsts) {
4278 SDValue Op0 = LogicN->getOperand(0);
4279 SDValue Op1 = LogicN->getOperand(1);
4281 if (isa<ConstantSDNode>(Op0))
4282 std::swap(Op0, Op1);
4284 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4287 DAG.UpdateNodeOperands(LogicN, Op0, And);
4290 // Create narrow loads.
4291 for (auto *Load : Loads) {
4292 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4293 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4294 SDValue(Load, 0), MaskOp);
4295 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4296 if (And.getOpcode() == ISD ::AND)
4298 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4299 SDValue NewLoad = ReduceLoadWidth(And.getNode());
4301 "Shouldn't be masking the load if it can't be narrowed");
4302 CombineTo(Load, NewLoad, NewLoad.getValue(1));
4304 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4311 // x & (-1 'logical shift' y)
4313 // (x 'opposite logical shift' y) 'logical shift' y
4314 // if it is better for performance.
4315 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4316 assert(N->getOpcode() == ISD::AND);
4318 SDValue N0 = N->getOperand(0);
4319 SDValue N1 = N->getOperand(1);
4321 // Do we actually prefer shifts over mask?
4322 if (!TLI.preferShiftsToClearExtremeBits(N0))
4325 // Try to match (-1 '[outer] logical shift' y)
4326 unsigned OuterShift;
4327 unsigned InnerShift; // The opposite direction to the OuterShift.
4328 SDValue Y; // Shift amount.
4329 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4332 OuterShift = M->getOpcode();
4333 if (OuterShift == ISD::SHL)
4334 InnerShift = ISD::SRL;
4335 else if (OuterShift == ISD::SRL)
4336 InnerShift = ISD::SHL;
4339 if (!isAllOnesConstant(M->getOperand(0)))
4341 Y = M->getOperand(1);
4348 else if (matchMask(N0))
4354 EVT VT = N->getValueType(0);
4356 // tmp = x 'opposite logical shift' y
4357 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4358 // ret = tmp 'logical shift' y
4359 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4364 SDValue DAGCombiner::visitAND(SDNode *N) {
4365 SDValue N0 = N->getOperand(0);
4366 SDValue N1 = N->getOperand(1);
4367 EVT VT = N1.getValueType();
4374 if (VT.isVector()) {
4375 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4378 // fold (and x, 0) -> 0, vector edition
4379 if (ISD::isBuildVectorAllZeros(N0.getNode()))
4380 // do not return N0, because undef node may exist in N0
4381 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4382 SDLoc(N), N0.getValueType());
4383 if (ISD::isBuildVectorAllZeros(N1.getNode()))
4384 // do not return N1, because undef node may exist in N1
4385 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4386 SDLoc(N), N1.getValueType());
4388 // fold (and x, -1) -> x, vector edition
4389 if (ISD::isBuildVectorAllOnes(N0.getNode()))
4391 if (ISD::isBuildVectorAllOnes(N1.getNode()))
4395 // fold (and c1, c2) -> c1&c2
4396 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4397 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4398 if (N0C && N1C && !N1C->isOpaque())
4399 return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4400 // canonicalize constant to RHS
4401 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4402 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4403 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4404 // fold (and x, -1) -> x
4405 if (isAllOnesConstant(N1))
4407 // if (and x, c) is known to be zero, return 0
4408 unsigned BitWidth = VT.getScalarSizeInBits();
4409 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4410 APInt::getAllOnesValue(BitWidth)))
4411 return DAG.getConstant(0, SDLoc(N), VT);
4413 if (SDValue NewSel = foldBinOpIntoSelect(N))
4417 if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
4420 // Try to convert a constant mask AND into a shuffle clear mask.
4422 if (SDValue Shuffle = XformToShuffleWithZero(N))
4425 // fold (and (or x, C), D) -> D if (C & D) == D
4426 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4427 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4429 if (N0.getOpcode() == ISD::OR &&
4430 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4432 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4433 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4434 SDValue N0Op0 = N0.getOperand(0);
4435 APInt Mask = ~N1C->getAPIntValue();
4436 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4437 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4438 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4439 N0.getValueType(), N0Op0);
4441 // Replace uses of the AND with uses of the Zero extend node.
4444 // We actually want to replace all uses of the any_extend with the
4445 // zero_extend, to avoid duplicating things. This will later cause this
4446 // AND to be folded.
4447 CombineTo(N0.getNode(), Zext);
4448 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4451 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4452 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4453 // already be zero by virtue of the width of the base type of the load.
4455 // the 'X' node here can either be nothing or an extract_vector_elt to catch
4457 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4458 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4459 N0.getOperand(0).getOpcode() == ISD::LOAD &&
4460 N0.getOperand(0).getResNo() == 0) ||
4461 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4462 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4463 N0 : N0.getOperand(0) );
4465 // Get the constant (if applicable) the zero'th operand is being ANDed with.
4466 // This can be a pure constant or a vector splat, in which case we treat the
4467 // vector as a scalar and use the splat value.
4468 APInt Constant = APInt::getNullValue(1);
4469 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4470 Constant = C->getAPIntValue();
4471 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4472 APInt SplatValue, SplatUndef;
4473 unsigned SplatBitSize;
4475 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4476 SplatBitSize, HasAnyUndefs);
4478 // Undef bits can contribute to a possible optimisation if set, so
4480 SplatValue |= SplatUndef;
4482 // The splat value may be something like "0x00FFFFFF", which means 0 for
4483 // the first vector value and FF for the rest, repeating. We need a mask
4484 // that will apply equally to all members of the vector, so AND all the
4485 // lanes of the constant together.
4486 EVT VT = Vector->getValueType(0);
4487 unsigned BitWidth = VT.getScalarSizeInBits();
4489 // If the splat value has been compressed to a bitlength lower
4490 // than the size of the vector lane, we need to re-expand it to
4492 if (BitWidth > SplatBitSize)
4493 for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4494 SplatBitSize < BitWidth;
4495 SplatBitSize = SplatBitSize * 2)
4496 SplatValue |= SplatValue.shl(SplatBitSize);
4498 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4499 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4500 if (SplatBitSize % BitWidth == 0) {
4501 Constant = APInt::getAllOnesValue(BitWidth);
4502 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4503 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4508 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4509 // actually legal and isn't going to get expanded, else this is a false
4511 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4512 Load->getValueType(0),
4513 Load->getMemoryVT());
4515 // Resize the constant to the same size as the original memory access before
4516 // extension. If it is still the AllOnesValue then this AND is completely
4518 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4521 switch (Load->getExtensionType()) {
4522 default: B = false; break;
4523 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4525 case ISD::NON_EXTLOAD: B = true; break;
4528 if (B && Constant.isAllOnesValue()) {
4529 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4530 // preserve semantics once we get rid of the AND.
4531 SDValue NewLoad(Load, 0);
4533 // Fold the AND away. NewLoad may get replaced immediately.
4534 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4536 if (Load->getExtensionType() == ISD::EXTLOAD) {
4537 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4538 Load->getValueType(0), SDLoc(Load),
4539 Load->getChain(), Load->getBasePtr(),
4540 Load->getOffset(), Load->getMemoryVT(),
4541 Load->getMemOperand());
4542 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4543 if (Load->getNumValues() == 3) {
4544 // PRE/POST_INC loads have 3 values.
4545 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4546 NewLoad.getValue(2) };
4547 CombineTo(Load, To, 3, true);
4549 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4553 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4557 // fold (and (load x), 255) -> (zextload x, i8)
4558 // fold (and (extload x, i16), 255) -> (zextload x, i8)
4559 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4560 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4561 (N0.getOpcode() == ISD::ANY_EXTEND &&
4562 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4563 if (SDValue Res = ReduceLoadWidth(N)) {
4564 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4565 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4568 CombineTo(LN0, Res, Res.getValue(1));
4569 return SDValue(N, 0);
4573 if (Level >= AfterLegalizeTypes) {
4574 // Attempt to propagate the AND back up to the leaves which, if they're
4575 // loads, can be combined to narrow loads and the AND node can be removed.
4576 // Perform after legalization so that extend nodes will already be
4577 // combined into the loads.
4578 if (BackwardsPropagateMask(N, DAG)) {
4579 return SDValue(N, 0);
4583 if (SDValue Combined = visitANDLike(N0, N1, N))
4586 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4587 if (N0.getOpcode() == N1.getOpcode())
4588 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4591 // Masking the negated extension of a boolean is just the zero-extended
4593 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4594 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4596 // Note: the SimplifyDemandedBits fold below can make an information-losing
4597 // transform, and then we have no way to find this better fold.
4598 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4599 if (isNullConstantOrNullSplatConstant(N0.getOperand(0))) {
4600 SDValue SubRHS = N0.getOperand(1);
4601 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4602 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4604 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4605 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4606 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4610 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4611 // fold (and (sra)) -> (and (srl)) when possible.
4612 if (SimplifyDemandedBits(SDValue(N, 0)))
4613 return SDValue(N, 0);
4615 // fold (zext_inreg (extload x)) -> (zextload x)
4616 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4617 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4618 EVT MemVT = LN0->getMemoryVT();
4619 // If we zero all the possible extended bits, then we can turn this into
4620 // a zextload if we are running before legalize or the operation is legal.
4621 unsigned BitWidth = N1.getScalarValueSizeInBits();
4622 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4623 BitWidth - MemVT.getScalarSizeInBits())) &&
4624 ((!LegalOperations && !LN0->isVolatile()) ||
4625 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4626 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4627 LN0->getChain(), LN0->getBasePtr(),
4628 MemVT, LN0->getMemOperand());
4630 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4631 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4634 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4635 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4637 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4638 EVT MemVT = LN0->getMemoryVT();
4639 // If we zero all the possible extended bits, then we can turn this into
4640 // a zextload if we are running before legalize or the operation is legal.
4641 unsigned BitWidth = N1.getScalarValueSizeInBits();
4642 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4643 BitWidth - MemVT.getScalarSizeInBits())) &&
4644 ((!LegalOperations && !LN0->isVolatile()) ||
4645 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4646 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4647 LN0->getChain(), LN0->getBasePtr(),
4648 MemVT, LN0->getMemOperand());
4650 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4651 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4654 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4655 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4656 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4657 N0.getOperand(1), false))
4661 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
4667 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4668 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4669 bool DemandHighBits) {
4670 if (!LegalOperations)
4673 EVT VT = N->getValueType(0);
4674 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4676 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4679 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4680 bool LookPassAnd0 = false;
4681 bool LookPassAnd1 = false;
4682 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4684 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4686 if (N0.getOpcode() == ISD::AND) {
4687 if (!N0.getNode()->hasOneUse())
4689 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4690 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4691 // This is needed for X86.
4692 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4693 N01C->getZExtValue() != 0xFFFF))
4695 N0 = N0.getOperand(0);
4696 LookPassAnd0 = true;
4699 if (N1.getOpcode() == ISD::AND) {
4700 if (!N1.getNode()->hasOneUse())
4702 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4703 if (!N11C || N11C->getZExtValue() != 0xFF)
4705 N1 = N1.getOperand(0);
4706 LookPassAnd1 = true;
4709 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4711 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4713 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4716 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4717 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4720 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4723 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4724 SDValue N00 = N0->getOperand(0);
4725 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4726 if (!N00.getNode()->hasOneUse())
4728 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4729 if (!N001C || N001C->getZExtValue() != 0xFF)
4731 N00 = N00.getOperand(0);
4732 LookPassAnd0 = true;
4735 SDValue N10 = N1->getOperand(0);
4736 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4737 if (!N10.getNode()->hasOneUse())
4739 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
4740 // Also allow 0xFFFF since the bits will be shifted out. This is needed
4742 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4743 N101C->getZExtValue() != 0xFFFF))
4745 N10 = N10.getOperand(0);
4746 LookPassAnd1 = true;
4752 // Make sure everything beyond the low halfword gets set to zero since the SRL
4753 // 16 will clear the top bits.
4754 unsigned OpSizeInBits = VT.getSizeInBits();
4755 if (DemandHighBits && OpSizeInBits > 16) {
4756 // If the left-shift isn't masked out then the only way this is a bswap is
4757 // if all bits beyond the low 8 are 0. In that case the entire pattern
4758 // reduces to a left shift anyway: leave it for other parts of the combiner.
4762 // However, if the right shift isn't masked out then it might be because
4763 // it's not needed. See if we can spot that too.
4764 if (!LookPassAnd1 &&
4765 !DAG.MaskedValueIsZero(
4766 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4770 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4771 if (OpSizeInBits > 16) {
4773 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4774 DAG.getConstant(OpSizeInBits - 16, DL,
4775 getShiftAmountTy(VT)));
4780 /// Return true if the specified node is an element that makes up a 32-bit
4781 /// packed halfword byteswap.
4782 /// ((x & 0x000000ff) << 8) |
4783 /// ((x & 0x0000ff00) >> 8) |
4784 /// ((x & 0x00ff0000) << 8) |
4785 /// ((x & 0xff000000) >> 8)
4786 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
4787 if (!N.getNode()->hasOneUse())
4790 unsigned Opc = N.getOpcode();
4791 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4794 SDValue N0 = N.getOperand(0);
4795 unsigned Opc0 = N0.getOpcode();
4796 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4799 ConstantSDNode *N1C = nullptr;
4800 // SHL or SRL: look upstream for AND mask operand
4801 if (Opc == ISD::AND)
4802 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4803 else if (Opc0 == ISD::AND)
4804 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4808 unsigned MaskByteOffset;
4809 switch (N1C->getZExtValue()) {
4812 case 0xFF: MaskByteOffset = 0; break;
4813 case 0xFF00: MaskByteOffset = 1; break;
4815 // In case demanded bits didn't clear the bits that will be shifted out.
4816 // This is needed for X86.
4817 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4822 case 0xFF0000: MaskByteOffset = 2; break;
4823 case 0xFF000000: MaskByteOffset = 3; break;
4826 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4827 if (Opc == ISD::AND) {
4828 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4830 // (x >> 8) & 0xff0000
4831 if (Opc0 != ISD::SRL)
4833 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4834 if (!C || C->getZExtValue() != 8)
4837 // (x << 8) & 0xff00
4838 // (x << 8) & 0xff000000
4839 if (Opc0 != ISD::SHL)
4841 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4842 if (!C || C->getZExtValue() != 8)
4845 } else if (Opc == ISD::SHL) {
4847 // (x & 0xff0000) << 8
4848 if (MaskByteOffset != 0 && MaskByteOffset != 2)
4850 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4851 if (!C || C->getZExtValue() != 8)
4853 } else { // Opc == ISD::SRL
4854 // (x & 0xff00) >> 8
4855 // (x & 0xff000000) >> 8
4856 if (MaskByteOffset != 1 && MaskByteOffset != 3)
4858 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4859 if (!C || C->getZExtValue() != 8)
4863 if (Parts[MaskByteOffset])
4866 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4870 /// Match a 32-bit packed halfword bswap. That is
4871 /// ((x & 0x000000ff) << 8) |
4872 /// ((x & 0x0000ff00) >> 8) |
4873 /// ((x & 0x00ff0000) << 8) |
4874 /// ((x & 0xff000000) >> 8)
4875 /// => (rotl (bswap x), 16)
4876 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4877 if (!LegalOperations)
4880 EVT VT = N->getValueType(0);
4883 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4887 // (or (or (and), (and)), (or (and), (and)))
4888 // (or (or (or (and), (and)), (and)), (and))
4889 if (N0.getOpcode() != ISD::OR)
4891 SDValue N00 = N0.getOperand(0);
4892 SDValue N01 = N0.getOperand(1);
4893 SDNode *Parts[4] = {};
4895 if (N1.getOpcode() == ISD::OR &&
4896 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4897 // (or (or (and), (and)), (or (and), (and)))
4898 if (!isBSwapHWordElement(N00, Parts))
4901 if (!isBSwapHWordElement(N01, Parts))
4903 SDValue N10 = N1.getOperand(0);
4904 if (!isBSwapHWordElement(N10, Parts))
4906 SDValue N11 = N1.getOperand(1);
4907 if (!isBSwapHWordElement(N11, Parts))
4910 // (or (or (or (and), (and)), (and)), (and))
4911 if (!isBSwapHWordElement(N1, Parts))
4913 if (!isBSwapHWordElement(N01, Parts))
4915 if (N00.getOpcode() != ISD::OR)
4917 SDValue N000 = N00.getOperand(0);
4918 if (!isBSwapHWordElement(N000, Parts))
4920 SDValue N001 = N00.getOperand(1);
4921 if (!isBSwapHWordElement(N001, Parts))
4925 // Make sure the parts are all coming from the same node.
4926 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4930 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4931 SDValue(Parts[0], 0));
4933 // Result of the bswap should be rotated by 16. If it's not legal, then
4934 // do (x << 16) | (x >> 16).
4935 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4936 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4937 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4938 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4939 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4940 return DAG.getNode(ISD::OR, DL, VT,
4941 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4942 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4945 /// This contains all DAGCombine rules which reduce two values combined by
4946 /// an Or operation to a single value \see visitANDLike().
4947 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4948 EVT VT = N1.getValueType();
4951 // fold (or x, undef) -> -1
4952 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4953 return DAG.getAllOnesConstant(DL, VT);
4955 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4958 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
4959 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4960 // Don't increase # computations.
4961 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4962 // We can only do this xform if we know that bits from X that are set in C2
4963 // but not in C1 are already zero. Likewise for Y.
4964 if (const ConstantSDNode *N0O1C =
4965 getAsNonOpaqueConstant(N0.getOperand(1))) {
4966 if (const ConstantSDNode *N1O1C =
4967 getAsNonOpaqueConstant(N1.getOperand(1))) {
4968 // We can only do this xform if we know that bits from X that are set in
4969 // C2 but not in C1 are already zero. Likewise for Y.
4970 const APInt &LHSMask = N0O1C->getAPIntValue();
4971 const APInt &RHSMask = N1O1C->getAPIntValue();
4973 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4974 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4975 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4976 N0.getOperand(0), N1.getOperand(0));
4977 return DAG.getNode(ISD::AND, DL, VT, X,
4978 DAG.getConstant(LHSMask | RHSMask, DL, VT));
4984 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4985 if (N0.getOpcode() == ISD::AND &&
4986 N1.getOpcode() == ISD::AND &&
4987 N0.getOperand(0) == N1.getOperand(0) &&
4988 // Don't increase # computations.
4989 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4990 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4991 N0.getOperand(1), N1.getOperand(1));
4992 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4998 SDValue DAGCombiner::visitOR(SDNode *N) {
4999 SDValue N0 = N->getOperand(0);
5000 SDValue N1 = N->getOperand(1);
5001 EVT VT = N1.getValueType();
5008 if (VT.isVector()) {
5009 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5012 // fold (or x, 0) -> x, vector edition
5013 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5015 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5018 // fold (or x, -1) -> -1, vector edition
5019 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5020 // do not return N0, because undef node may exist in N0
5021 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5022 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5023 // do not return N1, because undef node may exist in N1
5024 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5026 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5027 // Do this only if the resulting shuffle is legal.
5028 if (isa<ShuffleVectorSDNode>(N0) &&
5029 isa<ShuffleVectorSDNode>(N1) &&
5030 // Avoid folding a node with illegal type.
5031 TLI.isTypeLegal(VT)) {
5032 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5033 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5034 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5035 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5036 // Ensure both shuffles have a zero input.
5037 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5038 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5039 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5040 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5041 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5042 bool CanFold = true;
5043 int NumElts = VT.getVectorNumElements();
5044 SmallVector<int, 4> Mask(NumElts);
5046 for (int i = 0; i != NumElts; ++i) {
5047 int M0 = SV0->getMaskElt(i);
5048 int M1 = SV1->getMaskElt(i);
5050 // Determine if either index is pointing to a zero vector.
5051 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5052 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5054 // If one element is zero and the otherside is undef, keep undef.
5055 // This also handles the case that both are undef.
5056 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5061 // Make sure only one of the elements is zero.
5062 if (M0Zero == M1Zero) {
5067 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5069 // We have a zero and non-zero element. If the non-zero came from
5070 // SV0 make the index a LHS index. If it came from SV1, make it
5071 // a RHS index. We need to mod by NumElts because we don't care
5072 // which operand it came from in the original shuffles.
5073 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5077 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5078 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5080 bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5082 std::swap(NewLHS, NewRHS);
5083 ShuffleVectorSDNode::commuteMask(Mask);
5084 LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5088 return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5094 // fold (or c1, c2) -> c1|c2
5095 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5096 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5097 if (N0C && N1C && !N1C->isOpaque())
5098 return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5099 // canonicalize constant to RHS
5100 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5101 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5102 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5103 // fold (or x, 0) -> x
5104 if (isNullConstant(N1))
5106 // fold (or x, -1) -> -1
5107 if (isAllOnesConstant(N1))
5110 if (SDValue NewSel = foldBinOpIntoSelect(N))
5113 // fold (or x, c) -> c iff (x & ~c) == 0
5114 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5117 if (SDValue Combined = visitORLike(N0, N1, N))
5120 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5121 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5123 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5127 if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
5130 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5131 // iff (c1 & c2) != 0.
5132 auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5133 return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
5135 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5136 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
5137 if (SDValue COR = DAG.FoldConstantArithmetic(
5138 ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5139 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5140 AddToWorklist(IOR.getNode());
5141 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5145 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
5146 if (N0.getOpcode() == N1.getOpcode())
5147 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
5150 // See if this is some rotate idiom.
5151 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5152 return SDValue(Rot, 0);
5154 if (SDValue Load = MatchLoadCombine(N))
5157 // Simplify the operands using demanded-bits information.
5158 if (SimplifyDemandedBits(SDValue(N, 0)))
5159 return SDValue(N, 0);
5164 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5165 if (Op.getOpcode() == ISD::AND &&
5166 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5167 Mask = Op.getOperand(1);
5168 return Op.getOperand(0);
5173 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5174 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5176 Op = stripConstantMask(DAG, Op, Mask);
5177 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5184 /// Helper function for visitOR to extract the needed side of a rotate idiom
5185 /// from a shl/srl/mul/udiv. This is meant to handle cases where
5186 /// InstCombine merged some outside op with one of the shifts from
5187 /// the rotate pattern.
5188 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5189 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5192 /// (or (mul v c0) (shrl (mul v c1) c2)):
5193 /// expands (mul v c0) -> (shl (mul v c1) c3)
5195 /// (or (udiv v c0) (shl (udiv v c1) c2)):
5196 /// expands (udiv v c0) -> (shrl (udiv v c1) c3)
5198 /// (or (shl v c0) (shrl (shl v c1) c2)):
5199 /// expands (shl v c0) -> (shl (shl v c1) c3)
5201 /// (or (shrl v c0) (shl (shrl v c1) c2)):
5202 /// expands (shrl v c0) -> (shrl (shrl v c1) c3)
5204 /// Such that in all cases, c3+c2==bitwidth(op v c1).
5205 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5206 SDValue ExtractFrom, SDValue &Mask,
5208 assert(OppShift && ExtractFrom && "Empty SDValue");
5210 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5211 "Existing shift must be valid as a rotate half");
5213 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5215 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5217 // Find opcode of the needed shift to be extracted from (op0 v c0).
5218 unsigned Opcode = ISD::DELETED_NODE;
5219 bool IsMulOrDiv = false;
5220 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5221 // opcode or its arithmetic (mul or udiv) variant.
5222 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5223 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5224 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5226 Opcode = NeededShift;
5229 // op0 must be either the needed shift opcode or the mul/udiv equivalent
5230 // that the needed shift can be extracted from.
5231 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5232 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5235 // op0 must be the same opcode on both sides, have the same LHS argument,
5236 // and produce the same value type.
5237 SDValue OppShiftLHS = OppShift.getOperand(0);
5238 EVT ShiftedVT = OppShiftLHS.getValueType();
5239 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5240 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5241 ShiftedVT != ExtractFrom.getValueType())
5244 // Amount of the existing shift.
5245 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5246 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5247 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5248 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5249 ConstantSDNode *ExtractFromCst =
5250 isConstOrConstSplat(ExtractFrom.getOperand(1));
5251 // TODO: We should be able to handle non-uniform constant vectors for these values
5252 // Check that we have constant values.
5253 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5254 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5255 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5258 // Compute the shift amount we need to extract to complete the rotate.
5259 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5260 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5261 if (NeededShiftAmt.isNegative())
5263 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5264 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5265 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5266 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5268 // Now try extract the needed shift from the ExtractFrom op and see if the
5269 // result matches up with the existing shift's LHS op.
5271 // Op to extract from is a mul or udiv by a constant.
5273 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5274 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5275 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5276 NeededShiftAmt.getZExtValue());
5279 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5280 if (Rem != 0 || ResultAmt != OppLHSAmt)
5283 // Op to extract from is a shift by a constant.
5285 // c2 - (bitwidth(op0 v c0) - c1) == c0
5286 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5287 ExtractFromAmt.getBitWidth()))
5291 // Return the expanded shift op that should allow a rotate to be formed.
5292 EVT ShiftVT = OppShift.getOperand(1).getValueType();
5293 EVT ResVT = ExtractFrom.getValueType();
5294 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5295 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5298 // Return true if we can prove that, whenever Neg and Pos are both in the
5299 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
5300 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5302 // (or (shift1 X, Neg), (shift2 X, Pos))
5304 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5305 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
5306 // to consider shift amounts with defined behavior.
5307 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5308 SelectionDAG &DAG) {
5309 // If EltSize is a power of 2 then:
5311 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5312 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5314 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5315 // for the stronger condition:
5317 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
5319 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5320 // we can just replace Neg with Neg' for the rest of the function.
5322 // In other cases we check for the even stronger condition:
5324 // Neg == EltSize - Pos [B]
5326 // for all Neg and Pos. Note that the (or ...) then invokes undefined
5327 // behavior if Pos == 0 (and consequently Neg == EltSize).
5329 // We could actually use [A] whenever EltSize is a power of 2, but the
5330 // only extra cases that it would match are those uninteresting ones
5331 // where Neg and Pos are never in range at the same time. E.g. for
5332 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5333 // as well as (sub 32, Pos), but:
5335 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5337 // always invokes undefined behavior for 32-bit X.
5339 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5340 unsigned MaskLoBits = 0;
5341 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5342 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5344 DAG.computeKnownBits(Neg.getOperand(0), Known);
5345 unsigned Bits = Log2_64(EltSize);
5346 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5347 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5348 Neg = Neg.getOperand(0);
5354 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5355 if (Neg.getOpcode() != ISD::SUB)
5357 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5360 SDValue NegOp1 = Neg.getOperand(1);
5362 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5363 // Pos'. The truncation is redundant for the purpose of the equality.
5364 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5365 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5367 DAG.computeKnownBits(Pos.getOperand(0), Known);
5368 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5369 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5371 Pos = Pos.getOperand(0);
5375 // The condition we need is now:
5377 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5379 // If NegOp1 == Pos then we need:
5381 // EltSize & Mask == NegC & Mask
5383 // (because "x & Mask" is a truncation and distributes through subtraction).
5386 Width = NegC->getAPIntValue();
5388 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5389 // Then the condition we want to prove becomes:
5391 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5393 // which, again because "x & Mask" is a truncation, becomes:
5395 // NegC & Mask == (EltSize - PosC) & Mask
5396 // EltSize & Mask == (NegC + PosC) & Mask
5397 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5398 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5399 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5405 // Now we just need to check that EltSize & Mask == Width & Mask.
5407 // EltSize & Mask is 0 since Mask is EltSize - 1.
5408 return Width.getLoBits(MaskLoBits) == 0;
5409 return Width == EltSize;
5412 // A subroutine of MatchRotate used once we have found an OR of two opposite
5413 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
5414 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5415 // former being preferred if supported. InnerPos and InnerNeg are Pos and
5416 // Neg with outer conversions stripped away.
5417 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5418 SDValue Neg, SDValue InnerPos,
5419 SDValue InnerNeg, unsigned PosOpcode,
5420 unsigned NegOpcode, const SDLoc &DL) {
5421 // fold (or (shl x, (*ext y)),
5422 // (srl x, (*ext (sub 32, y)))) ->
5423 // (rotl x, y) or (rotr x, (sub 32, y))
5425 // fold (or (shl x, (*ext (sub 32, y))),
5426 // (srl x, (*ext y))) ->
5427 // (rotr x, y) or (rotl x, (sub 32, y))
5428 EVT VT = Shifted.getValueType();
5429 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5430 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5431 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5432 HasPos ? Pos : Neg).getNode();
5438 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
5439 // idioms for rotate, and if the target supports rotation instructions, generate
5441 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5442 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
5443 EVT VT = LHS.getValueType();
5444 if (!TLI.isTypeLegal(VT)) return nullptr;
5446 // The target must have at least one rotate flavor.
5447 bool HasROTL = hasOperation(ISD::ROTL, VT);
5448 bool HasROTR = hasOperation(ISD::ROTR, VT);
5449 if (!HasROTL && !HasROTR) return nullptr;
5451 // Check for truncated rotate.
5452 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5453 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5454 assert(LHS.getValueType() == RHS.getValueType());
5455 if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5456 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5457 SDValue(Rot, 0)).getNode();
5461 // Match "(X shl/srl V1) & V2" where V2 may not be present.
5462 SDValue LHSShift; // The shift.
5463 SDValue LHSMask; // AND value if any.
5464 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5466 SDValue RHSShift; // The shift.
5467 SDValue RHSMask; // AND value if any.
5468 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5470 // If neither side matched a rotate half, bail
5471 if (!LHSShift && !RHSShift)
5474 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5475 // side of the rotate, so try to handle that here. In all cases we need to
5476 // pass the matched shift from the opposite side to compute the opcode and
5477 // needed shift amount to extract. We still want to do this if both sides
5478 // matched a rotate half because one half may be a potential overshift that
5479 // can be broken down (ie if InstCombine merged two shl or srl ops into a
5482 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5484 if (SDValue NewRHSShift =
5485 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5486 RHSShift = NewRHSShift;
5487 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5489 if (SDValue NewLHSShift =
5490 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5491 LHSShift = NewLHSShift;
5493 // If a side is still missing, nothing else we can do.
5494 if (!RHSShift || !LHSShift)
5497 // At this point we've matched or extracted a shift op on each side.
5499 if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5500 return nullptr; // Not shifting the same value.
5502 if (LHSShift.getOpcode() == RHSShift.getOpcode())
5503 return nullptr; // Shifts must disagree.
5505 // Canonicalize shl to left side in a shl/srl pair.
5506 if (RHSShift.getOpcode() == ISD::SHL) {
5507 std::swap(LHS, RHS);
5508 std::swap(LHSShift, RHSShift);
5509 std::swap(LHSMask, RHSMask);
5512 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5513 SDValue LHSShiftArg = LHSShift.getOperand(0);
5514 SDValue LHSShiftAmt = LHSShift.getOperand(1);
5515 SDValue RHSShiftArg = RHSShift.getOperand(0);
5516 SDValue RHSShiftAmt = RHSShift.getOperand(1);
5518 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5519 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
5520 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5521 ConstantSDNode *RHS) {
5522 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5524 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5525 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5526 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5528 // If there is an AND of either shifted operand, apply it to the result.
5529 if (LHSMask.getNode() || RHSMask.getNode()) {
5530 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5531 SDValue Mask = AllOnes;
5533 if (LHSMask.getNode()) {
5534 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5535 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5536 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5538 if (RHSMask.getNode()) {
5539 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5540 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5541 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5544 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5547 return Rot.getNode();
5550 // If there is a mask here, and we have a variable shift, we can't be sure
5551 // that we're masking out the right stuff.
5552 if (LHSMask.getNode() || RHSMask.getNode())
5555 // If the shift amount is sign/zext/any-extended just peel it off.
5556 SDValue LExtOp0 = LHSShiftAmt;
5557 SDValue RExtOp0 = RHSShiftAmt;
5558 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5559 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5560 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5561 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5562 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5563 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5564 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5565 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5566 LExtOp0 = LHSShiftAmt.getOperand(0);
5567 RExtOp0 = RHSShiftAmt.getOperand(0);
5570 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5571 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5575 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5576 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5585 /// Represents known origin of an individual byte in load combine pattern. The
5586 /// value of the byte is either constant zero or comes from memory.
5587 struct ByteProvider {
5588 // For constant zero providers Load is set to nullptr. For memory providers
5589 // Load represents the node which loads the byte from memory.
5590 // ByteOffset is the offset of the byte in the value produced by the load.
5591 LoadSDNode *Load = nullptr;
5592 unsigned ByteOffset = 0;
5594 ByteProvider() = default;
5596 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5597 return ByteProvider(Load, ByteOffset);
5600 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5602 bool isConstantZero() const { return !Load; }
5603 bool isMemory() const { return Load; }
5605 bool operator==(const ByteProvider &Other) const {
5606 return Other.Load == Load && Other.ByteOffset == ByteOffset;
5610 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5611 : Load(Load), ByteOffset(ByteOffset) {}
5614 } // end anonymous namespace
5616 /// Recursively traverses the expression calculating the origin of the requested
5617 /// byte of the given value. Returns None if the provider can't be calculated.
5619 /// For all the values except the root of the expression verifies that the value
5620 /// has exactly one use and if it's not true return None. This way if the origin
5621 /// of the byte is returned it's guaranteed that the values which contribute to
5622 /// the byte are not used outside of this expression.
5624 /// Because the parts of the expression are not allowed to have more than one
5625 /// use this function iterates over trees, not DAGs. So it never visits the same
5626 /// node more than once.
5627 static const Optional<ByteProvider>
5628 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5629 bool Root = false) {
5630 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5634 if (!Root && !Op.hasOneUse())
5637 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5638 unsigned BitWidth = Op.getValueSizeInBits();
5639 if (BitWidth % 8 != 0)
5641 unsigned ByteWidth = BitWidth / 8;
5642 assert(Index < ByteWidth && "invalid index requested");
5645 switch (Op.getOpcode()) {
5647 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5650 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5654 if (LHS->isConstantZero())
5656 if (RHS->isConstantZero())
5661 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5665 uint64_t BitShift = ShiftOp->getZExtValue();
5666 if (BitShift % 8 != 0)
5668 uint64_t ByteShift = BitShift / 8;
5670 return Index < ByteShift
5671 ? ByteProvider::getConstantZero()
5672 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5675 case ISD::ANY_EXTEND:
5676 case ISD::SIGN_EXTEND:
5677 case ISD::ZERO_EXTEND: {
5678 SDValue NarrowOp = Op->getOperand(0);
5679 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5680 if (NarrowBitWidth % 8 != 0)
5682 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5684 if (Index >= NarrowByteWidth)
5685 return Op.getOpcode() == ISD::ZERO_EXTEND
5686 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5688 return calculateByteProvider(NarrowOp, Index, Depth + 1);
5691 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5694 auto L = cast<LoadSDNode>(Op.getNode());
5695 if (L->isVolatile() || L->isIndexed())
5698 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5699 if (NarrowBitWidth % 8 != 0)
5701 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5703 if (Index >= NarrowByteWidth)
5704 return L->getExtensionType() == ISD::ZEXTLOAD
5705 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5707 return ByteProvider::getMemory(L, Index);
5714 /// Match a pattern where a wide type scalar value is loaded by several narrow
5715 /// loads and combined by shifts and ors. Fold it into a single load or a load
5716 /// and a BSWAP if the targets supports it.
5718 /// Assuming little endian target:
5720 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5722 /// i32 val = *((i32)a)
5725 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5727 /// i32 val = BSWAP(*((i32)a))
5729 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5730 /// interact well with the worklist mechanism. When a part of the pattern is
5731 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5732 /// but the root node of the pattern which triggers the load combine is not
5733 /// necessarily a direct user of the changed node. For example, once the address
5734 /// of t28 load is reassociated load combine won't be triggered:
5735 /// t25: i32 = add t4, Constant:i32<2>
5736 /// t26: i64 = sign_extend t25
5737 /// t27: i64 = add t2, t26
5738 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5739 /// t29: i32 = zero_extend t28
5740 /// t32: i32 = shl t29, Constant:i8<8>
5741 /// t33: i32 = or t23, t32
5742 /// As a possible fix visitLoad can check if the load can be a part of a load
5743 /// combine pattern and add corresponding OR roots to the worklist.
5744 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
5745 assert(N->getOpcode() == ISD::OR &&
5746 "Can only match load combining against OR nodes");
5748 // Handles simple types only
5749 EVT VT = N->getValueType(0);
5750 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
5752 unsigned ByteWidth = VT.getSizeInBits() / 8;
5754 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5755 // Before legalize we can introduce too wide illegal loads which will be later
5756 // split into legal sized loads. This enables us to combine i64 load by i8
5757 // patterns to a couple of i32 loads on 32 bit targets.
5758 if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
5761 std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
5762 unsigned BW, unsigned i) { return i; };
5763 std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
5764 unsigned BW, unsigned i) { return BW - i - 1; };
5766 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
5767 auto MemoryByteOffset = [&] (ByteProvider P) {
5768 assert(P.isMemory() && "Must be a memory byte provider");
5769 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
5770 assert(LoadBitWidth % 8 == 0 &&
5771 "can only analyze providers for individual bytes not bit");
5772 unsigned LoadByteWidth = LoadBitWidth / 8;
5773 return IsBigEndianTarget
5774 ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
5775 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
5778 Optional<BaseIndexOffset> Base;
5781 SmallPtrSet<LoadSDNode *, 8> Loads;
5782 Optional<ByteProvider> FirstByteProvider;
5783 int64_t FirstOffset = INT64_MAX;
5785 // Check if all the bytes of the OR we are looking at are loaded from the same
5786 // base address. Collect bytes offsets from Base address in ByteOffsets.
5787 SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
5788 for (unsigned i = 0; i < ByteWidth; i++) {
5789 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
5790 if (!P || !P->isMemory()) // All the bytes must be loaded from memory
5793 LoadSDNode *L = P->Load;
5794 assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
5795 "Must be enforced by calculateByteProvider");
5796 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
5798 // All loads must share the same chain
5799 SDValue LChain = L->getChain();
5802 else if (Chain != LChain)
5805 // Loads must share the same base address
5806 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
5807 int64_t ByteOffsetFromBase = 0;
5810 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
5813 // Calculate the offset of the current byte from the base address
5814 ByteOffsetFromBase += MemoryByteOffset(*P);
5815 ByteOffsets[i] = ByteOffsetFromBase;
5817 // Remember the first byte load
5818 if (ByteOffsetFromBase < FirstOffset) {
5819 FirstByteProvider = P;
5820 FirstOffset = ByteOffsetFromBase;
5825 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
5826 "memory, so there must be at least one load which produces the value");
5827 assert(Base && "Base address of the accessed memory location must be set");
5828 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
5830 // Check if the bytes of the OR we are looking at match with either big or
5831 // little endian value load
5832 bool BigEndian = true, LittleEndian = true;
5833 for (unsigned i = 0; i < ByteWidth; i++) {
5834 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
5835 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
5836 BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
5837 if (!BigEndian && !LittleEndian)
5840 assert((BigEndian != LittleEndian) && "should be either or");
5841 assert(FirstByteProvider && "must be set");
5843 // Ensure that the first byte is loaded from zero offset of the first load.
5844 // So the combined value can be loaded from the first load address.
5845 if (MemoryByteOffset(*FirstByteProvider) != 0)
5847 LoadSDNode *FirstLoad = FirstByteProvider->Load;
5849 // The node we are looking at matches with the pattern, check if we can
5850 // replace it with a single load and bswap if needed.
5852 // If the load needs byte swap check if the target supports it
5853 bool NeedsBswap = IsBigEndianTarget != BigEndian;
5855 // Before legalize we can introduce illegal bswaps which will be later
5856 // converted to an explicit bswap sequence. This way we end up with a single
5857 // load and byte shuffling instead of several loads and byte shuffling.
5858 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
5861 // Check that a load of the wide type is both allowed and fast on the target
5863 bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
5864 VT, FirstLoad->getAddressSpace(),
5865 FirstLoad->getAlignment(), &Fast);
5866 if (!Allowed || !Fast)
5870 DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
5871 FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
5873 // Transfer chain users from old loads to the new load.
5874 for (LoadSDNode *L : Loads)
5875 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
5877 return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
5880 // If the target has andn, bsl, or a similar bit-select instruction,
5881 // we want to unfold masked merge, with canonical pattern of:
5883 // ((x ^ y) & m) ^ y
5886 // (x & m) | (y & ~m)
5887 // If y is a constant, and the 'andn' does not work with immediates,
5888 // we unfold into a different pattern:
5889 // ~(~x & m) & (m | y)
5890 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
5891 // the very least that breaks andnpd / andnps patterns, and because those
5892 // patterns are simplified in IR and shouldn't be created in the DAG
5893 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
5894 assert(N->getOpcode() == ISD::XOR);
5896 // Don't touch 'not' (i.e. where y = -1).
5897 if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1)))
5900 EVT VT = N->getValueType(0);
5902 // There are 3 commutable operators in the pattern,
5903 // so we have to deal with 8 possible variants of the basic pattern.
5905 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
5906 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
5908 SDValue Xor = And.getOperand(XorIdx);
5909 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
5911 SDValue Xor0 = Xor.getOperand(0);
5912 SDValue Xor1 = Xor.getOperand(1);
5913 // Don't touch 'not' (i.e. where y = -1).
5914 if (isAllOnesConstantOrAllOnesSplatConstant(Xor1))
5917 std::swap(Xor0, Xor1);
5922 M = And.getOperand(XorIdx ? 0 : 1);
5926 SDValue N0 = N->getOperand(0);
5927 SDValue N1 = N->getOperand(1);
5928 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
5929 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
5932 // Don't do anything if the mask is constant. This should not be reachable.
5933 // InstCombine should have already unfolded this pattern, and DAGCombiner
5934 // probably shouldn't produce it, too.
5935 if (isa<ConstantSDNode>(M.getNode()))
5938 // We can transform if the target has AndNot
5939 if (!TLI.hasAndNot(M))
5944 // If Y is a constant, check that 'andn' works with immediates.
5945 if (!TLI.hasAndNot(Y)) {
5946 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
5947 // If not, we need to do a bit more work to make sure andn is still used.
5948 SDValue NotX = DAG.getNOT(DL, X, VT);
5949 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
5950 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
5951 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
5952 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
5955 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
5956 SDValue NotM = DAG.getNOT(DL, M, VT);
5957 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
5959 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
5962 SDValue DAGCombiner::visitXOR(SDNode *N) {
5963 SDValue N0 = N->getOperand(0);
5964 SDValue N1 = N->getOperand(1);
5965 EVT VT = N0.getValueType();
5968 if (VT.isVector()) {
5969 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5972 // fold (xor x, 0) -> x, vector edition
5973 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5975 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5979 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
5980 if (N0.isUndef() && N1.isUndef())
5981 return DAG.getConstant(0, SDLoc(N), VT);
5982 // fold (xor x, undef) -> undef
5987 // fold (xor c1, c2) -> c1^c2
5988 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5989 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
5991 return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
5992 // canonicalize constant to RHS
5993 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5994 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5995 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
5996 // fold (xor x, 0) -> x
5997 if (isNullConstant(N1))
6000 if (SDValue NewSel = foldBinOpIntoSelect(N))
6004 if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
6007 // fold !(x cc y) -> (x !cc y)
6008 SDValue LHS, RHS, CC;
6009 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6010 bool isInt = LHS.getValueType().isInteger();
6011 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6014 if (!LegalOperations ||
6015 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6016 switch (N0.getOpcode()) {
6018 llvm_unreachable("Unhandled SetCC Equivalent!");
6020 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6021 case ISD::SELECT_CC:
6022 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6023 N0.getOperand(3), NotCC);
6028 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6029 if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
6030 N0.getNode()->hasOneUse() &&
6031 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6032 SDValue V = N0.getOperand(0);
6034 V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
6035 DAG.getConstant(1, DL, V.getValueType()));
6036 AddToWorklist(V.getNode());
6037 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
6040 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6041 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6042 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
6043 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6044 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6045 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
6046 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6047 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6048 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6049 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
6052 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6053 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6054 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
6055 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6056 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6057 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
6058 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6059 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6060 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6061 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
6064 // fold (xor (and x, y), y) -> (and (not x), y)
6065 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6066 N0->getOperand(1) == N1) {
6067 SDValue X = N0->getOperand(0);
6068 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6069 AddToWorklist(NotX.getNode());
6070 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
6073 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
6074 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6075 SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1;
6076 SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1;
6077 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6078 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6079 SDValue S0 = S.getOperand(0);
6080 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6081 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6082 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6083 if (C->getAPIntValue() == (OpSizeInBits - 1))
6084 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
6089 // fold (xor x, x) -> 0
6091 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
6093 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6094 // Here is a concrete example of this equivalence:
6096 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
6097 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6101 // i16 ~1 == 0b1111111111111110
6102 // i16 rol(~1, 14) == 0b1011111111111111
6104 // Some additional tips to help conceptualize this transform:
6105 // - Try to see the operation as placing a single zero in a value of all ones.
6106 // - There exists no value for x which would allow the result to contain zero.
6107 // - Values of x larger than the bitwidth are undefined and do not require a
6108 // consistent result.
6109 // - Pushing the zero left requires shifting one bits in from the right.
6110 // A rotate left of ~1 is a nice way of achieving the desired result.
6111 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
6112 && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6114 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6118 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
6119 if (N0.getOpcode() == N1.getOpcode())
6120 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
6123 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
6124 if (SDValue MM = unfoldMaskedMerge(N))
6127 // Simplify the expression using non-local knowledge.
6128 if (SimplifyDemandedBits(SDValue(N, 0)))
6129 return SDValue(N, 0);
6134 /// Handle transforms common to the three shifts, when the shift amount is a
6136 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6137 SDNode *LHS = N->getOperand(0).getNode();
6138 if (!LHS->hasOneUse()) return SDValue();
6140 // We want to pull some binops through shifts, so that we have (and (shift))
6141 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
6142 // thing happens with address calculations, so it's important to canonicalize
6144 bool HighBitSet = false; // Can we transform this if the high bit is set?
6146 switch (LHS->getOpcode()) {
6147 default: return SDValue();
6150 HighBitSet = false; // We can only transform sra if the high bit is clear.
6153 HighBitSet = true; // We can only transform sra if the high bit is set.
6156 if (N->getOpcode() != ISD::SHL)
6157 return SDValue(); // only shl(add) not sr[al](add).
6158 HighBitSet = false; // We can only transform sra if the high bit is clear.
6162 // We require the RHS of the binop to be a constant and not opaque as well.
6163 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6164 if (!BinOpCst) return SDValue();
6166 // FIXME: disable this unless the input to the binop is a shift by a constant
6167 // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
6168 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6169 bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6170 BinOpLHSVal->getOpcode() == ISD::SRA ||
6171 BinOpLHSVal->getOpcode() == ISD::SRL;
6172 bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6173 BinOpLHSVal->getOpcode() == ISD::SELECT;
6175 if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6179 if (isCopyOrSelect && N->hasOneUse())
6182 EVT VT = N->getValueType(0);
6184 // If this is a signed shift right, and the high bit is modified by the
6185 // logical operation, do not perform the transformation. The highBitSet
6186 // boolean indicates the value of the high bit of the constant which would
6187 // cause it to be modified for this operation.
6188 if (N->getOpcode() == ISD::SRA) {
6189 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6190 if (BinOpRHSSignSet != HighBitSet)
6194 if (!TLI.isDesirableToCommuteWithShift(LHS))
6197 // Fold the constants, shifting the binop RHS by the shift amount.
6198 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6200 LHS->getOperand(1), N->getOperand(1));
6201 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6203 // Create the new shift.
6204 SDValue NewShift = DAG.getNode(N->getOpcode(),
6205 SDLoc(LHS->getOperand(0)),
6206 VT, LHS->getOperand(0), N->getOperand(1));
6208 // Create the new binop.
6209 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6212 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6213 assert(N->getOpcode() == ISD::TRUNCATE);
6214 assert(N->getOperand(0).getOpcode() == ISD::AND);
6216 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6217 if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
6218 SDValue N01 = N->getOperand(0).getOperand(1);
6219 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6221 EVT TruncVT = N->getValueType(0);
6222 SDValue N00 = N->getOperand(0).getOperand(0);
6223 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6224 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6225 AddToWorklist(Trunc00.getNode());
6226 AddToWorklist(Trunc01.getNode());
6227 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6234 SDValue DAGCombiner::visitRotate(SDNode *N) {
6236 SDValue N0 = N->getOperand(0);
6237 SDValue N1 = N->getOperand(1);
6238 EVT VT = N->getValueType(0);
6239 unsigned Bitsize = VT.getScalarSizeInBits();
6241 // fold (rot x, 0) -> x
6242 if (isNullConstantOrNullSplatConstant(N1))
6245 // fold (rot x, c) -> (rot x, c % BitSize)
6246 if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6247 if (Cst->getAPIntValue().uge(Bitsize)) {
6248 uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6249 return DAG.getNode(N->getOpcode(), dl, VT, N0,
6250 DAG.getConstant(RotAmt, dl, N1.getValueType()));
6254 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6255 if (N1.getOpcode() == ISD::TRUNCATE &&
6256 N1.getOperand(0).getOpcode() == ISD::AND) {
6257 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6258 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6261 unsigned NextOp = N0.getOpcode();
6262 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
6263 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6264 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6265 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6266 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6267 EVT ShiftVT = C1->getValueType(0);
6268 bool SameSide = (N->getOpcode() == NextOp);
6269 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6270 if (SDValue CombinedShift =
6271 DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6272 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6273 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6274 ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6275 BitsizeC.getNode());
6276 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6284 SDValue DAGCombiner::visitSHL(SDNode *N) {
6285 SDValue N0 = N->getOperand(0);
6286 SDValue N1 = N->getOperand(1);
6287 EVT VT = N0.getValueType();
6288 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6291 if (VT.isVector()) {
6292 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6295 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6296 // If setcc produces all-one true value then:
6297 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6298 if (N1CV && N1CV->isConstant()) {
6299 if (N0.getOpcode() == ISD::AND) {
6300 SDValue N00 = N0->getOperand(0);
6301 SDValue N01 = N0->getOperand(1);
6302 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6304 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6305 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6306 TargetLowering::ZeroOrNegativeOneBooleanContent) {
6307 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6309 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6315 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6317 // fold (shl c1, c2) -> c1<<c2
6318 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6319 if (N0C && N1C && !N1C->isOpaque())
6320 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6321 // fold (shl 0, x) -> 0
6322 if (isNullConstantOrNullSplatConstant(N0))
6324 // fold (shl x, c >= size(x)) -> undef
6325 // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6326 auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6327 return Val->getAPIntValue().uge(OpSizeInBits);
6329 if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6330 return DAG.getUNDEF(VT);
6331 // fold (shl x, 0) -> x
6332 if (N1C && N1C->isNullValue())
6334 // fold (shl undef, x) -> 0
6336 return DAG.getConstant(0, SDLoc(N), VT);
6338 if (SDValue NewSel = foldBinOpIntoSelect(N))
6341 // if (shl x, c) is known to be zero, return 0
6342 if (DAG.MaskedValueIsZero(SDValue(N, 0),
6343 APInt::getAllOnesValue(OpSizeInBits)))
6344 return DAG.getConstant(0, SDLoc(N), VT);
6345 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6346 if (N1.getOpcode() == ISD::TRUNCATE &&
6347 N1.getOperand(0).getOpcode() == ISD::AND) {
6348 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6349 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6352 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6353 return SDValue(N, 0);
6355 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
6356 if (N0.getOpcode() == ISD::SHL) {
6357 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6358 ConstantSDNode *RHS) {
6359 APInt c1 = LHS->getAPIntValue();
6360 APInt c2 = RHS->getAPIntValue();
6361 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6362 return (c1 + c2).uge(OpSizeInBits);
6364 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6365 return DAG.getConstant(0, SDLoc(N), VT);
6367 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6368 ConstantSDNode *RHS) {
6369 APInt c1 = LHS->getAPIntValue();
6370 APInt c2 = RHS->getAPIntValue();
6371 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6372 return (c1 + c2).ult(OpSizeInBits);
6374 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6376 EVT ShiftVT = N1.getValueType();
6377 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6378 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6382 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6383 // For this to be valid, the second form must not preserve any of the bits
6384 // that are shifted out by the inner shift in the first form. This means
6385 // the outer shift size must be >= the number of bits added by the ext.
6386 // As a corollary, we don't care what kind of ext it is.
6387 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6388 N0.getOpcode() == ISD::ANY_EXTEND ||
6389 N0.getOpcode() == ISD::SIGN_EXTEND) &&
6390 N0.getOperand(0).getOpcode() == ISD::SHL) {
6391 SDValue N0Op0 = N0.getOperand(0);
6392 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6393 APInt c1 = N0Op0C1->getAPIntValue();
6394 APInt c2 = N1C->getAPIntValue();
6395 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6397 EVT InnerShiftVT = N0Op0.getValueType();
6398 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6399 if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6401 APInt Sum = c1 + c2;
6402 if (Sum.uge(OpSizeInBits))
6403 return DAG.getConstant(0, DL, VT);
6407 DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6408 DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6413 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6414 // Only fold this if the inner zext has no other uses to avoid increasing
6415 // the total number of instructions.
6416 if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6417 N0.getOperand(0).getOpcode() == ISD::SRL) {
6418 SDValue N0Op0 = N0.getOperand(0);
6419 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6420 if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6421 uint64_t c1 = N0Op0C1->getZExtValue();
6422 uint64_t c2 = N1C->getZExtValue();
6424 SDValue NewOp0 = N0.getOperand(0);
6425 EVT CountVT = NewOp0.getOperand(1).getValueType();
6427 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6429 DAG.getConstant(c2, DL, CountVT));
6430 AddToWorklist(NewSHL.getNode());
6431 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6437 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
6438 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
6439 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6440 N0->getFlags().hasExact()) {
6441 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6442 uint64_t C1 = N0C1->getZExtValue();
6443 uint64_t C2 = N1C->getZExtValue();
6446 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6447 DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6448 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6449 DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6453 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
6454 // (and (srl x, (sub c1, c2), MASK)
6455 // Only fold this if the inner shift has no other uses -- if it does, folding
6456 // this will increase the total number of instructions.
6457 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
6458 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6459 uint64_t c1 = N0C1->getZExtValue();
6460 if (c1 < OpSizeInBits) {
6461 uint64_t c2 = N1C->getZExtValue();
6462 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6467 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6468 DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6470 Mask.lshrInPlace(c1 - c2);
6472 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6473 DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6476 return DAG.getNode(ISD::AND, DL, VT, Shift,
6477 DAG.getConstant(Mask, DL, VT));
6482 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6483 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6484 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6486 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6487 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6488 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6491 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6492 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6493 // Variant of version done on multiply, except mul by a power of 2 is turned
6495 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6496 N0.getNode()->hasOneUse() &&
6497 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6498 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6499 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6500 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6501 AddToWorklist(Shl0.getNode());
6502 AddToWorklist(Shl1.getNode());
6503 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6506 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
6507 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6508 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6509 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6510 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6511 if (isConstantOrConstantVector(Shl))
6512 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6515 if (N1C && !N1C->isOpaque())
6516 if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6522 SDValue DAGCombiner::visitSRA(SDNode *N) {
6523 SDValue N0 = N->getOperand(0);
6524 SDValue N1 = N->getOperand(1);
6525 EVT VT = N0.getValueType();
6526 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6528 // Arithmetic shifting an all-sign-bit value is a no-op.
6529 // fold (sra 0, x) -> 0
6530 // fold (sra -1, x) -> -1
6531 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
6536 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6539 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6541 // fold (sra c1, c2) -> (sra c1, c2)
6542 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6543 if (N0C && N1C && !N1C->isOpaque())
6544 return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
6545 // fold (sra x, c >= size(x)) -> undef
6546 // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6547 auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6548 return Val->getAPIntValue().uge(OpSizeInBits);
6550 if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6551 return DAG.getUNDEF(VT);
6552 // fold (sra x, 0) -> x
6553 if (N1C && N1C->isNullValue())
6556 if (SDValue NewSel = foldBinOpIntoSelect(N))
6559 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
6561 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
6562 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
6563 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
6565 ExtVT = EVT::getVectorVT(*DAG.getContext(),
6566 ExtVT, VT.getVectorNumElements());
6567 if ((!LegalOperations ||
6568 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
6569 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
6570 N0.getOperand(0), DAG.getValueType(ExtVT));
6573 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
6574 if (N0.getOpcode() == ISD::SRA) {
6576 EVT ShiftVT = N1.getValueType();
6578 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6579 ConstantSDNode *RHS) {
6580 APInt c1 = LHS->getAPIntValue();
6581 APInt c2 = RHS->getAPIntValue();
6582 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6583 return (c1 + c2).uge(OpSizeInBits);
6585 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6586 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
6587 DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
6589 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6590 ConstantSDNode *RHS) {
6591 APInt c1 = LHS->getAPIntValue();
6592 APInt c2 = RHS->getAPIntValue();
6593 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6594 return (c1 + c2).ult(OpSizeInBits);
6596 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6597 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6598 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
6602 // fold (sra (shl X, m), (sub result_size, n))
6603 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
6604 // result_size - n != m.
6605 // If truncate is free for the target sext(shl) is likely to result in better
6607 if (N0.getOpcode() == ISD::SHL && N1C) {
6608 // Get the two constanst of the shifts, CN0 = m, CN = n.
6609 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
6611 LLVMContext &Ctx = *DAG.getContext();
6612 // Determine what the truncate's result bitsize and type would be.
6613 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
6616 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
6618 // Determine the residual right-shift amount.
6619 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
6621 // If the shift is not a no-op (in which case this should be just a sign
6622 // extend already), the truncated to type is legal, sign_extend is legal
6623 // on that type, and the truncate to that type is both legal and free,
6624 // perform the transform.
6625 if ((ShiftAmt > 0) &&
6626 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
6627 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
6628 TLI.isTruncateFree(VT, TruncVT)) {
6630 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
6631 getShiftAmountTy(N0.getOperand(0).getValueType()));
6632 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
6633 N0.getOperand(0), Amt);
6634 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
6636 return DAG.getNode(ISD::SIGN_EXTEND, DL,
6637 N->getValueType(0), Trunc);
6642 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
6643 if (N1.getOpcode() == ISD::TRUNCATE &&
6644 N1.getOperand(0).getOpcode() == ISD::AND) {
6645 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6646 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
6649 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
6650 // if c1 is equal to the number of bits the trunc removes
6651 if (N0.getOpcode() == ISD::TRUNCATE &&
6652 (N0.getOperand(0).getOpcode() == ISD::SRL ||
6653 N0.getOperand(0).getOpcode() == ISD::SRA) &&
6654 N0.getOperand(0).hasOneUse() &&
6655 N0.getOperand(0).getOperand(1).hasOneUse() &&
6657 SDValue N0Op0 = N0.getOperand(0);
6658 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
6659 unsigned LargeShiftVal = LargeShift->getZExtValue();
6660 EVT LargeVT = N0Op0.getValueType();
6662 if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
6665 DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
6666 getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
6667 SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
6668 N0Op0.getOperand(0), Amt);
6669 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
6674 // Simplify, based on bits shifted out of the LHS.
6675 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6676 return SDValue(N, 0);
6678 // If the sign bit is known to be zero, switch this to a SRL.
6679 if (DAG.SignBitIsZero(N0))
6680 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
6682 if (N1C && !N1C->isOpaque())
6683 if (SDValue NewSRA = visitShiftByConstant(N, N1C))
6689 SDValue DAGCombiner::visitSRL(SDNode *N) {
6690 SDValue N0 = N->getOperand(0);
6691 SDValue N1 = N->getOperand(1);
6692 EVT VT = N0.getValueType();
6693 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6697 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6700 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6702 // fold (srl c1, c2) -> c1 >>u c2
6703 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6704 if (N0C && N1C && !N1C->isOpaque())
6705 return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
6706 // fold (srl 0, x) -> 0
6707 if (isNullConstantOrNullSplatConstant(N0))
6709 // fold (srl x, c >= size(x)) -> undef
6710 // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
6711 auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
6712 return Val->getAPIntValue().uge(OpSizeInBits);
6714 if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig))
6715 return DAG.getUNDEF(VT);
6716 // fold (srl x, 0) -> x
6717 if (N1C && N1C->isNullValue())
6720 if (SDValue NewSel = foldBinOpIntoSelect(N))
6723 // if (srl x, c) is known to be zero, return 0
6724 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
6725 APInt::getAllOnesValue(OpSizeInBits)))
6726 return DAG.getConstant(0, SDLoc(N), VT);
6728 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
6729 if (N0.getOpcode() == ISD::SRL) {
6730 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6731 ConstantSDNode *RHS) {
6732 APInt c1 = LHS->getAPIntValue();
6733 APInt c2 = RHS->getAPIntValue();
6734 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6735 return (c1 + c2).uge(OpSizeInBits);
6737 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6738 return DAG.getConstant(0, SDLoc(N), VT);
6740 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6741 ConstantSDNode *RHS) {
6742 APInt c1 = LHS->getAPIntValue();
6743 APInt c2 = RHS->getAPIntValue();
6744 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6745 return (c1 + c2).ult(OpSizeInBits);
6747 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6749 EVT ShiftVT = N1.getValueType();
6750 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6751 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
6755 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
6756 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
6757 N0.getOperand(0).getOpcode() == ISD::SRL) {
6758 if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
6759 uint64_t c1 = N001C->getZExtValue();
6760 uint64_t c2 = N1C->getZExtValue();
6761 EVT InnerShiftVT = N0.getOperand(0).getValueType();
6762 EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
6763 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6764 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
6765 if (c1 + OpSizeInBits == InnerShiftSize) {
6767 if (c1 + c2 >= InnerShiftSize)
6768 return DAG.getConstant(0, DL, VT);
6769 return DAG.getNode(ISD::TRUNCATE, DL, VT,
6770 DAG.getNode(ISD::SRL, DL, InnerShiftVT,
6771 N0.getOperand(0).getOperand(0),
6772 DAG.getConstant(c1 + c2, DL,
6778 // fold (srl (shl x, c), c) -> (and x, cst2)
6779 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
6780 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
6783 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
6784 AddToWorklist(Mask.getNode());
6785 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
6788 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
6789 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6790 // Shifting in all undef bits?
6791 EVT SmallVT = N0.getOperand(0).getValueType();
6792 unsigned BitSize = SmallVT.getScalarSizeInBits();
6793 if (N1C->getZExtValue() >= BitSize)
6794 return DAG.getUNDEF(VT);
6796 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
6797 uint64_t ShiftAmt = N1C->getZExtValue();
6799 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
6801 DAG.getConstant(ShiftAmt, DL0,
6802 getShiftAmountTy(SmallVT)));
6803 AddToWorklist(SmallShift.getNode());
6804 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
6806 return DAG.getNode(ISD::AND, DL, VT,
6807 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
6808 DAG.getConstant(Mask, DL, VT));
6812 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
6813 // bit, which is unmodified by sra.
6814 if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
6815 if (N0.getOpcode() == ISD::SRA)
6816 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
6819 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
6820 if (N1C && N0.getOpcode() == ISD::CTLZ &&
6821 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
6823 DAG.computeKnownBits(N0.getOperand(0), Known);
6825 // If any of the input bits are KnownOne, then the input couldn't be all
6826 // zeros, thus the result of the srl will always be zero.
6827 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
6829 // If all of the bits input the to ctlz node are known to be zero, then
6830 // the result of the ctlz is "32" and the result of the shift is one.
6831 APInt UnknownBits = ~Known.Zero;
6832 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
6834 // Otherwise, check to see if there is exactly one bit input to the ctlz.
6835 if (UnknownBits.isPowerOf2()) {
6836 // Okay, we know that only that the single bit specified by UnknownBits
6837 // could be set on input to the CTLZ node. If this bit is set, the SRL
6838 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
6839 // to an SRL/XOR pair, which is likely to simplify more.
6840 unsigned ShAmt = UnknownBits.countTrailingZeros();
6841 SDValue Op = N0.getOperand(0);
6845 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
6846 DAG.getConstant(ShAmt, DL,
6847 getShiftAmountTy(Op.getValueType())));
6848 AddToWorklist(Op.getNode());
6852 return DAG.getNode(ISD::XOR, DL, VT,
6853 Op, DAG.getConstant(1, DL, VT));
6857 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
6858 if (N1.getOpcode() == ISD::TRUNCATE &&
6859 N1.getOperand(0).getOpcode() == ISD::AND) {
6860 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6861 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
6864 // fold operands of srl based on knowledge that the low bits are not
6866 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6867 return SDValue(N, 0);
6869 if (N1C && !N1C->isOpaque())
6870 if (SDValue NewSRL = visitShiftByConstant(N, N1C))
6873 // Attempt to convert a srl of a load into a narrower zero-extending load.
6874 if (SDValue NarrowLoad = ReduceLoadWidth(N))
6877 // Here is a common situation. We want to optimize:
6880 // %b = and i32 %a, 2
6881 // %c = srl i32 %b, 1
6882 // brcond i32 %c ...
6888 // %c = setcc eq %b, 0
6891 // However when after the source operand of SRL is optimized into AND, the SRL
6892 // itself may not be optimized further. Look for it and add the BRCOND into
6894 if (N->hasOneUse()) {
6895 SDNode *Use = *N->use_begin();
6896 if (Use->getOpcode() == ISD::BRCOND)
6898 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
6899 // Also look pass the truncate.
6900 Use = *Use->use_begin();
6901 if (Use->getOpcode() == ISD::BRCOND)
6909 SDValue DAGCombiner::visitABS(SDNode *N) {
6910 SDValue N0 = N->getOperand(0);
6911 EVT VT = N->getValueType(0);
6913 // fold (abs c1) -> c2
6914 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6915 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
6916 // fold (abs (abs x)) -> (abs x)
6917 if (N0.getOpcode() == ISD::ABS)
6919 // fold (abs x) -> x iff not-negative
6920 if (DAG.SignBitIsZero(N0))
6925 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
6926 SDValue N0 = N->getOperand(0);
6927 EVT VT = N->getValueType(0);
6929 // fold (bswap c1) -> c2
6930 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6931 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
6932 // fold (bswap (bswap x)) -> x
6933 if (N0.getOpcode() == ISD::BSWAP)
6934 return N0->getOperand(0);
6938 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
6939 SDValue N0 = N->getOperand(0);
6940 EVT VT = N->getValueType(0);
6942 // fold (bitreverse c1) -> c2
6943 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6944 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
6945 // fold (bitreverse (bitreverse x)) -> x
6946 if (N0.getOpcode() == ISD::BITREVERSE)
6947 return N0.getOperand(0);
6951 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
6952 SDValue N0 = N->getOperand(0);
6953 EVT VT = N->getValueType(0);
6955 // fold (ctlz c1) -> c2
6956 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6957 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
6959 // If the value is known never to be zero, switch to the undef version.
6960 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
6961 if (DAG.isKnownNeverZero(N0))
6962 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6968 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
6969 SDValue N0 = N->getOperand(0);
6970 EVT VT = N->getValueType(0);
6972 // fold (ctlz_zero_undef c1) -> c2
6973 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6974 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6978 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
6979 SDValue N0 = N->getOperand(0);
6980 EVT VT = N->getValueType(0);
6982 // fold (cttz c1) -> c2
6983 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
6984 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
6986 // If the value is known never to be zero, switch to the undef version.
6987 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
6988 if (DAG.isKnownNeverZero(N0))
6989 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
6995 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
6996 SDValue N0 = N->getOperand(0);
6997 EVT VT = N->getValueType(0);
6999 // fold (cttz_zero_undef c1) -> c2
7000 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7001 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7005 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7006 SDValue N0 = N->getOperand(0);
7007 EVT VT = N->getValueType(0);
7009 // fold (ctpop c1) -> c2
7010 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7011 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7015 /// Generate Min/Max node
7016 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7017 SDValue RHS, SDValue True, SDValue False,
7018 ISD::CondCode CC, const TargetLowering &TLI,
7019 SelectionDAG &DAG) {
7020 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7030 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7031 if (TLI.isOperationLegal(Opcode, VT))
7032 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7041 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7042 if (TLI.isOperationLegal(Opcode, VT))
7043 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7051 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7052 SDValue Cond = N->getOperand(0);
7053 SDValue N1 = N->getOperand(1);
7054 SDValue N2 = N->getOperand(2);
7055 EVT VT = N->getValueType(0);
7056 EVT CondVT = Cond.getValueType();
7059 if (!VT.isInteger())
7062 auto *C1 = dyn_cast<ConstantSDNode>(N1);
7063 auto *C2 = dyn_cast<ConstantSDNode>(N2);
7067 // Only do this before legalization to avoid conflicting with target-specific
7068 // transforms in the other direction (create a select from a zext/sext). There
7069 // is also a target-independent combine here in DAGCombiner in the other
7070 // direction for (select Cond, -1, 0) when the condition is not i1.
7071 if (CondVT == MVT::i1 && !LegalOperations) {
7072 if (C1->isNullValue() && C2->isOne()) {
7073 // select Cond, 0, 1 --> zext (!Cond)
7074 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7076 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7079 if (C1->isNullValue() && C2->isAllOnesValue()) {
7080 // select Cond, 0, -1 --> sext (!Cond)
7081 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7083 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7086 if (C1->isOne() && C2->isNullValue()) {
7087 // select Cond, 1, 0 --> zext (Cond)
7089 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7092 if (C1->isAllOnesValue() && C2->isNullValue()) {
7093 // select Cond, -1, 0 --> sext (Cond)
7095 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7099 // For any constants that differ by 1, we can transform the select into an
7100 // extend and add. Use a target hook because some targets may prefer to
7101 // transform in the other direction.
7102 if (TLI.convertSelectOfConstantsToMath(VT)) {
7103 if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7104 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7106 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7107 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7109 if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7110 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7112 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7113 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7120 // fold (select Cond, 0, 1) -> (xor Cond, 1)
7121 // We can't do this reliably if integer based booleans have different contents
7122 // to floating point based booleans. This is because we can't tell whether we
7123 // have an integer-based boolean or a floating-point-based boolean unless we
7124 // can find the SETCC that produced it and inspect its operands. This is
7125 // fairly easy if C is the SETCC node, but it can potentially be
7126 // undiscoverable (or not reasonably discoverable). For example, it could be
7127 // in another basic block or it could require searching a complicated
7129 if (CondVT.isInteger() &&
7130 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7131 TargetLowering::ZeroOrOneBooleanContent &&
7132 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7133 TargetLowering::ZeroOrOneBooleanContent &&
7134 C1->isNullValue() && C2->isOne()) {
7136 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7137 if (VT.bitsEq(CondVT))
7139 return DAG.getZExtOrTrunc(NotCond, DL, VT);
7145 SDValue DAGCombiner::visitSELECT(SDNode *N) {
7146 SDValue N0 = N->getOperand(0);
7147 SDValue N1 = N->getOperand(1);
7148 SDValue N2 = N->getOperand(2);
7149 EVT VT = N->getValueType(0);
7150 EVT VT0 = N0.getValueType();
7153 // fold (select C, X, X) -> X
7157 if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
7158 // fold (select true, X, Y) -> X
7159 // fold (select false, X, Y) -> Y
7160 return !N0C->isNullValue() ? N1 : N2;
7163 // fold (select X, X, Y) -> (or X, Y)
7164 // fold (select X, 1, Y) -> (or C, Y)
7165 if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
7166 return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7168 if (SDValue V = foldSelectOfConstants(N))
7171 // fold (select C, 0, X) -> (and (not C), X)
7172 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7173 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7174 AddToWorklist(NOTNode.getNode());
7175 return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7177 // fold (select C, X, 1) -> (or (not C), X)
7178 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7179 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7180 AddToWorklist(NOTNode.getNode());
7181 return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7183 // fold (select X, Y, X) -> (and X, Y)
7184 // fold (select X, Y, 0) -> (and X, Y)
7185 if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7186 return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7188 // If we can fold this based on the true/false value, do so.
7189 if (SimplifySelectOps(N, N1, N2))
7190 return SDValue(N, 0); // Don't revisit N.
7192 if (VT0 == MVT::i1) {
7193 // The code in this block deals with the following 2 equivalences:
7194 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7195 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
7196 // The target can specify its preferred form with the
7197 // shouldNormalizeToSelectSequence() callback. However we always transform
7198 // to the right anyway if we find the inner select exists in the DAG anyway
7199 // and we always transform to the left side if we know that we can further
7200 // optimize the combination of the conditions.
7201 bool normalizeToSequence =
7202 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7203 // select (and Cond0, Cond1), X, Y
7204 // -> select Cond0, (select Cond1, X, Y), Y
7205 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7206 SDValue Cond0 = N0->getOperand(0);
7207 SDValue Cond1 = N0->getOperand(1);
7208 SDValue InnerSelect =
7209 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7210 if (normalizeToSequence || !InnerSelect.use_empty())
7211 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7214 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7215 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7216 SDValue Cond0 = N0->getOperand(0);
7217 SDValue Cond1 = N0->getOperand(1);
7218 SDValue InnerSelect =
7219 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7220 if (normalizeToSequence || !InnerSelect.use_empty())
7221 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7225 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7226 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7227 SDValue N1_0 = N1->getOperand(0);
7228 SDValue N1_1 = N1->getOperand(1);
7229 SDValue N1_2 = N1->getOperand(2);
7230 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7231 // Create the actual and node if we can generate good code for it.
7232 if (!normalizeToSequence) {
7233 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7234 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7236 // Otherwise see if we can optimize the "and" to a better pattern.
7237 if (SDValue Combined = visitANDLike(N0, N1_0, N))
7238 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7242 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7243 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7244 SDValue N2_0 = N2->getOperand(0);
7245 SDValue N2_1 = N2->getOperand(1);
7246 SDValue N2_2 = N2->getOperand(2);
7247 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7248 // Create the actual or node if we can generate good code for it.
7249 if (!normalizeToSequence) {
7250 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7251 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7253 // Otherwise see if we can optimize to a better pattern.
7254 if (SDValue Combined = visitORLike(N0, N2_0, N))
7255 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7261 if (VT0 == MVT::i1) {
7262 // select (not Cond), N1, N2 -> select Cond, N2, N1
7263 if (isBitwiseNot(N0))
7264 return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
7267 // fold selects based on a setcc into other things, such as min/max/abs
7268 if (N0.getOpcode() == ISD::SETCC) {
7269 // select x, y (fcmp lt x, y) -> fminnum x, y
7270 // select x, y (fcmp gt x, y) -> fmaxnum x, y
7272 // This is OK if we don't care about what happens if either operand is a
7276 // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
7277 // no signed zeros as well as no nans.
7278 const TargetOptions &Options = DAG.getTarget().Options;
7279 if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() &&
7280 DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
7281 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7283 if (SDValue FMinMax = combineMinNumMaxNum(
7284 DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
7288 if ((!LegalOperations &&
7289 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
7290 TLI.isOperationLegal(ISD::SELECT_CC, VT))
7291 return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0),
7292 N0.getOperand(1), N1, N2, N0.getOperand(2));
7293 return SimplifySelect(DL, N0, N1, N2);
7300 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7303 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7305 // Split the inputs.
7306 SDValue Lo, Hi, LL, LH, RL, RH;
7307 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7308 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7310 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7311 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7313 return std::make_pair(Lo, Hi);
7316 // This function assumes all the vselect's arguments are CONCAT_VECTOR
7317 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
7318 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7320 SDValue Cond = N->getOperand(0);
7321 SDValue LHS = N->getOperand(1);
7322 SDValue RHS = N->getOperand(2);
7323 EVT VT = N->getValueType(0);
7324 int NumElems = VT.getVectorNumElements();
7325 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7326 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7327 Cond.getOpcode() == ISD::BUILD_VECTOR);
7329 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
7330 // binary ones here.
7331 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7334 // We're sure we have an even number of elements due to the
7335 // concat_vectors we have as arguments to vselect.
7336 // Skip BV elements until we find one that's not an UNDEF
7337 // After we find an UNDEF element, keep looping until we get to half the
7338 // length of the BV and see if all the non-undef nodes are the same.
7339 ConstantSDNode *BottomHalf = nullptr;
7340 for (int i = 0; i < NumElems / 2; ++i) {
7341 if (Cond->getOperand(i)->isUndef())
7344 if (BottomHalf == nullptr)
7345 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7346 else if (Cond->getOperand(i).getNode() != BottomHalf)
7350 // Do the same for the second half of the BuildVector
7351 ConstantSDNode *TopHalf = nullptr;
7352 for (int i = NumElems / 2; i < NumElems; ++i) {
7353 if (Cond->getOperand(i)->isUndef())
7356 if (TopHalf == nullptr)
7357 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7358 else if (Cond->getOperand(i).getNode() != TopHalf)
7362 assert(TopHalf && BottomHalf &&
7363 "One half of the selector was all UNDEFs and the other was all the "
7364 "same value. This should have been addressed before this function.");
7366 ISD::CONCAT_VECTORS, DL, VT,
7367 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7368 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7371 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7372 if (Level >= AfterLegalizeTypes)
7375 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7376 SDValue Mask = MSC->getMask();
7377 SDValue Data = MSC->getValue();
7380 // If the MSCATTER data type requires splitting and the mask is provided by a
7381 // SETCC, then split both nodes and its operands before legalization. This
7382 // prevents the type legalizer from unrolling SETCC into scalar comparisons
7383 // and enables future optimizations (e.g. min/max pattern matching on X86).
7384 if (Mask.getOpcode() != ISD::SETCC)
7387 // Check if any splitting is required.
7388 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7389 TargetLowering::TypeSplitVector)
7391 SDValue MaskLo, MaskHi, Lo, Hi;
7392 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7395 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7397 SDValue Chain = MSC->getChain();
7399 EVT MemoryVT = MSC->getMemoryVT();
7400 unsigned Alignment = MSC->getOriginalAlignment();
7402 EVT LoMemVT, HiMemVT;
7403 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7405 SDValue DataLo, DataHi;
7406 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7408 SDValue Scale = MSC->getScale();
7409 SDValue BasePtr = MSC->getBasePtr();
7410 SDValue IndexLo, IndexHi;
7411 std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7413 MachineMemOperand *MMO = DAG.getMachineFunction().
7414 getMachineMemOperand(MSC->getPointerInfo(),
7415 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
7416 Alignment, MSC->getAAInfo(), MSC->getRanges());
7418 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7419 Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
7422 SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7423 Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7426 AddToWorklist(Lo.getNode());
7427 AddToWorklist(Hi.getNode());
7429 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7432 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7433 if (Level >= AfterLegalizeTypes)
7436 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7437 SDValue Mask = MST->getMask();
7438 SDValue Data = MST->getValue();
7439 EVT VT = Data.getValueType();
7442 // If the MSTORE data type requires splitting and the mask is provided by a
7443 // SETCC, then split both nodes and its operands before legalization. This
7444 // prevents the type legalizer from unrolling SETCC into scalar comparisons
7445 // and enables future optimizations (e.g. min/max pattern matching on X86).
7446 if (Mask.getOpcode() == ISD::SETCC) {
7447 // Check if any splitting is required.
7448 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7449 TargetLowering::TypeSplitVector)
7452 SDValue MaskLo, MaskHi, Lo, Hi;
7453 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7455 SDValue Chain = MST->getChain();
7456 SDValue Ptr = MST->getBasePtr();
7458 EVT MemoryVT = MST->getMemoryVT();
7459 unsigned Alignment = MST->getOriginalAlignment();
7461 // if Alignment is equal to the vector size,
7462 // take the half of it for the second part
7463 unsigned SecondHalfAlignment =
7464 (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7466 EVT LoMemVT, HiMemVT;
7467 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7469 SDValue DataLo, DataHi;
7470 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7472 MachineMemOperand *MMO = DAG.getMachineFunction().
7473 getMachineMemOperand(MST->getPointerInfo(),
7474 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
7475 Alignment, MST->getAAInfo(), MST->getRanges());
7477 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7478 MST->isTruncatingStore(),
7479 MST->isCompressingStore());
7481 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7482 MST->isCompressingStore());
7483 unsigned HiOffset = LoMemVT.getStoreSize();
7485 MMO = DAG.getMachineFunction().getMachineMemOperand(
7486 MST->getPointerInfo().getWithOffset(HiOffset),
7487 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7488 MST->getAAInfo(), MST->getRanges());
7490 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7491 MST->isTruncatingStore(),
7492 MST->isCompressingStore());
7494 AddToWorklist(Lo.getNode());
7495 AddToWorklist(Hi.getNode());
7497 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7502 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
7503 if (Level >= AfterLegalizeTypes)
7506 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
7507 SDValue Mask = MGT->getMask();
7510 // If the MGATHER result requires splitting and the mask is provided by a
7511 // SETCC, then split both nodes and its operands before legalization. This
7512 // prevents the type legalizer from unrolling SETCC into scalar comparisons
7513 // and enables future optimizations (e.g. min/max pattern matching on X86).
7515 if (Mask.getOpcode() != ISD::SETCC)
7518 EVT VT = N->getValueType(0);
7520 // Check if any splitting is required.
7521 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7522 TargetLowering::TypeSplitVector)
7525 SDValue MaskLo, MaskHi, Lo, Hi;
7526 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7528 SDValue Src0 = MGT->getValue();
7529 SDValue Src0Lo, Src0Hi;
7530 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
7533 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
7535 SDValue Chain = MGT->getChain();
7536 EVT MemoryVT = MGT->getMemoryVT();
7537 unsigned Alignment = MGT->getOriginalAlignment();
7539 EVT LoMemVT, HiMemVT;
7540 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7542 SDValue Scale = MGT->getScale();
7543 SDValue BasePtr = MGT->getBasePtr();
7544 SDValue Index = MGT->getIndex();
7545 SDValue IndexLo, IndexHi;
7546 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
7548 MachineMemOperand *MMO = DAG.getMachineFunction().
7549 getMachineMemOperand(MGT->getPointerInfo(),
7550 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
7551 Alignment, MGT->getAAInfo(), MGT->getRanges());
7553 SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale };
7554 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
7557 SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale };
7558 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
7561 AddToWorklist(Lo.getNode());
7562 AddToWorklist(Hi.getNode());
7564 // Build a factor node to remember that this load is independent of the
7566 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7569 // Legalized the chain result - switch anything that used the old chain to
7571 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
7573 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7575 SDValue RetOps[] = { GatherRes, Chain };
7576 return DAG.getMergeValues(RetOps, DL);
7579 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7580 if (Level >= AfterLegalizeTypes)
7583 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7584 SDValue Mask = MLD->getMask();
7587 // If the MLOAD result requires splitting and the mask is provided by a
7588 // SETCC, then split both nodes and its operands before legalization. This
7589 // prevents the type legalizer from unrolling SETCC into scalar comparisons
7590 // and enables future optimizations (e.g. min/max pattern matching on X86).
7591 if (Mask.getOpcode() == ISD::SETCC) {
7592 EVT VT = N->getValueType(0);
7594 // Check if any splitting is required.
7595 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7596 TargetLowering::TypeSplitVector)
7599 SDValue MaskLo, MaskHi, Lo, Hi;
7600 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7602 SDValue Src0 = MLD->getSrc0();
7603 SDValue Src0Lo, Src0Hi;
7604 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
7607 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7609 SDValue Chain = MLD->getChain();
7610 SDValue Ptr = MLD->getBasePtr();
7611 EVT MemoryVT = MLD->getMemoryVT();
7612 unsigned Alignment = MLD->getOriginalAlignment();
7614 // if Alignment is equal to the vector size,
7615 // take the half of it for the second part
7616 unsigned SecondHalfAlignment =
7617 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7618 Alignment/2 : Alignment;
7620 EVT LoMemVT, HiMemVT;
7621 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7623 MachineMemOperand *MMO = DAG.getMachineFunction().
7624 getMachineMemOperand(MLD->getPointerInfo(),
7625 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
7626 Alignment, MLD->getAAInfo(), MLD->getRanges());
7628 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
7629 ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7631 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7632 MLD->isExpandingLoad());
7633 unsigned HiOffset = LoMemVT.getStoreSize();
7635 MMO = DAG.getMachineFunction().getMachineMemOperand(
7636 MLD->getPointerInfo().getWithOffset(HiOffset),
7637 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
7638 MLD->getAAInfo(), MLD->getRanges());
7640 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
7641 ISD::NON_EXTLOAD, MLD->isExpandingLoad());
7643 AddToWorklist(Lo.getNode());
7644 AddToWorklist(Hi.getNode());
7646 // Build a factor node to remember that this load is independent of the
7648 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7651 // Legalized the chain result - switch anything that used the old chain to
7653 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
7655 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7657 SDValue RetOps[] = { LoadRes, Chain };
7658 return DAG.getMergeValues(RetOps, DL);
7663 /// A vector select of 2 constant vectors can be simplified to math/logic to
7664 /// avoid a variable select instruction and possibly avoid constant loads.
7665 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
7666 SDValue Cond = N->getOperand(0);
7667 SDValue N1 = N->getOperand(1);
7668 SDValue N2 = N->getOperand(2);
7669 EVT VT = N->getValueType(0);
7670 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
7671 !TLI.convertSelectOfConstantsToMath(VT) ||
7672 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
7673 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
7676 // Check if we can use the condition value to increment/decrement a single
7677 // constant value. This simplifies a select to an add and removes a constant
7678 // load/materialization from the general case.
7679 bool AllAddOne = true;
7680 bool AllSubOne = true;
7681 unsigned Elts = VT.getVectorNumElements();
7682 for (unsigned i = 0; i != Elts; ++i) {
7683 SDValue N1Elt = N1.getOperand(i);
7684 SDValue N2Elt = N2.getOperand(i);
7685 if (N1Elt.isUndef() || N2Elt.isUndef())
7688 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
7689 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
7696 // Further simplifications for the extra-special cases where the constants are
7697 // all 0 or all -1 should be implemented as folds of these patterns.
7699 if (AllAddOne || AllSubOne) {
7700 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
7701 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
7702 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
7703 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
7704 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
7707 // The general case for select-of-constants:
7708 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
7709 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
7710 // leave that to a machine-specific pass.
7714 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
7715 SDValue N0 = N->getOperand(0);
7716 SDValue N1 = N->getOperand(1);
7717 SDValue N2 = N->getOperand(2);
7720 // fold (vselect C, X, X) -> X
7724 // Canonicalize integer abs.
7725 // vselect (setg[te] X, 0), X, -X ->
7726 // vselect (setgt X, -1), X, -X ->
7727 // vselect (setl[te] X, 0), -X, X ->
7728 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
7729 if (N0.getOpcode() == ISD::SETCC) {
7730 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
7731 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7733 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
7735 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
7736 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
7737 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
7738 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
7739 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
7740 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
7741 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7744 EVT VT = LHS.getValueType();
7745 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
7746 return DAG.getNode(ISD::ABS, DL, VT, LHS);
7748 SDValue Shift = DAG.getNode(
7749 ISD::SRA, DL, VT, LHS,
7750 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
7751 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
7752 AddToWorklist(Shift.getNode());
7753 AddToWorklist(Add.getNode());
7754 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
7757 // If this select has a condition (setcc) with narrower operands than the
7758 // select, try to widen the compare to match the select width.
7759 // TODO: This should be extended to handle any constant.
7760 // TODO: This could be extended to handle non-loading patterns, but that
7761 // requires thorough testing to avoid regressions.
7762 if (isNullConstantOrNullSplatConstant(RHS)) {
7763 EVT NarrowVT = LHS.getValueType();
7764 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
7765 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
7766 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
7767 unsigned WideWidth = WideVT.getScalarSizeInBits();
7768 bool IsSigned = isSignedIntSetCC(CC);
7769 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
7770 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
7771 SetCCWidth != 1 && SetCCWidth < WideWidth &&
7772 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
7773 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
7774 // Both compare operands can be widened for free. The LHS can use an
7775 // extended load, and the RHS is a constant:
7776 // vselect (ext (setcc load(X), C)), N1, N2 -->
7777 // vselect (setcc extload(X), C'), N1, N2
7778 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7779 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
7780 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
7781 EVT WideSetCCVT = getSetCCResultType(WideVT);
7782 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
7783 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
7788 if (SimplifySelectOps(N, N1, N2))
7789 return SDValue(N, 0); // Don't revisit N.
7791 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
7792 if (ISD::isBuildVectorAllOnes(N0.getNode()))
7794 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
7795 if (ISD::isBuildVectorAllZeros(N0.getNode()))
7798 // The ConvertSelectToConcatVector function is assuming both the above
7799 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
7801 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
7802 N2.getOpcode() == ISD::CONCAT_VECTORS &&
7803 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
7804 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
7808 if (SDValue V = foldVSelectOfConstants(N))
7814 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
7815 SDValue N0 = N->getOperand(0);
7816 SDValue N1 = N->getOperand(1);
7817 SDValue N2 = N->getOperand(2);
7818 SDValue N3 = N->getOperand(3);
7819 SDValue N4 = N->getOperand(4);
7820 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
7822 // fold select_cc lhs, rhs, x, x, cc -> x
7826 // Determine if the condition we're dealing with is constant
7827 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
7828 CC, SDLoc(N), false)) {
7829 AddToWorklist(SCC.getNode());
7831 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
7832 if (!SCCC->isNullValue())
7833 return N2; // cond always true -> true val
7835 return N3; // cond always false -> false val
7836 } else if (SCC->isUndef()) {
7837 // When the condition is UNDEF, just return the first operand. This is
7838 // coherent the DAG creation, no setcc node is created in this case
7840 } else if (SCC.getOpcode() == ISD::SETCC) {
7841 // Fold to a simpler select_cc
7842 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
7843 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
7848 // If we can fold this based on the true/false value, do so.
7849 if (SimplifySelectOps(N, N2, N3))
7850 return SDValue(N, 0); // Don't revisit N.
7852 // fold select_cc into other things, such as min/max/abs
7853 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
7856 SDValue DAGCombiner::visitSETCC(SDNode *N) {
7857 // setcc is very commonly used as an argument to brcond. This pattern
7858 // also lend itself to numerous combines and, as a result, it is desired
7859 // we keep the argument to a brcond as a setcc as much as possible.
7861 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
7863 SDValue Combined = SimplifySetCC(
7864 N->getValueType(0), N->getOperand(0), N->getOperand(1),
7865 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
7870 // If we prefer to have a setcc, and we don't, we'll try our best to
7871 // recreate one using rebuildSetCC.
7872 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
7873 SDValue NewSetCC = rebuildSetCC(Combined);
7875 // We don't have anything interesting to combine to.
7876 if (NewSetCC.getNode() == N)
7886 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
7887 SDValue LHS = N->getOperand(0);
7888 SDValue RHS = N->getOperand(1);
7889 SDValue Carry = N->getOperand(2);
7890 SDValue Cond = N->getOperand(3);
7892 // If Carry is false, fold to a regular SETCC.
7893 if (isNullConstant(Carry))
7894 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
7899 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
7900 /// a build_vector of constants.
7901 /// This function is called by the DAGCombiner when visiting sext/zext/aext
7902 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
7903 /// Vector extends are not folded if operations are legal; this is to
7904 /// avoid introducing illegal build_vector dag nodes.
7905 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
7906 SelectionDAG &DAG, bool LegalTypes,
7907 bool LegalOperations) {
7908 unsigned Opcode = N->getOpcode();
7909 SDValue N0 = N->getOperand(0);
7910 EVT VT = N->getValueType(0);
7912 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
7913 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7914 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
7915 && "Expected EXTEND dag node in input!");
7917 // fold (sext c1) -> c1
7918 // fold (zext c1) -> c1
7919 // fold (aext c1) -> c1
7920 if (isa<ConstantSDNode>(N0))
7921 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
7923 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
7924 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
7925 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
7926 EVT SVT = VT.getScalarType();
7927 if (!(VT.isVector() &&
7928 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
7929 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
7932 // We can fold this node into a build_vector.
7933 unsigned VTBits = SVT.getSizeInBits();
7934 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
7935 SmallVector<SDValue, 8> Elts;
7936 unsigned NumElts = VT.getVectorNumElements();
7939 for (unsigned i=0; i != NumElts; ++i) {
7940 SDValue Op = N0->getOperand(i);
7941 if (Op->isUndef()) {
7942 Elts.push_back(DAG.getUNDEF(SVT));
7947 // Get the constant value and if needed trunc it to the size of the type.
7948 // Nodes like build_vector might have constants wider than the scalar type.
7949 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
7950 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
7951 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
7953 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
7956 return DAG.getBuildVector(VT, DL, Elts).getNode();
7959 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
7960 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
7961 // transformation. Returns true if extension are possible and the above
7962 // mentioned transformation is profitable.
7963 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
7965 SmallVectorImpl<SDNode *> &ExtendNodes,
7966 const TargetLowering &TLI) {
7967 bool HasCopyToRegUses = false;
7968 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
7969 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
7970 UE = N0.getNode()->use_end();
7975 if (UI.getUse().getResNo() != N0.getResNo())
7977 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
7978 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
7979 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
7980 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
7981 // Sign bits will be lost after a zext.
7984 for (unsigned i = 0; i != 2; ++i) {
7985 SDValue UseOp = User->getOperand(i);
7988 if (!isa<ConstantSDNode>(UseOp))
7993 ExtendNodes.push_back(User);
7996 // If truncates aren't free and there are users we can't
7997 // extend, it isn't worthwhile.
8000 // Remember if this value is live-out.
8001 if (User->getOpcode() == ISD::CopyToReg)
8002 HasCopyToRegUses = true;
8005 if (HasCopyToRegUses) {
8006 bool BothLiveOut = false;
8007 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8009 SDUse &Use = UI.getUse();
8010 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8016 // Both unextended and extended values are live out. There had better be
8017 // a good reason for the transformation.
8018 return ExtendNodes.size();
8023 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8024 SDValue OrigLoad, SDValue ExtLoad,
8025 ISD::NodeType ExtType) {
8026 // Extend SetCC uses if necessary.
8028 for (SDNode *SetCC : SetCCs) {
8029 SmallVector<SDValue, 4> Ops;
8031 for (unsigned j = 0; j != 2; ++j) {
8032 SDValue SOp = SetCC->getOperand(j);
8033 if (SOp == OrigLoad)
8034 Ops.push_back(ExtLoad);
8036 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8039 Ops.push_back(SetCC->getOperand(2));
8040 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8044 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8045 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8046 SDValue N0 = N->getOperand(0);
8047 EVT DstVT = N->getValueType(0);
8048 EVT SrcVT = N0.getValueType();
8050 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8051 N->getOpcode() == ISD::ZERO_EXTEND) &&
8052 "Unexpected node type (not an extend)!");
8054 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8055 // For example, on a target with legal v4i32, but illegal v8i32, turn:
8056 // (v8i32 (sext (v8i16 (load x))))
8058 // (v8i32 (concat_vectors (v4i32 (sextload x)),
8059 // (v4i32 (sextload (x + 16)))))
8060 // Where uses of the original load, i.e.:
8062 // are replaced with:
8064 // (v8i32 (concat_vectors (v4i32 (sextload x)),
8065 // (v4i32 (sextload (x + 16)))))))
8067 // This combine is only applicable to illegal, but splittable, vectors.
8068 // All legal types, and illegal non-vector types, are handled elsewhere.
8069 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8071 if (N0->getOpcode() != ISD::LOAD)
8074 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8076 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
8077 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
8078 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
8081 SmallVector<SDNode *, 4> SetCCs;
8082 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
8085 ISD::LoadExtType ExtType =
8086 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8088 // Try to split the vector types to get down to legal types.
8089 EVT SplitSrcVT = SrcVT;
8090 EVT SplitDstVT = DstVT;
8091 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
8092 SplitSrcVT.getVectorNumElements() > 1) {
8093 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
8094 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
8097 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
8101 const unsigned NumSplits =
8102 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
8103 const unsigned Stride = SplitSrcVT.getStoreSize();
8104 SmallVector<SDValue, 4> Loads;
8105 SmallVector<SDValue, 4> Chains;
8107 SDValue BasePtr = LN0->getBasePtr();
8108 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
8109 const unsigned Offset = Idx * Stride;
8110 const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
8112 SDValue SplitLoad = DAG.getExtLoad(
8113 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
8114 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
8115 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8117 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8118 DAG.getConstant(Stride, DL, BasePtr.getValueType()));
8120 Loads.push_back(SplitLoad.getValue(0));
8121 Chains.push_back(SplitLoad.getValue(1));
8124 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
8125 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
8128 AddToWorklist(NewChain.getNode());
8130 CombineTo(N, NewValue);
8132 // Replace uses of the original load (before extension)
8133 // with a truncate of the concatenated sextloaded vectors.
8135 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
8136 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
8137 CombineTo(N0.getNode(), Trunc, NewChain);
8138 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8141 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8142 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8143 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8144 assert(N->getOpcode() == ISD::ZERO_EXTEND);
8145 EVT VT = N->getValueType(0);
8148 SDValue N0 = N->getOperand(0);
8149 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8150 N0.getOpcode() == ISD::XOR) ||
8151 N0.getOperand(1).getOpcode() != ISD::Constant ||
8152 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
8156 SDValue N1 = N0->getOperand(0);
8157 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
8158 N1.getOperand(1).getOpcode() != ISD::Constant ||
8159 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
8163 if (!isa<LoadSDNode>(N1.getOperand(0)))
8165 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
8166 EVT MemVT = Load->getMemoryVT();
8167 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
8168 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8172 // If the shift op is SHL, the logic op must be AND, otherwise the result
8174 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
8177 if (!N0.hasOneUse() || !N1.hasOneUse())
8180 SmallVector<SDNode*, 4> SetCCs;
8181 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
8182 ISD::ZERO_EXTEND, SetCCs, TLI))
8185 // Actually do the transformation.
8186 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8187 Load->getChain(), Load->getBasePtr(),
8188 Load->getMemoryVT(), Load->getMemOperand());
8191 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
8194 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8195 Mask = Mask.zext(VT.getSizeInBits());
8197 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
8198 DAG.getConstant(Mask, DL0, VT));
8200 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8202 if (SDValue(Load, 0).hasOneUse()) {
8203 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8205 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8206 Load->getValueType(0), ExtLoad);
8207 CombineTo(Load, Trunc, ExtLoad.getValue(1));
8209 return SDValue(N,0); // Return N so it doesn't get rechecked!
8212 /// If we're narrowing or widening the result of a vector select and the final
8213 /// size is the same size as a setcc (compare) feeding the select, then try to
8214 /// apply the cast operation to the select's operands because matching vector
8215 /// sizes for a select condition and other operands should be more efficient.
8216 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8217 unsigned CastOpcode = Cast->getOpcode();
8218 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8219 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8220 CastOpcode == ISD::FP_ROUND) &&
8221 "Unexpected opcode for vector select narrowing/widening");
8223 // We only do this transform before legal ops because the pattern may be
8224 // obfuscated by target-specific operations after legalization. Do not create
8225 // an illegal select op, however, because that may be difficult to lower.
8226 EVT VT = Cast->getValueType(0);
8227 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8230 SDValue VSel = Cast->getOperand(0);
8231 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8232 VSel.getOperand(0).getOpcode() != ISD::SETCC)
8235 // Does the setcc have the same vector size as the casted select?
8236 SDValue SetCC = VSel.getOperand(0);
8237 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8238 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8241 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8242 SDValue A = VSel.getOperand(1);
8243 SDValue B = VSel.getOperand(2);
8244 SDValue CastA, CastB;
8246 if (CastOpcode == ISD::FP_ROUND) {
8247 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8248 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8249 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8251 CastA = DAG.getNode(CastOpcode, DL, VT, A);
8252 CastB = DAG.getNode(CastOpcode, DL, VT, B);
8254 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8257 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8258 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8259 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8260 const TargetLowering &TLI, EVT VT,
8261 bool LegalOperations, SDNode *N,
8262 SDValue N0, ISD::LoadExtType ExtLoadType) {
8263 SDNode *N0Node = N0.getNode();
8264 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8265 : ISD::isZEXTLoad(N0Node);
8266 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8267 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8270 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8271 EVT MemVT = LN0->getMemoryVT();
8272 if ((LegalOperations || LN0->isVolatile()) &&
8273 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8277 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8278 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8279 Combiner.CombineTo(N, ExtLoad);
8280 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8281 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8284 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8285 // Only generate vector extloads when 1) they're legal, and 2) they are
8286 // deemed desirable by the target.
8287 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8288 const TargetLowering &TLI, EVT VT,
8289 bool LegalOperations, SDNode *N, SDValue N0,
8290 ISD::LoadExtType ExtLoadType,
8291 ISD::NodeType ExtOpc) {
8292 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8293 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8294 ((LegalOperations || VT.isVector() ||
8295 cast<LoadSDNode>(N0)->isVolatile()) &&
8296 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8299 bool DoXform = true;
8300 SmallVector<SDNode *, 4> SetCCs;
8301 if (!N0.hasOneUse())
8302 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8304 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8308 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8309 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8310 LN0->getBasePtr(), N0.getValueType(),
8311 LN0->getMemOperand());
8312 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8313 // If the load value is used only by N, replace it via CombineTo N.
8314 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8315 Combiner.CombineTo(N, ExtLoad);
8316 if (NoReplaceTrunc) {
8317 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8320 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8321 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8323 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8326 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8327 bool LegalOperations) {
8328 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8329 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8331 SDValue SetCC = N->getOperand(0);
8332 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8333 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8336 SDValue X = SetCC.getOperand(0);
8337 SDValue Ones = SetCC.getOperand(1);
8338 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8339 EVT VT = N->getValueType(0);
8340 EVT XVT = X.getValueType();
8341 // setge X, C is canonicalized to setgt, so we do not need to match that
8342 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8343 // not require the 'not' op.
8344 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8345 // Invert and smear/shift the sign bit:
8346 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8347 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8349 SDValue NotX = DAG.getNOT(DL, X, VT);
8350 SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8351 auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8352 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8357 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8358 SDValue N0 = N->getOperand(0);
8359 EVT VT = N->getValueType(0);
8362 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8364 return SDValue(Res, 0);
8366 // fold (sext (sext x)) -> (sext x)
8367 // fold (sext (aext x)) -> (sext x)
8368 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8369 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8371 if (N0.getOpcode() == ISD::TRUNCATE) {
8372 // fold (sext (truncate (load x))) -> (sext (smaller load x))
8373 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8374 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8375 SDNode *oye = N0.getOperand(0).getNode();
8376 if (NarrowLoad.getNode() != N0.getNode()) {
8377 CombineTo(N0.getNode(), NarrowLoad);
8378 // CombineTo deleted the truncate, if needed, but not what's under it.
8381 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8384 // See if the value being truncated is already sign extended. If so, just
8385 // eliminate the trunc/sext pair.
8386 SDValue Op = N0.getOperand(0);
8387 unsigned OpBits = Op.getScalarValueSizeInBits();
8388 unsigned MidBits = N0.getScalarValueSizeInBits();
8389 unsigned DestBits = VT.getScalarSizeInBits();
8390 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8392 if (OpBits == DestBits) {
8393 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
8394 // bits, it is already ready.
8395 if (NumSignBits > DestBits-MidBits)
8397 } else if (OpBits < DestBits) {
8398 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
8399 // bits, just sext from i32.
8400 if (NumSignBits > OpBits-MidBits)
8401 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8403 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
8404 // bits, just truncate to i32.
8405 if (NumSignBits > OpBits-MidBits)
8406 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8409 // fold (sext (truncate x)) -> (sextinreg x).
8410 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8411 N0.getValueType())) {
8412 if (OpBits < DestBits)
8413 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8414 else if (OpBits > DestBits)
8415 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8416 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8417 DAG.getValueType(N0.getValueType()));
8421 // Try to simplify (sext (load x)).
8422 if (SDValue foldedExt =
8423 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8424 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
8427 // fold (sext (load x)) to multiple smaller sextloads.
8428 // Only on illegal but splittable vectors.
8429 if (SDValue ExtLoad = CombineExtLoad(N))
8432 // Try to simplify (sext (sextload x)).
8433 if (SDValue foldedExt = tryToFoldExtOfExtload(
8434 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
8437 // fold (sext (and/or/xor (load x), cst)) ->
8438 // (and/or/xor (sextload x), (sext cst))
8439 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8440 N0.getOpcode() == ISD::XOR) &&
8441 isa<LoadSDNode>(N0.getOperand(0)) &&
8442 N0.getOperand(1).getOpcode() == ISD::Constant &&
8443 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8444 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8445 EVT MemVT = LN00->getMemoryVT();
8446 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
8447 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
8448 SmallVector<SDNode*, 4> SetCCs;
8449 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8450 ISD::SIGN_EXTEND, SetCCs, TLI);
8452 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
8453 LN00->getChain(), LN00->getBasePtr(),
8454 LN00->getMemoryVT(),
8455 LN00->getMemOperand());
8456 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8457 Mask = Mask.sext(VT.getSizeInBits());
8458 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8459 ExtLoad, DAG.getConstant(Mask, DL, VT));
8460 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
8461 bool NoReplaceTruncAnd = !N0.hasOneUse();
8462 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8464 // If N0 has multiple uses, change other uses as well.
8465 if (NoReplaceTruncAnd) {
8467 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8468 CombineTo(N0.getNode(), TruncAnd);
8470 if (NoReplaceTrunc) {
8471 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8473 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8474 LN00->getValueType(0), ExtLoad);
8475 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8477 return SDValue(N,0); // Return N so it doesn't get rechecked!
8482 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8485 if (N0.getOpcode() == ISD::SETCC) {
8486 SDValue N00 = N0.getOperand(0);
8487 SDValue N01 = N0.getOperand(1);
8488 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8489 EVT N00VT = N0.getOperand(0).getValueType();
8491 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
8492 // Only do this before legalize for now.
8493 if (VT.isVector() && !LegalOperations &&
8494 TLI.getBooleanContents(N00VT) ==
8495 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8496 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
8497 // of the same size as the compared operands. Only optimize sext(setcc())
8498 // if this is the case.
8499 EVT SVT = getSetCCResultType(N00VT);
8501 // We know that the # elements of the results is the same as the
8502 // # elements of the compare (and the # elements of the compare result
8503 // for that matter). Check to see that they are the same size. If so,
8504 // we know that the element size of the sext'd result matches the
8505 // element size of the compare operands.
8506 if (VT.getSizeInBits() == SVT.getSizeInBits())
8507 return DAG.getSetCC(DL, VT, N00, N01, CC);
8509 // If the desired elements are smaller or larger than the source
8510 // elements, we can use a matching integer vector type and then
8511 // truncate/sign extend.
8512 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
8513 if (SVT == MatchingVecType) {
8514 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
8515 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
8519 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
8520 // Here, T can be 1 or -1, depending on the type of the setcc and
8521 // getBooleanContents().
8522 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
8524 // To determine the "true" side of the select, we need to know the high bit
8525 // of the value returned by the setcc if it evaluates to true.
8526 // If the type of the setcc is i1, then the true case of the select is just
8527 // sext(i1 1), that is, -1.
8528 // If the type of the setcc is larger (say, i8) then the value of the high
8529 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
8530 // of the appropriate width.
8531 SDValue ExtTrueVal = (SetCCWidth == 1)
8532 ? DAG.getAllOnesConstant(DL, VT)
8533 : DAG.getBoolConstant(true, DL, VT, N00VT);
8534 SDValue Zero = DAG.getConstant(0, DL, VT);
8536 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
8539 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
8540 EVT SetCCVT = getSetCCResultType(N00VT);
8541 // Don't do this transform for i1 because there's a select transform
8542 // that would reverse it.
8543 // TODO: We should not do this transform at all without a target hook
8544 // because a sext is likely cheaper than a select?
8545 if (SetCCVT.getScalarSizeInBits() != 1 &&
8546 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
8547 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
8548 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
8553 // fold (sext x) -> (zext x) if the sign bit is known zero.
8554 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
8555 DAG.SignBitIsZero(N0))
8556 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
8558 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8564 // isTruncateOf - If N is a truncate of some other value, return true, record
8565 // the value being truncated in Op and which of Op's bits are zero/one in Known.
8566 // This function computes KnownBits to avoid a duplicated call to
8567 // computeKnownBits in the caller.
8568 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8570 if (N->getOpcode() == ISD::TRUNCATE) {
8571 Op = N->getOperand(0);
8572 DAG.computeKnownBits(Op, Known);
8576 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
8577 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
8580 SDValue Op0 = N->getOperand(0);
8581 SDValue Op1 = N->getOperand(1);
8582 assert(Op0.getValueType() == Op1.getValueType());
8584 if (isNullConstant(Op0))
8586 else if (isNullConstant(Op1))
8591 DAG.computeKnownBits(Op, Known);
8593 if (!(Known.Zero | 1).isAllOnesValue())
8599 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
8600 SDValue N0 = N->getOperand(0);
8601 EVT VT = N->getValueType(0);
8603 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8605 return SDValue(Res, 0);
8607 // fold (zext (zext x)) -> (zext x)
8608 // fold (zext (aext x)) -> (zext x)
8609 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8610 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
8613 // fold (zext (truncate x)) -> (zext x) or
8614 // (zext (truncate x)) -> (truncate x)
8615 // This is valid when the truncated bits of x are already zero.
8616 // FIXME: We should extend this to work for vectors too.
8619 if (!VT.isVector() && isTruncateOf(DAG, N0, Op, Known)) {
8620 APInt TruncatedBits =
8621 (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
8622 APInt(Op.getValueSizeInBits(), 0) :
8623 APInt::getBitsSet(Op.getValueSizeInBits(),
8624 N0.getValueSizeInBits(),
8625 std::min(Op.getValueSizeInBits(),
8626 VT.getSizeInBits()));
8627 if (TruncatedBits.isSubsetOf(Known.Zero))
8628 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8631 // fold (zext (truncate x)) -> (and x, mask)
8632 if (N0.getOpcode() == ISD::TRUNCATE) {
8633 // fold (zext (truncate (load x))) -> (zext (smaller load x))
8634 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
8635 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8636 SDNode *oye = N0.getOperand(0).getNode();
8637 if (NarrowLoad.getNode() != N0.getNode()) {
8638 CombineTo(N0.getNode(), NarrowLoad);
8639 // CombineTo deleted the truncate, if needed, but not what's under it.
8642 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8645 EVT SrcVT = N0.getOperand(0).getValueType();
8646 EVT MinVT = N0.getValueType();
8648 // Try to mask before the extension to avoid having to generate a larger mask,
8649 // possibly over several sub-vectors.
8650 if (SrcVT.bitsLT(VT) && VT.isVector()) {
8651 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
8652 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
8653 SDValue Op = N0.getOperand(0);
8654 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8655 AddToWorklist(Op.getNode());
8656 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
8657 // Transfer the debug info; the new node is equivalent to N0.
8658 DAG.transferDbgValues(N0, ZExtOrTrunc);
8663 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
8664 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8665 AddToWorklist(Op.getNode());
8666 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
8667 // We may safely transfer the debug info describing the truncate node over
8668 // to the equivalent and operation.
8669 DAG.transferDbgValues(N0, And);
8674 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
8675 // if either of the casts is not free.
8676 if (N0.getOpcode() == ISD::AND &&
8677 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8678 N0.getOperand(1).getOpcode() == ISD::Constant &&
8679 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8680 N0.getValueType()) ||
8681 !TLI.isZExtFree(N0.getValueType(), VT))) {
8682 SDValue X = N0.getOperand(0).getOperand(0);
8683 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
8684 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8685 Mask = Mask.zext(VT.getSizeInBits());
8687 return DAG.getNode(ISD::AND, DL, VT,
8688 X, DAG.getConstant(Mask, DL, VT));
8691 // Try to simplify (zext (load x)).
8692 if (SDValue foldedExt =
8693 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8694 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
8697 // fold (zext (load x)) to multiple smaller zextloads.
8698 // Only on illegal but splittable vectors.
8699 if (SDValue ExtLoad = CombineExtLoad(N))
8702 // fold (zext (and/or/xor (load x), cst)) ->
8703 // (and/or/xor (zextload x), (zext cst))
8704 // Unless (and (load x) cst) will match as a zextload already and has
8705 // additional users.
8706 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8707 N0.getOpcode() == ISD::XOR) &&
8708 isa<LoadSDNode>(N0.getOperand(0)) &&
8709 N0.getOperand(1).getOpcode() == ISD::Constant &&
8710 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8711 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8712 EVT MemVT = LN00->getMemoryVT();
8713 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
8714 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
8715 bool DoXform = true;
8716 SmallVector<SDNode*, 4> SetCCs;
8717 if (!N0.hasOneUse()) {
8718 if (N0.getOpcode() == ISD::AND) {
8719 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
8720 EVT LoadResultTy = AndC->getValueType(0);
8722 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
8727 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8728 ISD::ZERO_EXTEND, SetCCs, TLI);
8730 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
8731 LN00->getChain(), LN00->getBasePtr(),
8732 LN00->getMemoryVT(),
8733 LN00->getMemOperand());
8734 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8735 Mask = Mask.zext(VT.getSizeInBits());
8737 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8738 ExtLoad, DAG.getConstant(Mask, DL, VT));
8739 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8740 bool NoReplaceTruncAnd = !N0.hasOneUse();
8741 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8743 // If N0 has multiple uses, change other uses as well.
8744 if (NoReplaceTruncAnd) {
8746 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8747 CombineTo(N0.getNode(), TruncAnd);
8749 if (NoReplaceTrunc) {
8750 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8752 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8753 LN00->getValueType(0), ExtLoad);
8754 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8756 return SDValue(N,0); // Return N so it doesn't get rechecked!
8761 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8762 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
8763 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
8766 // Try to simplify (zext (zextload x)).
8767 if (SDValue foldedExt = tryToFoldExtOfExtload(
8768 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
8771 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8774 if (N0.getOpcode() == ISD::SETCC) {
8775 // Only do this before legalize for now.
8776 if (!LegalOperations && VT.isVector() &&
8777 N0.getValueType().getVectorElementType() == MVT::i1) {
8778 EVT N00VT = N0.getOperand(0).getValueType();
8779 if (getSetCCResultType(N00VT) == N0.getValueType())
8782 // We know that the # elements of the results is the same as the #
8783 // elements of the compare (and the # elements of the compare result for
8784 // that matter). Check to see that they are the same size. If so, we know
8785 // that the element size of the sext'd result matches the element size of
8786 // the compare operands.
8788 SDValue VecOnes = DAG.getConstant(1, DL, VT);
8789 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
8790 // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
8791 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
8792 N0.getOperand(1), N0.getOperand(2));
8793 return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
8796 // If the desired elements are smaller or larger than the source
8797 // elements we can use a matching integer vector type and then
8798 // truncate/sign extend.
8799 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8801 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
8802 N0.getOperand(1), N0.getOperand(2));
8803 return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
8807 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8809 if (SDValue SCC = SimplifySelectCC(
8810 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8811 DAG.getConstant(0, DL, VT),
8812 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8816 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
8817 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
8818 isa<ConstantSDNode>(N0.getOperand(1)) &&
8819 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
8821 SDValue ShAmt = N0.getOperand(1);
8822 unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
8823 if (N0.getOpcode() == ISD::SHL) {
8824 SDValue InnerZExt = N0.getOperand(0);
8825 // If the original shl may be shifting out bits, do not perform this
8827 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
8828 InnerZExt.getOperand(0).getValueSizeInBits();
8829 if (ShAmtVal > KnownZeroBits)
8835 // Ensure that the shift amount is wide enough for the shifted value.
8836 if (VT.getSizeInBits() >= 256)
8837 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
8839 return DAG.getNode(N0.getOpcode(), DL, VT,
8840 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
8844 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8850 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
8851 SDValue N0 = N->getOperand(0);
8852 EVT VT = N->getValueType(0);
8854 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
8856 return SDValue(Res, 0);
8858 // fold (aext (aext x)) -> (aext x)
8859 // fold (aext (zext x)) -> (zext x)
8860 // fold (aext (sext x)) -> (sext x)
8861 if (N0.getOpcode() == ISD::ANY_EXTEND ||
8862 N0.getOpcode() == ISD::ZERO_EXTEND ||
8863 N0.getOpcode() == ISD::SIGN_EXTEND)
8864 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
8866 // fold (aext (truncate (load x))) -> (aext (smaller load x))
8867 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
8868 if (N0.getOpcode() == ISD::TRUNCATE) {
8869 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8870 SDNode *oye = N0.getOperand(0).getNode();
8871 if (NarrowLoad.getNode() != N0.getNode()) {
8872 CombineTo(N0.getNode(), NarrowLoad);
8873 // CombineTo deleted the truncate, if needed, but not what's under it.
8876 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8880 // fold (aext (truncate x))
8881 if (N0.getOpcode() == ISD::TRUNCATE)
8882 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
8884 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
8885 // if the trunc is not free.
8886 if (N0.getOpcode() == ISD::AND &&
8887 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
8888 N0.getOperand(1).getOpcode() == ISD::Constant &&
8889 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
8890 N0.getValueType())) {
8892 SDValue X = N0.getOperand(0).getOperand(0);
8893 X = DAG.getAnyExtOrTrunc(X, DL, VT);
8894 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8895 Mask = Mask.zext(VT.getSizeInBits());
8896 return DAG.getNode(ISD::AND, DL, VT,
8897 X, DAG.getConstant(Mask, DL, VT));
8900 // fold (aext (load x)) -> (aext (truncate (extload x)))
8901 // None of the supported targets knows how to perform load and any_ext
8902 // on vectors in one instruction. We only perform this transformation on
8904 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
8905 ISD::isUNINDEXEDLoad(N0.getNode()) &&
8906 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
8907 bool DoXform = true;
8908 SmallVector<SDNode*, 4> SetCCs;
8909 if (!N0.hasOneUse())
8910 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
8913 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8914 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
8916 LN0->getBasePtr(), N0.getValueType(),
8917 LN0->getMemOperand());
8918 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
8919 // If the load value is used only by N, replace it via CombineTo N.
8920 bool NoReplaceTrunc = N0.hasOneUse();
8921 CombineTo(N, ExtLoad);
8922 if (NoReplaceTrunc) {
8923 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8925 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
8926 N0.getValueType(), ExtLoad);
8927 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8929 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8933 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
8934 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
8935 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
8936 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
8937 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
8938 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8939 ISD::LoadExtType ExtType = LN0->getExtensionType();
8940 EVT MemVT = LN0->getMemoryVT();
8941 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
8942 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
8943 VT, LN0->getChain(), LN0->getBasePtr(),
8944 MemVT, LN0->getMemOperand());
8945 CombineTo(N, ExtLoad);
8946 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8947 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8951 if (N0.getOpcode() == ISD::SETCC) {
8953 // aext(setcc) -> vsetcc
8954 // aext(setcc) -> truncate(vsetcc)
8955 // aext(setcc) -> aext(vsetcc)
8956 // Only do this before legalize for now.
8957 if (VT.isVector() && !LegalOperations) {
8958 EVT N00VT = N0.getOperand(0).getValueType();
8959 if (getSetCCResultType(N00VT) == N0.getValueType())
8962 // We know that the # elements of the results is the same as the
8963 // # elements of the compare (and the # elements of the compare result
8964 // for that matter). Check to see that they are the same size. If so,
8965 // we know that the element size of the sext'd result matches the
8966 // element size of the compare operands.
8967 if (VT.getSizeInBits() == N00VT.getSizeInBits())
8968 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
8970 cast<CondCodeSDNode>(N0.getOperand(2))->get());
8971 // If the desired elements are smaller or larger than the source
8972 // elements we can use a matching integer vector type and then
8973 // truncate/any extend
8975 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
8977 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
8979 cast<CondCodeSDNode>(N0.getOperand(2))->get());
8980 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
8984 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
8986 if (SDValue SCC = SimplifySelectCC(
8987 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
8988 DAG.getConstant(0, DL, VT),
8989 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
8996 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
8997 unsigned Opcode = N->getOpcode();
8998 SDValue N0 = N->getOperand(0);
8999 SDValue N1 = N->getOperand(1);
9000 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9002 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9003 if (N0.getOpcode() == Opcode &&
9004 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9007 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9008 N0.getOperand(0).getOpcode() == Opcode) {
9009 // We have an assert, truncate, assert sandwich. Make one stronger assert
9010 // by asserting on the smallest asserted type to the larger source type.
9011 // This eliminates the later assert:
9012 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9013 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9014 SDValue BigA = N0.getOperand(0);
9015 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9016 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9017 "Asserting zero/sign-extended bits to a type larger than the "
9018 "truncated destination does not provide information");
9021 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9022 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9023 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9024 BigA.getOperand(0), MinAssertVTVal);
9025 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9031 /// If the result of a wider load is shifted to right of N bits and then
9032 /// truncated to a narrower type and where N is a multiple of number of bits of
9033 /// the narrower type, transform it to a narrower load from address + N / num of
9034 /// bits of new type. Also narrow the load if the result is masked with an AND
9035 /// to effectively produce a smaller type. If the result is to be extended, also
9036 /// fold the extension to form a extending load.
9037 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9038 unsigned Opc = N->getOpcode();
9040 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9041 SDValue N0 = N->getOperand(0);
9042 EVT VT = N->getValueType(0);
9045 // This transformation isn't valid for vector loads.
9049 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
9051 if (Opc == ISD::SIGN_EXTEND_INREG) {
9052 ExtType = ISD::SEXTLOAD;
9053 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9054 } else if (Opc == ISD::SRL) {
9055 // Another special-case: SRL is basically zero-extending a narrower value,
9056 // or it maybe shifting a higher subword, half or byte into the lowest
9058 ExtType = ISD::ZEXTLOAD;
9061 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9062 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9066 uint64_t ShiftAmt = N01->getZExtValue();
9067 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9068 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9069 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9071 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9072 VT.getSizeInBits() - ShiftAmt);
9073 } else if (Opc == ISD::AND) {
9074 // An AND with a constant mask is the same as a truncate + zero-extend.
9075 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9076 if (!AndC || !AndC->getAPIntValue().isMask())
9079 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
9080 ExtType = ISD::ZEXTLOAD;
9081 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9085 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9087 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9088 ShAmt = ConstShift->getZExtValue();
9089 unsigned EVTBits = ExtVT.getSizeInBits();
9090 // Is the shift amount a multiple of size of VT?
9091 if ((ShAmt & (EVTBits-1)) == 0) {
9092 N0 = N0.getOperand(0);
9093 // Is the load width a multiple of size of VT?
9094 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9098 // At this point, we must have a load or else we can't do the transform.
9099 if (!isa<LoadSDNode>(N0)) return SDValue();
9101 auto *LN0 = cast<LoadSDNode>(N0);
9103 // Because a SRL must be assumed to *need* to zero-extend the high bits
9104 // (as opposed to anyext the high bits), we can't combine the zextload
9105 // lowering of SRL and an sextload.
9106 if (LN0->getExtensionType() == ISD::SEXTLOAD)
9109 // If the shift amount is larger than the input type then we're not
9110 // accessing any of the loaded bytes. If the load was a zextload/extload
9111 // then the result of the shift+trunc is zero/undef (handled elsewhere).
9112 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9115 // If the SRL is only used by a masking AND, we may be able to adjust
9116 // the ExtVT to make the AND redundant.
9117 SDNode *Mask = *(SRL->use_begin());
9118 if (Mask->getOpcode() == ISD::AND &&
9119 isa<ConstantSDNode>(Mask->getOperand(1))) {
9120 const APInt &ShiftMask =
9121 cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9122 if (ShiftMask.isMask()) {
9123 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9124 ShiftMask.countTrailingOnes());
9125 // If the mask is smaller, recompute the type.
9126 if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9127 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9134 // If the load is shifted left (and the result isn't shifted back right),
9135 // we can fold the truncate through the shift.
9136 unsigned ShLeftAmt = 0;
9137 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9138 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9139 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9140 ShLeftAmt = N01->getZExtValue();
9141 N0 = N0.getOperand(0);
9145 // If we haven't found a load, we can't narrow it.
9146 if (!isa<LoadSDNode>(N0))
9149 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9150 if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9153 // For big endian targets, we need to adjust the offset to the pointer to
9154 // load the correct bytes.
9155 if (DAG.getDataLayout().isBigEndian()) {
9156 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9157 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9158 ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
9161 EVT PtrType = N0.getOperand(1).getValueType();
9162 uint64_t PtrOff = ShAmt / 8;
9163 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9165 // The original load itself didn't wrap, so an offset within it doesn't.
9167 Flags.setNoUnsignedWrap(true);
9168 SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9169 PtrType, LN0->getBasePtr(),
9170 DAG.getConstant(PtrOff, DL, PtrType),
9172 AddToWorklist(NewPtr.getNode());
9175 if (ExtType == ISD::NON_EXTLOAD)
9176 Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9177 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9178 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9180 Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9181 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9182 NewAlign, LN0->getMemOperand()->getFlags(),
9185 // Replace the old load's chain with the new load's chain.
9186 WorklistRemover DeadNodes(*this);
9187 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9189 // Shift the result left, if we've swallowed a left shift.
9190 SDValue Result = Load;
9191 if (ShLeftAmt != 0) {
9192 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9193 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9195 // If the shift amount is as large as the result size (but, presumably,
9196 // no larger than the source) then the useful bits of the result are
9197 // zero; we can't simply return the shortened shift, because the result
9198 // of that operation is undefined.
9200 if (ShLeftAmt >= VT.getSizeInBits())
9201 Result = DAG.getConstant(0, DL, VT);
9203 Result = DAG.getNode(ISD::SHL, DL, VT,
9204 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9207 // Return the new loaded value.
9211 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9212 SDValue N0 = N->getOperand(0);
9213 SDValue N1 = N->getOperand(1);
9214 EVT VT = N->getValueType(0);
9215 EVT EVT = cast<VTSDNode>(N1)->getVT();
9216 unsigned VTBits = VT.getScalarSizeInBits();
9217 unsigned EVTBits = EVT.getScalarSizeInBits();
9220 return DAG.getUNDEF(VT);
9222 // fold (sext_in_reg c1) -> c1
9223 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9224 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9226 // If the input is already sign extended, just drop the extension.
9227 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9230 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9231 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9232 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9233 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9234 N0.getOperand(0), N1);
9236 // fold (sext_in_reg (sext x)) -> (sext x)
9237 // fold (sext_in_reg (aext x)) -> (sext x)
9238 // if x is small enough.
9239 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9240 SDValue N00 = N0.getOperand(0);
9241 if (N00.getScalarValueSizeInBits() <= EVTBits &&
9242 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9243 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9246 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9247 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9248 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9249 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9250 N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9251 if (!LegalOperations ||
9252 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9253 return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
9256 // fold (sext_in_reg (zext x)) -> (sext x)
9257 // iff we are extending the source sign bit.
9258 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9259 SDValue N00 = N0.getOperand(0);
9260 if (N00.getScalarValueSizeInBits() == EVTBits &&
9261 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9262 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9265 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9266 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9267 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9269 // fold operands of sext_in_reg based on knowledge that the top bits are not
9271 if (SimplifyDemandedBits(SDValue(N, 0)))
9272 return SDValue(N, 0);
9274 // fold (sext_in_reg (load x)) -> (smaller sextload x)
9275 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9276 if (SDValue NarrowLoad = ReduceLoadWidth(N))
9279 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9280 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9281 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9282 if (N0.getOpcode() == ISD::SRL) {
9283 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9284 if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9285 // We can turn this into an SRA iff the input to the SRL is already sign
9287 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9288 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9289 return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9290 N0.getOperand(0), N0.getOperand(1));
9294 // fold (sext_inreg (extload x)) -> (sextload x)
9295 // If sextload is not supported by target, we can only do the combine when
9296 // load has one use. Doing otherwise can block folding the extload with other
9297 // extends that the target does support.
9298 if (ISD::isEXTLoad(N0.getNode()) &&
9299 ISD::isUNINDEXEDLoad(N0.getNode()) &&
9300 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9301 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9303 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9304 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9305 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9307 LN0->getBasePtr(), EVT,
9308 LN0->getMemOperand());
9309 CombineTo(N, ExtLoad);
9310 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9311 AddToWorklist(ExtLoad.getNode());
9312 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9314 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9315 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9317 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9318 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9319 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9320 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9321 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9323 LN0->getBasePtr(), EVT,
9324 LN0->getMemOperand());
9325 CombineTo(N, ExtLoad);
9326 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9327 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9330 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9331 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9332 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9333 N0.getOperand(1), false))
9334 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9341 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9342 SDValue N0 = N->getOperand(0);
9343 EVT VT = N->getValueType(0);
9346 return DAG.getUNDEF(VT);
9348 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
9350 return SDValue(Res, 0);
9355 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9356 SDValue N0 = N->getOperand(0);
9357 EVT VT = N->getValueType(0);
9360 return DAG.getUNDEF(VT);
9362 if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
9364 return SDValue(Res, 0);
9369 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
9370 SDValue N0 = N->getOperand(0);
9371 EVT VT = N->getValueType(0);
9372 bool isLE = DAG.getDataLayout().isLittleEndian();
9375 if (N0.getValueType() == N->getValueType(0))
9378 // fold (truncate (truncate x)) -> (truncate x)
9379 if (N0.getOpcode() == ISD::TRUNCATE)
9380 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9382 // fold (truncate c1) -> c1
9383 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
9384 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
9385 if (C.getNode() != N)
9389 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
9390 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
9391 N0.getOpcode() == ISD::SIGN_EXTEND ||
9392 N0.getOpcode() == ISD::ANY_EXTEND) {
9393 // if the source is smaller than the dest, we still need an extend.
9394 if (N0.getOperand(0).getValueType().bitsLT(VT))
9395 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9396 // if the source is larger than the dest, than we just need the truncate.
9397 if (N0.getOperand(0).getValueType().bitsGT(VT))
9398 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9399 // if the source and dest are the same type, we can drop both the extend
9400 // and the truncate.
9401 return N0.getOperand(0);
9404 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
9405 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
9408 // Fold extract-and-trunc into a narrow extract. For example:
9409 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
9410 // i32 y = TRUNCATE(i64 x)
9412 // v16i8 b = BITCAST (v2i64 val)
9413 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
9415 // Note: We only run this optimization after type legalization (which often
9416 // creates this pattern) and before operation legalization after which
9417 // we need to be more careful about the vector instructions that we generate.
9418 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9419 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
9420 EVT VecTy = N0.getOperand(0).getValueType();
9421 EVT ExTy = N0.getValueType();
9422 EVT TrTy = N->getValueType(0);
9424 unsigned NumElem = VecTy.getVectorNumElements();
9425 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
9427 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
9428 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
9430 SDValue EltNo = N0->getOperand(1);
9431 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
9432 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9433 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
9434 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
9437 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
9438 DAG.getBitcast(NVT, N0.getOperand(0)),
9439 DAG.getConstant(Index, DL, IndexTy));
9443 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
9444 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
9445 EVT SrcVT = N0.getValueType();
9446 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
9447 TLI.isTruncateFree(SrcVT, VT)) {
9449 SDValue Cond = N0.getOperand(0);
9450 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9451 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
9452 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
9456 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
9457 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9458 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
9459 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
9460 SDValue Amt = N0.getOperand(1);
9462 DAG.computeKnownBits(Amt, Known);
9463 unsigned Size = VT.getScalarSizeInBits();
9464 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
9466 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
9468 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9469 if (AmtVT != Amt.getValueType()) {
9470 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
9471 AddToWorklist(Amt.getNode());
9473 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
9477 // Fold a series of buildvector, bitcast, and truncate if possible.
9479 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
9480 // (2xi32 (buildvector x, y)).
9481 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
9482 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
9483 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
9484 N0.getOperand(0).hasOneUse()) {
9485 SDValue BuildVect = N0.getOperand(0);
9486 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
9487 EVT TruncVecEltTy = VT.getVectorElementType();
9489 // Check that the element types match.
9490 if (BuildVectEltTy == TruncVecEltTy) {
9491 // Now we only need to compute the offset of the truncated elements.
9492 unsigned BuildVecNumElts = BuildVect.getNumOperands();
9493 unsigned TruncVecNumElts = VT.getVectorNumElements();
9494 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
9496 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
9497 "Invalid number of elements");
9499 SmallVector<SDValue, 8> Opnds;
9500 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
9501 Opnds.push_back(BuildVect.getOperand(i));
9503 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
9507 // See if we can simplify the input to this truncate through knowledge that
9508 // only the low bits are being used.
9509 // For example "trunc (or (shl x, 8), y)" // -> trunc y
9510 // Currently we only perform this optimization on scalars because vectors
9511 // may have different active low bits.
9512 if (!VT.isVector()) {
9514 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
9515 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
9516 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
9519 // fold (truncate (load x)) -> (smaller load x)
9520 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
9521 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
9522 if (SDValue Reduced = ReduceLoadWidth(N))
9525 // Handle the case where the load remains an extending load even
9526 // after truncation.
9527 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
9528 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9529 if (!LN0->isVolatile() &&
9530 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
9531 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
9532 VT, LN0->getChain(), LN0->getBasePtr(),
9534 LN0->getMemOperand());
9535 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
9541 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
9542 // where ... are all 'undef'.
9543 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
9544 SmallVector<EVT, 8> VTs;
9547 unsigned NumDefs = 0;
9549 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
9550 SDValue X = N0.getOperand(i);
9556 // Stop if more than one members are non-undef.
9559 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
9560 VT.getVectorElementType(),
9561 X.getValueType().getVectorNumElements()));
9565 return DAG.getUNDEF(VT);
9568 assert(V.getNode() && "The single defined operand is empty!");
9569 SmallVector<SDValue, 8> Opnds;
9570 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
9572 Opnds.push_back(DAG.getUNDEF(VTs[i]));
9575 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
9576 AddToWorklist(NV.getNode());
9577 Opnds.push_back(NV);
9579 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
9583 // Fold truncate of a bitcast of a vector to an extract of the low vector
9586 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
9587 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
9588 SDValue VecSrc = N0.getOperand(0);
9589 EVT SrcVT = VecSrc.getValueType();
9590 if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
9591 (!LegalOperations ||
9592 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
9595 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
9596 unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
9597 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
9598 VecSrc, DAG.getConstant(Idx, SL, IdxVT));
9602 // Simplify the operands using demanded-bits information.
9603 if (!VT.isVector() &&
9604 SimplifyDemandedBits(SDValue(N, 0)))
9605 return SDValue(N, 0);
9607 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
9608 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
9609 // When the adde's carry is not used.
9610 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
9611 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
9612 (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
9614 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9615 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9616 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
9617 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
9620 // fold (truncate (extract_subvector(ext x))) ->
9621 // (extract_subvector x)
9622 // TODO: This can be generalized to cover cases where the truncate and extract
9623 // do not fully cancel each other out.
9624 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9625 SDValue N00 = N0.getOperand(0);
9626 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
9627 N00.getOpcode() == ISD::ZERO_EXTEND ||
9628 N00.getOpcode() == ISD::ANY_EXTEND) {
9629 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
9630 VT.getVectorElementType())
9631 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
9632 N00.getOperand(0), N0.getOperand(1));
9636 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9642 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
9643 SDValue Elt = N->getOperand(i);
9644 if (Elt.getOpcode() != ISD::MERGE_VALUES)
9645 return Elt.getNode();
9646 return Elt.getOperand(Elt.getResNo()).getNode();
9649 /// build_pair (load, load) -> load
9650 /// if load locations are consecutive.
9651 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
9652 assert(N->getOpcode() == ISD::BUILD_PAIR);
9654 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
9655 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
9657 // A BUILD_PAIR is always having the least significant part in elt 0 and the
9658 // most significant part in elt 1. So when combining into one large load, we
9659 // need to consider the endianness.
9660 if (DAG.getDataLayout().isBigEndian())
9661 std::swap(LD1, LD2);
9663 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
9664 LD1->getAddressSpace() != LD2->getAddressSpace())
9666 EVT LD1VT = LD1->getValueType(0);
9667 unsigned LD1Bytes = LD1VT.getStoreSize();
9668 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
9669 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
9670 unsigned Align = LD1->getAlignment();
9671 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
9672 VT.getTypeForEVT(*DAG.getContext()));
9674 if (NewAlign <= Align &&
9675 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
9676 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
9677 LD1->getPointerInfo(), Align);
9683 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
9684 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
9685 // and Lo parts; on big-endian machines it doesn't.
9686 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
9689 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
9690 const TargetLowering &TLI) {
9691 // If this is not a bitcast to an FP type or if the target doesn't have
9692 // IEEE754-compliant FP logic, we're done.
9693 EVT VT = N->getValueType(0);
9694 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
9697 // TODO: Use splat values for the constant-checking below and remove this
9699 SDValue N0 = N->getOperand(0);
9700 EVT SourceVT = N0.getValueType();
9701 if (SourceVT.isVector())
9706 switch (N0.getOpcode()) {
9708 FPOpcode = ISD::FABS;
9709 SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits());
9712 FPOpcode = ISD::FNEG;
9713 SignMask = APInt::getSignMask(SourceVT.getSizeInBits());
9715 // TODO: ISD::OR --> ISD::FNABS?
9720 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
9721 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
9722 SDValue LogicOp0 = N0.getOperand(0);
9723 ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9724 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
9725 LogicOp0.getOpcode() == ISD::BITCAST &&
9726 LogicOp0->getOperand(0).getValueType() == VT)
9727 return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
9732 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
9733 SDValue N0 = N->getOperand(0);
9734 EVT VT = N->getValueType(0);
9737 return DAG.getUNDEF(VT);
9739 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
9740 // Only do this before legalize, since afterward the target may be depending
9741 // on the bitconvert.
9742 // First check to see if this is all constant.
9744 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
9746 bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
9748 EVT DestEltVT = N->getValueType(0).getVectorElementType();
9749 assert(!DestEltVT.isVector() &&
9750 "Element type of vector ValueType must not be vector!");
9752 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
9755 // If the input is a constant, let getNode fold it.
9756 // We always need to check that this is just a fp -> int or int -> conversion
9757 // otherwise we will get back N which will confuse the caller into thinking
9758 // we used CombineTo. This can block target combines from running. If we can't
9759 // allowed legal operations, we need to ensure the resulting operation will be
9761 // TODO: Maybe we should check that the return value isn't N explicitly?
9762 if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
9763 (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) ||
9764 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
9765 (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT))))
9766 return DAG.getBitcast(VT, N0);
9768 // (conv (conv x, t1), t2) -> (conv x, t2)
9769 if (N0.getOpcode() == ISD::BITCAST)
9770 return DAG.getBitcast(VT, N0.getOperand(0));
9772 // fold (conv (load x)) -> (load (conv*)x)
9773 // If the resultant load doesn't need a higher alignment than the original!
9774 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
9775 // Do not change the width of a volatile load.
9776 !cast<LoadSDNode>(N0)->isVolatile() &&
9777 // Do not remove the cast if the types differ in endian layout.
9778 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
9779 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
9780 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
9781 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
9782 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9783 unsigned OrigAlign = LN0->getAlignment();
9786 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9787 LN0->getAddressSpace(), OrigAlign, &Fast) &&
9790 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
9791 LN0->getPointerInfo(), OrigAlign,
9792 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9793 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9798 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
9801 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
9802 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
9805 // fold (bitcast (fneg x)) ->
9806 // flipbit = signbit
9807 // (xor (bitcast x) (build_pair flipbit, flipbit))
9809 // fold (bitcast (fabs x)) ->
9810 // flipbit = (and (extract_element (bitcast x), 0), signbit)
9811 // (xor (bitcast x) (build_pair flipbit, flipbit))
9812 // This often reduces constant pool loads.
9813 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
9814 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
9815 N0.getNode()->hasOneUse() && VT.isInteger() &&
9816 !VT.isVector() && !N0.getValueType().isVector()) {
9817 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
9818 AddToWorklist(NewConv.getNode());
9821 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9822 assert(VT.getSizeInBits() == 128);
9823 SDValue SignBit = DAG.getConstant(
9824 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
9826 if (N0.getOpcode() == ISD::FNEG) {
9828 AddToWorklist(FlipBit.getNode());
9830 assert(N0.getOpcode() == ISD::FABS);
9832 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
9833 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9835 AddToWorklist(Hi.getNode());
9836 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
9837 AddToWorklist(FlipBit.getNode());
9840 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9841 AddToWorklist(FlipBits.getNode());
9842 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
9844 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9845 if (N0.getOpcode() == ISD::FNEG)
9846 return DAG.getNode(ISD::XOR, DL, VT,
9847 NewConv, DAG.getConstant(SignBit, DL, VT));
9848 assert(N0.getOpcode() == ISD::FABS);
9849 return DAG.getNode(ISD::AND, DL, VT,
9850 NewConv, DAG.getConstant(~SignBit, DL, VT));
9853 // fold (bitconvert (fcopysign cst, x)) ->
9854 // (or (and (bitconvert x), sign), (and cst, (not sign)))
9855 // Note that we don't handle (copysign x, cst) because this can always be
9856 // folded to an fneg or fabs.
9859 // fold (bitcast (fcopysign cst, x)) ->
9860 // flipbit = (and (extract_element
9861 // (xor (bitcast cst), (bitcast x)), 0),
9863 // (xor (bitcast cst) (build_pair flipbit, flipbit))
9864 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
9865 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
9866 VT.isInteger() && !VT.isVector()) {
9867 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
9868 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
9869 if (isTypeLegal(IntXVT)) {
9870 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
9871 AddToWorklist(X.getNode());
9873 // If X has a different width than the result/lhs, sext it or truncate it.
9874 unsigned VTWidth = VT.getSizeInBits();
9875 if (OrigXWidth < VTWidth) {
9876 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
9877 AddToWorklist(X.getNode());
9878 } else if (OrigXWidth > VTWidth) {
9879 // To get the sign bit in the right place, we have to shift it right
9880 // before truncating.
9882 X = DAG.getNode(ISD::SRL, DL,
9883 X.getValueType(), X,
9884 DAG.getConstant(OrigXWidth-VTWidth, DL,
9886 AddToWorklist(X.getNode());
9887 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
9888 AddToWorklist(X.getNode());
9891 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
9892 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
9893 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9894 AddToWorklist(Cst.getNode());
9895 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
9896 AddToWorklist(X.getNode());
9897 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
9898 AddToWorklist(XorResult.getNode());
9899 SDValue XorResult64 = DAG.getNode(
9900 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
9901 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
9903 AddToWorklist(XorResult64.getNode());
9905 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
9906 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
9907 AddToWorklist(FlipBit.getNode());
9909 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
9910 AddToWorklist(FlipBits.getNode());
9911 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
9913 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
9914 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
9915 X, DAG.getConstant(SignBit, SDLoc(X), VT));
9916 AddToWorklist(X.getNode());
9918 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
9919 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
9920 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
9921 AddToWorklist(Cst.getNode());
9923 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
9927 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
9928 if (N0.getOpcode() == ISD::BUILD_PAIR)
9929 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
9932 // Remove double bitcasts from shuffles - this is often a legacy of
9933 // XformToShuffleWithZero being used to combine bitmaskings (of
9934 // float vectors bitcast to integer vectors) into shuffles.
9935 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
9936 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
9937 N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
9938 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
9939 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
9940 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
9942 // If operands are a bitcast, peek through if it casts the original VT.
9943 // If operands are a constant, just bitcast back to original VT.
9944 auto PeekThroughBitcast = [&](SDValue Op) {
9945 if (Op.getOpcode() == ISD::BITCAST &&
9946 Op.getOperand(0).getValueType() == VT)
9947 return SDValue(Op.getOperand(0));
9948 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
9949 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
9950 return DAG.getBitcast(VT, Op);
9954 // FIXME: If either input vector is bitcast, try to convert the shuffle to
9955 // the result type of this bitcast. This would eliminate at least one
9956 // bitcast. See the transform in InstCombine.
9957 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
9958 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
9963 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
9964 SmallVector<int, 8> NewMask;
9965 for (int M : SVN->getMask())
9966 for (int i = 0; i != MaskScale; ++i)
9967 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
9969 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9971 std::swap(SV0, SV1);
9972 ShuffleVectorSDNode::commuteMask(NewMask);
9973 LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
9977 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
9983 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
9984 EVT VT = N->getValueType(0);
9985 return CombineConsecutiveLoads(N, VT);
9988 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
9989 /// operands. DstEltVT indicates the destination element value type.
9990 SDValue DAGCombiner::
9991 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
9992 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
9994 // If this is already the right type, we're done.
9995 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
9997 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
9998 unsigned DstBitSize = DstEltVT.getSizeInBits();
10000 // If this is a conversion of N elements of one type to N elements of another
10001 // type, convert each element. This handles FP<->INT cases.
10002 if (SrcBitSize == DstBitSize) {
10003 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10004 BV->getValueType(0).getVectorNumElements());
10006 // Due to the FP element handling below calling this routine recursively,
10007 // we can end up with a scalar-to-vector node here.
10008 if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
10009 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
10010 DAG.getBitcast(DstEltVT, BV->getOperand(0)));
10012 SmallVector<SDValue, 8> Ops;
10013 for (SDValue Op : BV->op_values()) {
10014 // If the vector element type is not legal, the BUILD_VECTOR operands
10015 // are promoted and implicitly truncated. Make that explicit here.
10016 if (Op.getValueType() != SrcEltVT)
10017 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10018 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10019 AddToWorklist(Ops.back().getNode());
10021 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10024 // Otherwise, we're growing or shrinking the elements. To avoid having to
10025 // handle annoying details of growing/shrinking FP values, we convert them to
10027 if (SrcEltVT.isFloatingPoint()) {
10028 // Convert the input float vector to a int vector where the elements are the
10030 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10031 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10035 // Now we know the input is an integer vector. If the output is a FP type,
10036 // convert to integer first, then to FP of the right size.
10037 if (DstEltVT.isFloatingPoint()) {
10038 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10039 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10041 // Next, convert to FP elements of the same size.
10042 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10047 // Okay, we know the src/dst types are both integers of differing types.
10048 // Handling growing first.
10049 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10050 if (SrcBitSize < DstBitSize) {
10051 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10053 SmallVector<SDValue, 8> Ops;
10054 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10055 i += NumInputsPerOutput) {
10056 bool isLE = DAG.getDataLayout().isLittleEndian();
10057 APInt NewBits = APInt(DstBitSize, 0);
10058 bool EltIsUndef = true;
10059 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10060 // Shift the previously computed bits over.
10061 NewBits <<= SrcBitSize;
10062 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10063 if (Op.isUndef()) continue;
10064 EltIsUndef = false;
10066 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10067 zextOrTrunc(SrcBitSize).zext(DstBitSize);
10071 Ops.push_back(DAG.getUNDEF(DstEltVT));
10073 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10076 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10077 return DAG.getBuildVector(VT, DL, Ops);
10080 // Finally, this must be the case where we are shrinking elements: each input
10081 // turns into multiple outputs.
10082 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10083 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10084 NumOutputsPerInput*BV->getNumOperands());
10085 SmallVector<SDValue, 8> Ops;
10087 for (const SDValue &Op : BV->op_values()) {
10088 if (Op.isUndef()) {
10089 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10093 APInt OpVal = cast<ConstantSDNode>(Op)->
10094 getAPIntValue().zextOrTrunc(SrcBitSize);
10096 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10097 APInt ThisVal = OpVal.trunc(DstBitSize);
10098 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10099 OpVal.lshrInPlace(DstBitSize);
10102 // For big endian targets, swap the order of the pieces of each element.
10103 if (DAG.getDataLayout().isBigEndian())
10104 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10107 return DAG.getBuildVector(VT, DL, Ops);
10110 static bool isContractable(SDNode *N) {
10111 SDNodeFlags F = N->getFlags();
10112 return F.hasAllowContract() || F.hasAllowReassociation();
10115 /// Try to perform FMA combining on a given FADD node.
10116 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10117 SDValue N0 = N->getOperand(0);
10118 SDValue N1 = N->getOperand(1);
10119 EVT VT = N->getValueType(0);
10122 const TargetOptions &Options = DAG.getTarget().Options;
10124 // Floating-point multiply-add with intermediate rounding.
10125 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10127 // Floating-point multiply-add without intermediate rounding.
10129 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10130 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10132 // No valid opcode, do not combine.
10133 if (!HasFMAD && !HasFMA)
10136 SDNodeFlags Flags = N->getFlags();
10137 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10138 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10139 CanFuse || HasFMAD);
10140 // If the addition is not contractable, do not combine.
10141 if (!AllowFusionGlobally && !isContractable(N))
10144 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10145 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10148 // Always prefer FMAD to FMA for precision.
10149 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10150 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10152 // Is the node an FMUL and contractable either due to global flags or
10154 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10155 if (N.getOpcode() != ISD::FMUL)
10157 return AllowFusionGlobally || isContractable(N.getNode());
10159 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10160 // prefer to fold the multiply with fewer uses.
10161 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10162 if (N0.getNode()->use_size() > N1.getNode()->use_size())
10166 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10167 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10168 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10169 N0.getOperand(0), N0.getOperand(1), N1, Flags);
10172 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10173 // Note: Commutes FADD operands.
10174 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10175 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10176 N1.getOperand(0), N1.getOperand(1), N0, Flags);
10179 // Look through FP_EXTEND nodes to do more combining.
10181 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10182 if (N0.getOpcode() == ISD::FP_EXTEND) {
10183 SDValue N00 = N0.getOperand(0);
10184 if (isContractableFMUL(N00) &&
10185 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10186 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10187 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10188 N00.getOperand(0)),
10189 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10190 N00.getOperand(1)), N1, Flags);
10194 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10195 // Note: Commutes FADD operands.
10196 if (N1.getOpcode() == ISD::FP_EXTEND) {
10197 SDValue N10 = N1.getOperand(0);
10198 if (isContractableFMUL(N10) &&
10199 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10200 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10201 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10202 N10.getOperand(0)),
10203 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10204 N10.getOperand(1)), N0, Flags);
10208 // More folding opportunities when target permits.
10210 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
10212 N0.getOpcode() == PreferredFusedOpcode &&
10213 N0.getOperand(2).getOpcode() == ISD::FMUL &&
10214 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10215 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10216 N0.getOperand(0), N0.getOperand(1),
10217 DAG.getNode(PreferredFusedOpcode, SL, VT,
10218 N0.getOperand(2).getOperand(0),
10219 N0.getOperand(2).getOperand(1),
10220 N1, Flags), Flags);
10223 // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
10225 N1->getOpcode() == PreferredFusedOpcode &&
10226 N1.getOperand(2).getOpcode() == ISD::FMUL &&
10227 N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10228 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10229 N1.getOperand(0), N1.getOperand(1),
10230 DAG.getNode(PreferredFusedOpcode, SL, VT,
10231 N1.getOperand(2).getOperand(0),
10232 N1.getOperand(2).getOperand(1),
10233 N0, Flags), Flags);
10237 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10238 // -> (fma x, y, (fma (fpext u), (fpext v), z))
10239 auto FoldFAddFMAFPExtFMul = [&] (
10240 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10241 SDNodeFlags Flags) {
10242 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10243 DAG.getNode(PreferredFusedOpcode, SL, VT,
10244 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10245 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10248 if (N0.getOpcode() == PreferredFusedOpcode) {
10249 SDValue N02 = N0.getOperand(2);
10250 if (N02.getOpcode() == ISD::FP_EXTEND) {
10251 SDValue N020 = N02.getOperand(0);
10252 if (isContractableFMUL(N020) &&
10253 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10254 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10255 N020.getOperand(0), N020.getOperand(1),
10261 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10262 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10263 // FIXME: This turns two single-precision and one double-precision
10264 // operation into two double-precision operations, which might not be
10265 // interesting for all targets, especially GPUs.
10266 auto FoldFAddFPExtFMAFMul = [&] (
10267 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10268 SDNodeFlags Flags) {
10269 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10270 DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10271 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10272 DAG.getNode(PreferredFusedOpcode, SL, VT,
10273 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10274 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10277 if (N0.getOpcode() == ISD::FP_EXTEND) {
10278 SDValue N00 = N0.getOperand(0);
10279 if (N00.getOpcode() == PreferredFusedOpcode) {
10280 SDValue N002 = N00.getOperand(2);
10281 if (isContractableFMUL(N002) &&
10282 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10283 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10284 N002.getOperand(0), N002.getOperand(1),
10290 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
10291 // -> (fma y, z, (fma (fpext u), (fpext v), x))
10292 if (N1.getOpcode() == PreferredFusedOpcode) {
10293 SDValue N12 = N1.getOperand(2);
10294 if (N12.getOpcode() == ISD::FP_EXTEND) {
10295 SDValue N120 = N12.getOperand(0);
10296 if (isContractableFMUL(N120) &&
10297 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10298 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
10299 N120.getOperand(0), N120.getOperand(1),
10305 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
10306 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
10307 // FIXME: This turns two single-precision and one double-precision
10308 // operation into two double-precision operations, which might not be
10309 // interesting for all targets, especially GPUs.
10310 if (N1.getOpcode() == ISD::FP_EXTEND) {
10311 SDValue N10 = N1.getOperand(0);
10312 if (N10.getOpcode() == PreferredFusedOpcode) {
10313 SDValue N102 = N10.getOperand(2);
10314 if (isContractableFMUL(N102) &&
10315 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10316 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
10317 N102.getOperand(0), N102.getOperand(1),
10327 /// Try to perform FMA combining on a given FSUB node.
10328 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
10329 SDValue N0 = N->getOperand(0);
10330 SDValue N1 = N->getOperand(1);
10331 EVT VT = N->getValueType(0);
10334 const TargetOptions &Options = DAG.getTarget().Options;
10335 // Floating-point multiply-add with intermediate rounding.
10336 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10338 // Floating-point multiply-add without intermediate rounding.
10340 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10341 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10343 // No valid opcode, do not combine.
10344 if (!HasFMAD && !HasFMA)
10347 const SDNodeFlags Flags = N->getFlags();
10348 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10349 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10350 CanFuse || HasFMAD);
10352 // If the subtraction is not contractable, do not combine.
10353 if (!AllowFusionGlobally && !isContractable(N))
10356 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10357 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10360 // Always prefer FMAD to FMA for precision.
10361 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10362 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10364 // Is the node an FMUL and contractable either due to global flags or
10366 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10367 if (N.getOpcode() != ISD::FMUL)
10369 return AllowFusionGlobally || isContractable(N.getNode());
10372 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10373 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10374 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10375 N0.getOperand(0), N0.getOperand(1),
10376 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10379 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10380 // Note: Commutes FSUB operands.
10381 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10382 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10383 DAG.getNode(ISD::FNEG, SL, VT,
10385 N1.getOperand(1), N0, Flags);
10388 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
10389 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10390 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10391 SDValue N00 = N0.getOperand(0).getOperand(0);
10392 SDValue N01 = N0.getOperand(0).getOperand(1);
10393 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10394 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10395 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10398 // Look through FP_EXTEND nodes to do more combining.
10400 // fold (fsub (fpext (fmul x, y)), z)
10401 // -> (fma (fpext x), (fpext y), (fneg z))
10402 if (N0.getOpcode() == ISD::FP_EXTEND) {
10403 SDValue N00 = N0.getOperand(0);
10404 if (isContractableFMUL(N00) &&
10405 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10406 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10407 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10408 N00.getOperand(0)),
10409 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10410 N00.getOperand(1)),
10411 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10415 // fold (fsub x, (fpext (fmul y, z)))
10416 // -> (fma (fneg (fpext y)), (fpext z), x)
10417 // Note: Commutes FSUB operands.
10418 if (N1.getOpcode() == ISD::FP_EXTEND) {
10419 SDValue N10 = N1.getOperand(0);
10420 if (isContractableFMUL(N10) &&
10421 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10422 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10423 DAG.getNode(ISD::FNEG, SL, VT,
10424 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10425 N10.getOperand(0))),
10426 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10427 N10.getOperand(1)),
10432 // fold (fsub (fpext (fneg (fmul, x, y))), z)
10433 // -> (fneg (fma (fpext x), (fpext y), z))
10434 // Note: This could be removed with appropriate canonicalization of the
10435 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10436 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10437 // from implementing the canonicalization in visitFSUB.
10438 if (N0.getOpcode() == ISD::FP_EXTEND) {
10439 SDValue N00 = N0.getOperand(0);
10440 if (N00.getOpcode() == ISD::FNEG) {
10441 SDValue N000 = N00.getOperand(0);
10442 if (isContractableFMUL(N000) &&
10443 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10444 return DAG.getNode(ISD::FNEG, SL, VT,
10445 DAG.getNode(PreferredFusedOpcode, SL, VT,
10446 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10447 N000.getOperand(0)),
10448 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10449 N000.getOperand(1)),
10455 // fold (fsub (fneg (fpext (fmul, x, y))), z)
10456 // -> (fneg (fma (fpext x)), (fpext y), z)
10457 // Note: This could be removed with appropriate canonicalization of the
10458 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10459 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10460 // from implementing the canonicalization in visitFSUB.
10461 if (N0.getOpcode() == ISD::FNEG) {
10462 SDValue N00 = N0.getOperand(0);
10463 if (N00.getOpcode() == ISD::FP_EXTEND) {
10464 SDValue N000 = N00.getOperand(0);
10465 if (isContractableFMUL(N000) &&
10466 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10467 return DAG.getNode(ISD::FNEG, SL, VT,
10468 DAG.getNode(PreferredFusedOpcode, SL, VT,
10469 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10470 N000.getOperand(0)),
10471 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10472 N000.getOperand(1)),
10478 // More folding opportunities when target permits.
10480 // fold (fsub (fma x, y, (fmul u, v)), z)
10481 // -> (fma x, y (fma u, v, (fneg z)))
10482 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10483 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10484 N0.getOperand(2)->hasOneUse()) {
10485 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10486 N0.getOperand(0), N0.getOperand(1),
10487 DAG.getNode(PreferredFusedOpcode, SL, VT,
10488 N0.getOperand(2).getOperand(0),
10489 N0.getOperand(2).getOperand(1),
10490 DAG.getNode(ISD::FNEG, SL, VT,
10491 N1), Flags), Flags);
10494 // fold (fsub x, (fma y, z, (fmul u, v)))
10495 // -> (fma (fneg y), z, (fma (fneg u), v, x))
10496 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10497 isContractableFMUL(N1.getOperand(2))) {
10498 SDValue N20 = N1.getOperand(2).getOperand(0);
10499 SDValue N21 = N1.getOperand(2).getOperand(1);
10500 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10501 DAG.getNode(ISD::FNEG, SL, VT,
10504 DAG.getNode(PreferredFusedOpcode, SL, VT,
10505 DAG.getNode(ISD::FNEG, SL, VT, N20),
10506 N21, N0, Flags), Flags);
10510 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
10511 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
10512 if (N0.getOpcode() == PreferredFusedOpcode) {
10513 SDValue N02 = N0.getOperand(2);
10514 if (N02.getOpcode() == ISD::FP_EXTEND) {
10515 SDValue N020 = N02.getOperand(0);
10516 if (isContractableFMUL(N020) &&
10517 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10518 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10519 N0.getOperand(0), N0.getOperand(1),
10520 DAG.getNode(PreferredFusedOpcode, SL, VT,
10521 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10522 N020.getOperand(0)),
10523 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10524 N020.getOperand(1)),
10525 DAG.getNode(ISD::FNEG, SL, VT,
10526 N1), Flags), Flags);
10531 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
10532 // -> (fma (fpext x), (fpext y),
10533 // (fma (fpext u), (fpext v), (fneg z)))
10534 // FIXME: This turns two single-precision and one double-precision
10535 // operation into two double-precision operations, which might not be
10536 // interesting for all targets, especially GPUs.
10537 if (N0.getOpcode() == ISD::FP_EXTEND) {
10538 SDValue N00 = N0.getOperand(0);
10539 if (N00.getOpcode() == PreferredFusedOpcode) {
10540 SDValue N002 = N00.getOperand(2);
10541 if (isContractableFMUL(N002) &&
10542 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10543 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10544 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10545 N00.getOperand(0)),
10546 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10547 N00.getOperand(1)),
10548 DAG.getNode(PreferredFusedOpcode, SL, VT,
10549 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10550 N002.getOperand(0)),
10551 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10552 N002.getOperand(1)),
10553 DAG.getNode(ISD::FNEG, SL, VT,
10554 N1), Flags), Flags);
10559 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
10560 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
10561 if (N1.getOpcode() == PreferredFusedOpcode &&
10562 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
10563 SDValue N120 = N1.getOperand(2).getOperand(0);
10564 if (isContractableFMUL(N120) &&
10565 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10566 SDValue N1200 = N120.getOperand(0);
10567 SDValue N1201 = N120.getOperand(1);
10568 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10569 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
10571 DAG.getNode(PreferredFusedOpcode, SL, VT,
10572 DAG.getNode(ISD::FNEG, SL, VT,
10573 DAG.getNode(ISD::FP_EXTEND, SL,
10575 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10577 N0, Flags), Flags);
10581 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
10582 // -> (fma (fneg (fpext y)), (fpext z),
10583 // (fma (fneg (fpext u)), (fpext v), x))
10584 // FIXME: This turns two single-precision and one double-precision
10585 // operation into two double-precision operations, which might not be
10586 // interesting for all targets, especially GPUs.
10587 if (N1.getOpcode() == ISD::FP_EXTEND &&
10588 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
10589 SDValue CvtSrc = N1.getOperand(0);
10590 SDValue N100 = CvtSrc.getOperand(0);
10591 SDValue N101 = CvtSrc.getOperand(1);
10592 SDValue N102 = CvtSrc.getOperand(2);
10593 if (isContractableFMUL(N102) &&
10594 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
10595 SDValue N1020 = N102.getOperand(0);
10596 SDValue N1021 = N102.getOperand(1);
10597 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10598 DAG.getNode(ISD::FNEG, SL, VT,
10599 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10601 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
10602 DAG.getNode(PreferredFusedOpcode, SL, VT,
10603 DAG.getNode(ISD::FNEG, SL, VT,
10604 DAG.getNode(ISD::FP_EXTEND, SL,
10606 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10608 N0, Flags), Flags);
10616 /// Try to perform FMA combining on a given FMUL node based on the distributive
10617 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
10618 /// subtraction instead of addition).
10619 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
10620 SDValue N0 = N->getOperand(0);
10621 SDValue N1 = N->getOperand(1);
10622 EVT VT = N->getValueType(0);
10624 const SDNodeFlags Flags = N->getFlags();
10626 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
10628 const TargetOptions &Options = DAG.getTarget().Options;
10630 // The transforms below are incorrect when x == 0 and y == inf, because the
10631 // intermediate multiplication produces a nan.
10632 if (!Options.NoInfsFPMath)
10635 // Floating-point multiply-add without intermediate rounding.
10637 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
10638 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10639 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10641 // Floating-point multiply-add with intermediate rounding. This can result
10642 // in a less precise result due to the changed rounding order.
10643 bool HasFMAD = Options.UnsafeFPMath &&
10644 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10646 // No valid opcode, do not combine.
10647 if (!HasFMAD && !HasFMA)
10650 // Always prefer FMAD to FMA for precision.
10651 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10652 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10654 // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
10655 // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
10656 auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10657 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
10658 auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
10659 if (XC1 && XC1->isExactlyValue(+1.0))
10660 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10662 if (XC1 && XC1->isExactlyValue(-1.0))
10663 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10664 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10669 if (SDValue FMA = FuseFADD(N0, N1, Flags))
10671 if (SDValue FMA = FuseFADD(N1, N0, Flags))
10674 // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
10675 // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
10676 // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
10677 // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
10678 auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
10679 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
10680 auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
10681 if (XC0 && XC0->isExactlyValue(+1.0))
10682 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10683 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10685 if (XC0 && XC0->isExactlyValue(-1.0))
10686 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10687 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
10688 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10690 auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
10691 if (XC1 && XC1->isExactlyValue(+1.0))
10692 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10693 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
10694 if (XC1 && XC1->isExactlyValue(-1.0))
10695 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
10701 if (SDValue FMA = FuseFSUB(N0, N1, Flags))
10703 if (SDValue FMA = FuseFSUB(N1, N0, Flags))
10709 static bool isFMulNegTwo(SDValue &N) {
10710 if (N.getOpcode() != ISD::FMUL)
10712 if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1)))
10713 return CFP->isExactlyValue(-2.0);
10717 SDValue DAGCombiner::visitFADD(SDNode *N) {
10718 SDValue N0 = N->getOperand(0);
10719 SDValue N1 = N->getOperand(1);
10720 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
10721 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
10722 EVT VT = N->getValueType(0);
10724 const TargetOptions &Options = DAG.getTarget().Options;
10725 const SDNodeFlags Flags = N->getFlags();
10729 if (SDValue FoldedVOp = SimplifyVBinOp(N))
10732 // fold (fadd c1, c2) -> c1 + c2
10733 if (N0CFP && N1CFP)
10734 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
10736 // canonicalize constant to RHS
10737 if (N0CFP && !N1CFP)
10738 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
10740 if (SDValue NewSel = foldBinOpIntoSelect(N))
10743 // fold (fadd A, (fneg B)) -> (fsub A, B)
10744 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10745 isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
10746 return DAG.getNode(ISD::FSUB, DL, VT, N0,
10747 GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10749 // fold (fadd (fneg A), B) -> (fsub B, A)
10750 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
10751 isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
10752 return DAG.getNode(ISD::FSUB, DL, VT, N1,
10753 GetNegatedExpression(N0, DAG, LegalOperations), Flags);
10755 // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
10756 // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
10757 if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
10758 (isFMulNegTwo(N1) && N1.hasOneUse())) {
10759 bool N1IsFMul = isFMulNegTwo(N1);
10760 SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
10761 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
10762 return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
10765 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
10766 if (N1C && N1C->isZero()) {
10767 if (N1C->isNegative() || Options.UnsafeFPMath ||
10768 Flags.hasNoSignedZeros()) {
10769 // fold (fadd A, 0) -> A
10774 // No FP constant should be created after legalization as Instruction
10775 // Selection pass has a hard time dealing with FP constants.
10776 bool AllowNewConst = (Level < AfterLegalizeDAG);
10778 // If 'unsafe math' or nnan is enabled, fold lots of things.
10779 if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
10780 // If allowed, fold (fadd (fneg x), x) -> 0.0
10781 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
10782 return DAG.getConstantFP(0.0, DL, VT);
10784 // If allowed, fold (fadd x, (fneg x)) -> 0.0
10785 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
10786 return DAG.getConstantFP(0.0, DL, VT);
10789 // If 'unsafe math' or reassoc and nsz, fold lots of things.
10790 // TODO: break out portions of the transformations below for which Unsafe is
10791 // considered and which do not require both nsz and reassoc
10792 if ((Options.UnsafeFPMath ||
10793 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
10795 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
10796 if (N1CFP && N0.getOpcode() == ISD::FADD &&
10797 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
10798 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
10799 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
10802 // We can fold chains of FADD's of the same value into multiplications.
10803 // This transform is not safe in general because we are reducing the number
10804 // of rounding steps.
10805 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
10806 if (N0.getOpcode() == ISD::FMUL) {
10807 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10808 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
10810 // (fadd (fmul x, c), x) -> (fmul x, c+1)
10811 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
10812 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10813 DAG.getConstantFP(1.0, DL, VT), Flags);
10814 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
10817 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
10818 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
10819 N1.getOperand(0) == N1.getOperand(1) &&
10820 N0.getOperand(0) == N1.getOperand(0)) {
10821 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
10822 DAG.getConstantFP(2.0, DL, VT), Flags);
10823 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
10827 if (N1.getOpcode() == ISD::FMUL) {
10828 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10829 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
10831 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
10832 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
10833 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10834 DAG.getConstantFP(1.0, DL, VT), Flags);
10835 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
10838 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
10839 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
10840 N0.getOperand(0) == N0.getOperand(1) &&
10841 N1.getOperand(0) == N0.getOperand(0)) {
10842 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
10843 DAG.getConstantFP(2.0, DL, VT), Flags);
10844 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
10848 if (N0.getOpcode() == ISD::FADD) {
10849 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
10850 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
10851 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
10852 (N0.getOperand(0) == N1)) {
10853 return DAG.getNode(ISD::FMUL, DL, VT,
10854 N1, DAG.getConstantFP(3.0, DL, VT), Flags);
10858 if (N1.getOpcode() == ISD::FADD) {
10859 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
10860 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
10861 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
10862 N1.getOperand(0) == N0) {
10863 return DAG.getNode(ISD::FMUL, DL, VT,
10864 N0, DAG.getConstantFP(3.0, DL, VT), Flags);
10868 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
10869 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
10870 N0.getOperand(0) == N0.getOperand(1) &&
10871 N1.getOperand(0) == N1.getOperand(1) &&
10872 N0.getOperand(0) == N1.getOperand(0)) {
10873 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
10874 DAG.getConstantFP(4.0, DL, VT), Flags);
10877 } // enable-unsafe-fp-math
10879 // FADD -> FMA combines:
10880 if (SDValue Fused = visitFADDForFMACombine(N)) {
10881 AddToWorklist(Fused.getNode());
10887 SDValue DAGCombiner::visitFSUB(SDNode *N) {
10888 SDValue N0 = N->getOperand(0);
10889 SDValue N1 = N->getOperand(1);
10890 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10891 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10892 EVT VT = N->getValueType(0);
10894 const TargetOptions &Options = DAG.getTarget().Options;
10895 const SDNodeFlags Flags = N->getFlags();
10899 if (SDValue FoldedVOp = SimplifyVBinOp(N))
10902 // fold (fsub c1, c2) -> c1-c2
10903 if (N0CFP && N1CFP)
10904 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
10906 if (SDValue NewSel = foldBinOpIntoSelect(N))
10909 // (fsub A, 0) -> A
10910 if (N1CFP && N1CFP->isZero()) {
10911 if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
10912 Flags.hasNoSignedZeros()) {
10918 // (fsub x, x) -> 0.0
10919 if (Options.UnsafeFPMath || Flags.hasNoNaNs())
10920 return DAG.getConstantFP(0.0f, DL, VT);
10923 // (fsub 0, B) -> -B
10924 if (N0CFP && N0CFP->isZero()) {
10925 if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
10926 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10927 return GetNegatedExpression(N1, DAG, LegalOperations);
10928 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
10929 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
10933 // fold (fsub A, (fneg B)) -> (fadd A, B)
10934 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
10935 return DAG.getNode(ISD::FADD, DL, VT, N0,
10936 GetNegatedExpression(N1, DAG, LegalOperations), Flags);
10938 // If 'unsafe math' is enabled, fold lots of things.
10939 if (Options.UnsafeFPMath) {
10940 // (fsub x, (fadd x, y)) -> (fneg y)
10941 // (fsub x, (fadd y, x)) -> (fneg y)
10942 if (N1.getOpcode() == ISD::FADD) {
10943 SDValue N10 = N1->getOperand(0);
10944 SDValue N11 = N1->getOperand(1);
10946 if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
10947 return GetNegatedExpression(N11, DAG, LegalOperations);
10949 if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
10950 return GetNegatedExpression(N10, DAG, LegalOperations);
10954 // FSUB -> FMA combines:
10955 if (SDValue Fused = visitFSUBForFMACombine(N)) {
10956 AddToWorklist(Fused.getNode());
10963 SDValue DAGCombiner::visitFMUL(SDNode *N) {
10964 SDValue N0 = N->getOperand(0);
10965 SDValue N1 = N->getOperand(1);
10966 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
10967 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
10968 EVT VT = N->getValueType(0);
10970 const TargetOptions &Options = DAG.getTarget().Options;
10971 const SDNodeFlags Flags = N->getFlags();
10974 if (VT.isVector()) {
10975 // This just handles C1 * C2 for vectors. Other vector folds are below.
10976 if (SDValue FoldedVOp = SimplifyVBinOp(N))
10980 // fold (fmul c1, c2) -> c1*c2
10981 if (N0CFP && N1CFP)
10982 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
10984 // canonicalize constant to RHS
10985 if (isConstantFPBuildVectorOrConstantFP(N0) &&
10986 !isConstantFPBuildVectorOrConstantFP(N1))
10987 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
10989 // fold (fmul A, 1.0) -> A
10990 if (N1CFP && N1CFP->isExactlyValue(1.0))
10993 if (SDValue NewSel = foldBinOpIntoSelect(N))
10996 if (Options.UnsafeFPMath ||
10997 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
10998 // fold (fmul A, 0) -> 0
10999 if (N1CFP && N1CFP->isZero())
11003 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11004 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11005 if (N0.getOpcode() == ISD::FMUL) {
11006 // Fold scalars or any vector constants (not just splats).
11007 // This fold is done in general by InstCombine, but extra fmul insts
11008 // may have been generated during lowering.
11009 SDValue N00 = N0.getOperand(0);
11010 SDValue N01 = N0.getOperand(1);
11011 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
11012 auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
11013 auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
11015 // Check 1: Make sure that the first operand of the inner multiply is NOT
11016 // a constant. Otherwise, we may induce infinite looping.
11017 if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
11018 // Check 2: Make sure that the second operand of the inner multiply and
11019 // the second operand of the outer multiply are constants.
11020 if ((N1CFP && isConstOrConstSplatFP(N01)) ||
11021 (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
11022 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11023 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11028 // Match a special-case: we convert X * 2.0 into fadd.
11029 // fmul (fadd X, X), C -> fmul X, 2.0 * C
11030 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11031 N0.getOperand(0) == N0.getOperand(1)) {
11032 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11033 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11034 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11038 // fold (fmul X, 2.0) -> (fadd X, X)
11039 if (N1CFP && N1CFP->isExactlyValue(+2.0))
11040 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11042 // fold (fmul X, -1.0) -> (fneg X)
11043 if (N1CFP && N1CFP->isExactlyValue(-1.0))
11044 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11045 return DAG.getNode(ISD::FNEG, DL, VT, N0);
11047 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11048 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11049 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11050 // Both can be negated for free, check to see if at least one is cheaper
11052 if (LHSNeg == 2 || RHSNeg == 2)
11053 return DAG.getNode(ISD::FMUL, DL, VT,
11054 GetNegatedExpression(N0, DAG, LegalOperations),
11055 GetNegatedExpression(N1, DAG, LegalOperations),
11060 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11061 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11062 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11063 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11064 TLI.isOperationLegal(ISD::FABS, VT)) {
11065 SDValue Select = N0, X = N1;
11066 if (Select.getOpcode() != ISD::SELECT)
11067 std::swap(Select, X);
11069 SDValue Cond = Select.getOperand(0);
11070 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11071 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11073 if (TrueOpnd && FalseOpnd &&
11074 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11075 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11076 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11077 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11086 std::swap(TrueOpnd, FalseOpnd);
11094 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11095 TLI.isOperationLegal(ISD::FNEG, VT))
11096 return DAG.getNode(ISD::FNEG, DL, VT,
11097 DAG.getNode(ISD::FABS, DL, VT, X));
11098 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11099 return DAG.getNode(ISD::FABS, DL, VT, X);
11106 // FMUL -> FMA combines:
11107 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11108 AddToWorklist(Fused.getNode());
11115 SDValue DAGCombiner::visitFMA(SDNode *N) {
11116 SDValue N0 = N->getOperand(0);
11117 SDValue N1 = N->getOperand(1);
11118 SDValue N2 = N->getOperand(2);
11119 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11120 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11121 EVT VT = N->getValueType(0);
11123 const TargetOptions &Options = DAG.getTarget().Options;
11125 // FMA nodes have flags that propagate to the created nodes.
11126 const SDNodeFlags Flags = N->getFlags();
11127 bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11129 // Constant fold FMA.
11130 if (isa<ConstantFPSDNode>(N0) &&
11131 isa<ConstantFPSDNode>(N1) &&
11132 isa<ConstantFPSDNode>(N2)) {
11133 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11136 if (UnsafeFPMath) {
11137 if (N0CFP && N0CFP->isZero())
11139 if (N1CFP && N1CFP->isZero())
11142 // TODO: The FMA node should have flags that propagate to these nodes.
11143 if (N0CFP && N0CFP->isExactlyValue(1.0))
11144 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11145 if (N1CFP && N1CFP->isExactlyValue(1.0))
11146 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11148 // Canonicalize (fma c, x, y) -> (fma x, c, y)
11149 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11150 !isConstantFPBuildVectorOrConstantFP(N1))
11151 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11153 if (UnsafeFPMath) {
11154 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11155 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11156 isConstantFPBuildVectorOrConstantFP(N1) &&
11157 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11158 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11159 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11163 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11164 if (N0.getOpcode() == ISD::FMUL &&
11165 isConstantFPBuildVectorOrConstantFP(N1) &&
11166 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11167 return DAG.getNode(ISD::FMA, DL, VT,
11169 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11175 // (fma x, 1, y) -> (fadd x, y)
11176 // (fma x, -1, y) -> (fadd (fneg x), y)
11178 if (N1CFP->isExactlyValue(1.0))
11179 // TODO: The FMA node should have flags that propagate to this node.
11180 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11182 if (N1CFP->isExactlyValue(-1.0) &&
11183 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11184 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11185 AddToWorklist(RHSNeg.getNode());
11186 // TODO: The FMA node should have flags that propagate to this node.
11187 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11190 // fma (fneg x), K, y -> fma x -K, y
11191 if (N0.getOpcode() == ISD::FNEG &&
11192 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11193 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
11194 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11195 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11199 if (UnsafeFPMath) {
11200 // (fma x, c, x) -> (fmul x, (c+1))
11201 if (N1CFP && N0 == N2) {
11202 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11203 DAG.getNode(ISD::FADD, DL, VT, N1,
11204 DAG.getConstantFP(1.0, DL, VT), Flags),
11208 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11209 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11210 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11211 DAG.getNode(ISD::FADD, DL, VT, N1,
11212 DAG.getConstantFP(-1.0, DL, VT), Flags),
11220 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11222 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11223 // Notice that this is not always beneficial. One reason is different targets
11224 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11225 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11226 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11227 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11228 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11229 const SDNodeFlags Flags = N->getFlags();
11230 if (!UnsafeMath && !Flags.hasAllowReciprocal())
11233 // Skip if current node is a reciprocal.
11234 SDValue N0 = N->getOperand(0);
11235 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11236 if (N0CFP && N0CFP->isExactlyValue(1.0))
11239 // Exit early if the target does not want this transform or if there can't
11240 // possibly be enough uses of the divisor to make the transform worthwhile.
11241 SDValue N1 = N->getOperand(1);
11242 unsigned MinUses = TLI.combineRepeatedFPDivisors();
11243 if (!MinUses || N1->use_size() < MinUses)
11246 // Find all FDIV users of the same divisor.
11247 // Use a set because duplicates may be present in the user list.
11248 SetVector<SDNode *> Users;
11249 for (auto *U : N1->uses()) {
11250 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11251 // This division is eligible for optimization only if global unsafe math
11252 // is enabled or if this division allows reciprocal formation.
11253 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11258 // Now that we have the actual number of divisor uses, make sure it meets
11259 // the minimum threshold specified by the target.
11260 if (Users.size() < MinUses)
11263 EVT VT = N->getValueType(0);
11265 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11266 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11268 // Dividend / Divisor -> Dividend * Reciprocal
11269 for (auto *U : Users) {
11270 SDValue Dividend = U->getOperand(0);
11271 if (Dividend != FPOne) {
11272 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11273 Reciprocal, Flags);
11274 CombineTo(U, NewNode);
11275 } else if (U != Reciprocal.getNode()) {
11276 // In the absence of fast-math-flags, this user node is always the
11277 // same node as Reciprocal, but with FMF they may be different nodes.
11278 CombineTo(U, Reciprocal);
11281 return SDValue(N, 0); // N was replaced.
11284 SDValue DAGCombiner::visitFDIV(SDNode *N) {
11285 SDValue N0 = N->getOperand(0);
11286 SDValue N1 = N->getOperand(1);
11287 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11288 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11289 EVT VT = N->getValueType(0);
11291 const TargetOptions &Options = DAG.getTarget().Options;
11292 SDNodeFlags Flags = N->getFlags();
11296 if (SDValue FoldedVOp = SimplifyVBinOp(N))
11299 // fold (fdiv c1, c2) -> c1/c2
11300 if (N0CFP && N1CFP)
11301 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
11303 if (SDValue NewSel = foldBinOpIntoSelect(N))
11306 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
11307 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
11309 // Compute the reciprocal 1.0 / c2.
11310 const APFloat &N1APF = N1CFP->getValueAPF();
11311 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
11312 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
11313 // Only do the transform if the reciprocal is a legal fp immediate that
11314 // isn't too nasty (eg NaN, denormal, ...).
11315 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
11316 (!LegalOperations ||
11317 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
11318 // backend)... we should handle this gracefully after Legalize.
11319 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
11320 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11321 TLI.isFPImmLegal(Recip, VT)))
11322 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11323 DAG.getConstantFP(Recip, DL, VT), Flags);
11326 // If this FDIV is part of a reciprocal square root, it may be folded
11327 // into a target-specific square root estimate instruction.
11328 if (N1.getOpcode() == ISD::FSQRT) {
11329 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
11330 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11332 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
11333 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11334 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11336 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
11337 AddToWorklist(RV.getNode());
11338 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11340 } else if (N1.getOpcode() == ISD::FP_ROUND &&
11341 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11342 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11344 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
11345 AddToWorklist(RV.getNode());
11346 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11348 } else if (N1.getOpcode() == ISD::FMUL) {
11349 // Look through an FMUL. Even though this won't remove the FDIV directly,
11350 // it's still worthwhile to get rid of the FSQRT if possible.
11353 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11354 SqrtOp = N1.getOperand(0);
11355 OtherOp = N1.getOperand(1);
11356 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
11357 SqrtOp = N1.getOperand(1);
11358 OtherOp = N1.getOperand(0);
11360 if (SqrtOp.getNode()) {
11361 // We found a FSQRT, so try to make this fold:
11362 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
11363 if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
11364 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
11365 AddToWorklist(RV.getNode());
11366 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11371 // Fold into a reciprocal estimate and multiply instead of a real divide.
11372 if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
11373 AddToWorklist(RV.getNode());
11374 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11378 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
11379 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11380 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11381 // Both can be negated for free, check to see if at least one is cheaper
11383 if (LHSNeg == 2 || RHSNeg == 2)
11384 return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
11385 GetNegatedExpression(N0, DAG, LegalOperations),
11386 GetNegatedExpression(N1, DAG, LegalOperations),
11391 if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
11392 return CombineRepeatedDivisors;
11397 SDValue DAGCombiner::visitFREM(SDNode *N) {
11398 SDValue N0 = N->getOperand(0);
11399 SDValue N1 = N->getOperand(1);
11400 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11401 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11402 EVT VT = N->getValueType(0);
11404 // fold (frem c1, c2) -> fmod(c1,c2)
11405 if (N0CFP && N1CFP)
11406 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11408 if (SDValue NewSel = foldBinOpIntoSelect(N))
11414 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11415 SDNodeFlags Flags = N->getFlags();
11416 if (!DAG.getTarget().Options.UnsafeFPMath &&
11417 !Flags.hasApproximateFuncs())
11420 SDValue N0 = N->getOperand(0);
11421 if (TLI.isFsqrtCheap(N0, DAG))
11424 // FSQRT nodes have flags that propagate to the created nodes.
11425 return buildSqrtEstimate(N0, Flags);
11428 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11429 /// copysign(x, fp_round(y)) -> copysign(x, y)
11430 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11431 SDValue N1 = N->getOperand(1);
11432 if ((N1.getOpcode() == ISD::FP_EXTEND ||
11433 N1.getOpcode() == ISD::FP_ROUND)) {
11434 // Do not optimize out type conversion of f128 type yet.
11435 // For some targets like x86_64, configuration is changed to keep one f128
11436 // value in one SSE register, but instruction selection cannot handle
11437 // FCOPYSIGN on SSE registers yet.
11438 EVT N1VT = N1->getValueType(0);
11439 EVT N1Op0VT = N1->getOperand(0).getValueType();
11440 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11445 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11446 SDValue N0 = N->getOperand(0);
11447 SDValue N1 = N->getOperand(1);
11448 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11449 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11450 EVT VT = N->getValueType(0);
11452 if (N0CFP && N1CFP) // Constant fold
11453 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11456 const APFloat &V = N1CFP->getValueAPF();
11457 // copysign(x, c1) -> fabs(x) iff ispos(c1)
11458 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11459 if (!V.isNegative()) {
11460 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11461 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11463 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11464 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11465 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11469 // copysign(fabs(x), y) -> copysign(x, y)
11470 // copysign(fneg(x), y) -> copysign(x, y)
11471 // copysign(copysign(x,z), y) -> copysign(x, y)
11472 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11473 N0.getOpcode() == ISD::FCOPYSIGN)
11474 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11476 // copysign(x, abs(y)) -> abs(x)
11477 if (N1.getOpcode() == ISD::FABS)
11478 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11480 // copysign(x, copysign(y,z)) -> copysign(x, z)
11481 if (N1.getOpcode() == ISD::FCOPYSIGN)
11482 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11484 // copysign(x, fp_extend(y)) -> copysign(x, y)
11485 // copysign(x, fp_round(y)) -> copysign(x, y)
11486 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11487 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11492 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
11493 const TargetLowering &TLI) {
11494 // This optimization is guarded by a function attribute because it may produce
11495 // unexpected results. Ie, programs may be relying on the platform-specific
11496 // undefined behavior when the float-to-int conversion overflows.
11497 const Function &F = DAG.getMachineFunction().getFunction();
11498 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
11499 if (StrictOverflow.getValueAsString().equals("false"))
11502 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
11503 // replacing casts with a libcall. We also must be allowed to ignore -0.0
11504 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
11505 // conversions would return +0.0.
11506 // FIXME: We should be able to use node-level FMF here.
11507 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
11508 EVT VT = N->getValueType(0);
11509 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
11510 !DAG.getTarget().Options.NoSignedZerosFPMath)
11513 // fptosi/fptoui round towards zero, so converting from FP to integer and
11514 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
11515 SDValue N0 = N->getOperand(0);
11516 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
11517 N0.getOperand(0).getValueType() == VT)
11518 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11520 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
11521 N0.getOperand(0).getValueType() == VT)
11522 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
11527 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
11528 SDValue N0 = N->getOperand(0);
11529 EVT VT = N->getValueType(0);
11530 EVT OpVT = N0.getValueType();
11532 // fold (sint_to_fp c1) -> c1fp
11533 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11534 // ...but only if the target supports immediate floating-point values
11535 (!LegalOperations ||
11536 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11537 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11539 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
11540 // but UINT_TO_FP is legal on this target, try to convert.
11541 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
11542 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
11543 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
11544 if (DAG.SignBitIsZero(N0))
11545 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11548 // The next optimizations are desirable only if SELECT_CC can be lowered.
11549 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11550 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
11551 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
11553 (!LegalOperations ||
11554 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11557 { N0.getOperand(0), N0.getOperand(1),
11558 DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11559 N0.getOperand(2) };
11560 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11563 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
11564 // (select_cc x, y, 1.0, 0.0,, cc)
11565 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
11566 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
11567 (!LegalOperations ||
11568 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11571 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
11572 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11573 N0.getOperand(0).getOperand(2) };
11574 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11578 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
11584 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
11585 SDValue N0 = N->getOperand(0);
11586 EVT VT = N->getValueType(0);
11587 EVT OpVT = N0.getValueType();
11589 // fold (uint_to_fp c1) -> c1fp
11590 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
11591 // ...but only if the target supports immediate floating-point values
11592 (!LegalOperations ||
11593 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
11594 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
11596 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
11597 // but SINT_TO_FP is legal on this target, try to convert.
11598 if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
11599 TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
11600 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
11601 if (DAG.SignBitIsZero(N0))
11602 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
11605 // The next optimizations are desirable only if SELECT_CC can be lowered.
11606 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
11607 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
11608 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
11609 (!LegalOperations ||
11610 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
11613 { N0.getOperand(0), N0.getOperand(1),
11614 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
11615 N0.getOperand(2) };
11616 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
11620 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
11626 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
11627 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
11628 SDValue N0 = N->getOperand(0);
11629 EVT VT = N->getValueType(0);
11631 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
11634 SDValue Src = N0.getOperand(0);
11635 EVT SrcVT = Src.getValueType();
11636 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
11637 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
11639 // We can safely assume the conversion won't overflow the output range,
11640 // because (for example) (uint8_t)18293.f is undefined behavior.
11642 // Since we can assume the conversion won't overflow, our decision as to
11643 // whether the input will fit in the float should depend on the minimum
11644 // of the input range and output range.
11646 // This means this is also safe for a signed input and unsigned output, since
11647 // a negative input would lead to undefined behavior.
11648 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
11649 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
11650 unsigned ActualSize = std::min(InputSize, OutputSize);
11651 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
11653 // We can only fold away the float conversion if the input range can be
11654 // represented exactly in the float range.
11655 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
11656 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
11657 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
11658 : ISD::ZERO_EXTEND;
11659 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
11661 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
11662 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
11663 return DAG.getBitcast(VT, Src);
11668 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
11669 SDValue N0 = N->getOperand(0);
11670 EVT VT = N->getValueType(0);
11672 // fold (fp_to_sint c1fp) -> c1
11673 if (isConstantFPBuildVectorOrConstantFP(N0))
11674 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
11676 return FoldIntToFPToInt(N, DAG);
11679 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
11680 SDValue N0 = N->getOperand(0);
11681 EVT VT = N->getValueType(0);
11683 // fold (fp_to_uint c1fp) -> c1
11684 if (isConstantFPBuildVectorOrConstantFP(N0))
11685 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
11687 return FoldIntToFPToInt(N, DAG);
11690 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
11691 SDValue N0 = N->getOperand(0);
11692 SDValue N1 = N->getOperand(1);
11693 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11694 EVT VT = N->getValueType(0);
11696 // fold (fp_round c1fp) -> c1fp
11698 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
11700 // fold (fp_round (fp_extend x)) -> x
11701 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
11702 return N0.getOperand(0);
11704 // fold (fp_round (fp_round x)) -> (fp_round x)
11705 if (N0.getOpcode() == ISD::FP_ROUND) {
11706 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
11707 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
11709 // Skip this folding if it results in an fp_round from f80 to f16.
11711 // f80 to f16 always generates an expensive (and as yet, unimplemented)
11712 // libcall to __truncxfhf2 instead of selecting native f16 conversion
11713 // instructions from f32 or f64. Moreover, the first (value-preserving)
11714 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
11716 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
11719 // If the first fp_round isn't a value preserving truncation, it might
11720 // introduce a tie in the second fp_round, that wouldn't occur in the
11721 // single-step fp_round we want to fold to.
11722 // In other words, double rounding isn't the same as rounding.
11723 // Also, this is a value preserving truncation iff both fp_round's are.
11724 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
11726 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
11727 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
11731 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
11732 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
11733 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
11734 N0.getOperand(0), N1);
11735 AddToWorklist(Tmp.getNode());
11736 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
11737 Tmp, N0.getOperand(1));
11740 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11746 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
11747 SDValue N0 = N->getOperand(0);
11748 EVT VT = N->getValueType(0);
11749 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11750 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11752 // fold (fp_round_inreg c1fp) -> c1fp
11753 if (N0CFP && isTypeLegal(EVT)) {
11755 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
11756 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
11762 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
11763 SDValue N0 = N->getOperand(0);
11764 EVT VT = N->getValueType(0);
11766 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
11767 if (N->hasOneUse() &&
11768 N->use_begin()->getOpcode() == ISD::FP_ROUND)
11771 // fold (fp_extend c1fp) -> c1fp
11772 if (isConstantFPBuildVectorOrConstantFP(N0))
11773 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
11775 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
11776 if (N0.getOpcode() == ISD::FP16_TO_FP &&
11777 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
11778 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
11780 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
11782 if (N0.getOpcode() == ISD::FP_ROUND
11783 && N0.getConstantOperandVal(1) == 1) {
11784 SDValue In = N0.getOperand(0);
11785 if (In.getValueType() == VT) return In;
11786 if (VT.bitsLT(In.getValueType()))
11787 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
11788 In, N0.getOperand(1));
11789 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
11792 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
11793 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11794 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11795 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11796 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11798 LN0->getBasePtr(), N0.getValueType(),
11799 LN0->getMemOperand());
11800 CombineTo(N, ExtLoad);
11801 CombineTo(N0.getNode(),
11802 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
11803 N0.getValueType(), ExtLoad,
11804 DAG.getIntPtrConstant(1, SDLoc(N0))),
11805 ExtLoad.getValue(1));
11806 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11809 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11815 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
11816 SDValue N0 = N->getOperand(0);
11817 EVT VT = N->getValueType(0);
11819 // fold (fceil c1) -> fceil(c1)
11820 if (isConstantFPBuildVectorOrConstantFP(N0))
11821 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
11826 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
11827 SDValue N0 = N->getOperand(0);
11828 EVT VT = N->getValueType(0);
11830 // fold (ftrunc c1) -> ftrunc(c1)
11831 if (isConstantFPBuildVectorOrConstantFP(N0))
11832 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
11834 // fold ftrunc (known rounded int x) -> x
11835 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
11836 // likely to be generated to extract integer from a rounded floating value.
11837 switch (N0.getOpcode()) {
11841 case ISD::FNEARBYINT:
11850 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
11851 SDValue N0 = N->getOperand(0);
11852 EVT VT = N->getValueType(0);
11854 // fold (ffloor c1) -> ffloor(c1)
11855 if (isConstantFPBuildVectorOrConstantFP(N0))
11856 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
11861 // FIXME: FNEG and FABS have a lot in common; refactor.
11862 SDValue DAGCombiner::visitFNEG(SDNode *N) {
11863 SDValue N0 = N->getOperand(0);
11864 EVT VT = N->getValueType(0);
11866 // Constant fold FNEG.
11867 if (isConstantFPBuildVectorOrConstantFP(N0))
11868 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
11870 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
11871 &DAG.getTarget().Options))
11872 return GetNegatedExpression(N0, DAG, LegalOperations);
11874 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
11875 // constant pool values.
11876 if (!TLI.isFNegFree(VT) &&
11877 N0.getOpcode() == ISD::BITCAST &&
11878 N0.getNode()->hasOneUse()) {
11879 SDValue Int = N0.getOperand(0);
11880 EVT IntVT = Int.getValueType();
11881 if (IntVT.isInteger() && !IntVT.isVector()) {
11883 if (N0.getValueType().isVector()) {
11884 // For a vector, get a mask such as 0x80... per scalar element
11886 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
11887 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
11889 // For a scalar, just generate 0x80...
11890 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
11893 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
11894 DAG.getConstant(SignMask, DL0, IntVT));
11895 AddToWorklist(Int.getNode());
11896 return DAG.getBitcast(VT, Int);
11900 // (fneg (fmul c, x)) -> (fmul -c, x)
11901 if (N0.getOpcode() == ISD::FMUL &&
11902 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
11903 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
11905 APFloat CVal = CFP1->getValueAPF();
11907 if (Level >= AfterLegalizeDAG &&
11908 (TLI.isFPImmLegal(CVal, VT) ||
11909 TLI.isOperationLegal(ISD::ConstantFP, VT)))
11910 return DAG.getNode(
11911 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
11912 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
11920 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
11921 SDValue N0 = N->getOperand(0);
11922 SDValue N1 = N->getOperand(1);
11923 EVT VT = N->getValueType(0);
11924 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11925 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11927 if (N0CFP && N1CFP) {
11928 const APFloat &C0 = N0CFP->getValueAPF();
11929 const APFloat &C1 = N1CFP->getValueAPF();
11930 return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
11933 // Canonicalize to constant on RHS.
11934 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11935 !isConstantFPBuildVectorOrConstantFP(N1))
11936 return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
11941 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
11942 SDValue N0 = N->getOperand(0);
11943 SDValue N1 = N->getOperand(1);
11944 EVT VT = N->getValueType(0);
11945 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
11946 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
11948 if (N0CFP && N1CFP) {
11949 const APFloat &C0 = N0CFP->getValueAPF();
11950 const APFloat &C1 = N1CFP->getValueAPF();
11951 return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
11954 // Canonicalize to constant on RHS.
11955 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11956 !isConstantFPBuildVectorOrConstantFP(N1))
11957 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
11962 SDValue DAGCombiner::visitFABS(SDNode *N) {
11963 SDValue N0 = N->getOperand(0);
11964 EVT VT = N->getValueType(0);
11966 // fold (fabs c1) -> fabs(c1)
11967 if (isConstantFPBuildVectorOrConstantFP(N0))
11968 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11970 // fold (fabs (fabs x)) -> (fabs x)
11971 if (N0.getOpcode() == ISD::FABS)
11972 return N->getOperand(0);
11974 // fold (fabs (fneg x)) -> (fabs x)
11975 // fold (fabs (fcopysign x, y)) -> (fabs x)
11976 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
11977 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
11979 // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
11980 // constant pool values.
11981 if (!TLI.isFAbsFree(VT) &&
11982 N0.getOpcode() == ISD::BITCAST &&
11983 N0.getNode()->hasOneUse()) {
11984 SDValue Int = N0.getOperand(0);
11985 EVT IntVT = Int.getValueType();
11986 if (IntVT.isInteger() && !IntVT.isVector()) {
11988 if (N0.getValueType().isVector()) {
11989 // For a vector, get a mask such as 0x7f... per scalar element
11991 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
11992 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
11994 // For a scalar, just generate 0x7f...
11995 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
11998 Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
11999 DAG.getConstant(SignMask, DL, IntVT));
12000 AddToWorklist(Int.getNode());
12001 return DAG.getBitcast(N->getValueType(0), Int);
12008 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12009 SDValue Chain = N->getOperand(0);
12010 SDValue N1 = N->getOperand(1);
12011 SDValue N2 = N->getOperand(2);
12013 // If N is a constant we could fold this into a fallthrough or unconditional
12014 // branch. However that doesn't happen very often in normal code, because
12015 // Instcombine/SimplifyCFG should have handled the available opportunities.
12016 // If we did this folding here, it would be necessary to update the
12017 // MachineBasicBlock CFG, which is awkward.
12019 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12021 if (N1.getOpcode() == ISD::SETCC &&
12022 TLI.isOperationLegalOrCustom(ISD::BR_CC,
12023 N1.getOperand(0).getValueType())) {
12024 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12025 Chain, N1.getOperand(2),
12026 N1.getOperand(0), N1.getOperand(1), N2);
12029 if (N1.hasOneUse()) {
12030 if (SDValue NewN1 = rebuildSetCC(N1))
12031 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12037 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12038 if (N.getOpcode() == ISD::SRL ||
12039 (N.getOpcode() == ISD::TRUNCATE &&
12040 (N.getOperand(0).hasOneUse() &&
12041 N.getOperand(0).getOpcode() == ISD::SRL))) {
12042 // Look pass the truncate.
12043 if (N.getOpcode() == ISD::TRUNCATE)
12044 N = N.getOperand(0);
12046 // Match this pattern so that we can generate simpler code:
12049 // %b = and i32 %a, 2
12050 // %c = srl i32 %b, 1
12051 // brcond i32 %c ...
12056 // %b = and i32 %a, 2
12057 // %c = setcc eq %b, 0
12060 // This applies only when the AND constant value has one bit set and the
12061 // SRL constant is equal to the log2 of the AND constant. The back-end is
12062 // smart enough to convert the result into a TEST/JMP sequence.
12063 SDValue Op0 = N.getOperand(0);
12064 SDValue Op1 = N.getOperand(1);
12066 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12067 SDValue AndOp1 = Op0.getOperand(1);
12069 if (AndOp1.getOpcode() == ISD::Constant) {
12070 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12072 if (AndConst.isPowerOf2() &&
12073 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12075 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12076 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12083 // Transform br(xor(x, y)) -> br(x != y)
12084 // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12085 if (N.getOpcode() == ISD::XOR) {
12086 // Because we may call this on a speculatively constructed
12087 // SimplifiedSetCC Node, we need to simplify this node first.
12088 // Ideally this should be folded into SimplifySetCC and not
12089 // here. For now, grab a handle to N so we don't lose it from
12090 // replacements interal to the visit.
12091 HandleSDNode XORHandle(N);
12092 while (N.getOpcode() == ISD::XOR) {
12093 SDValue Tmp = visitXOR(N.getNode());
12094 // No simplification done.
12095 if (!Tmp.getNode())
12097 // Returning N is form in-visit replacement that may invalidated
12098 // N. Grab value from Handle.
12099 if (Tmp.getNode() == N.getNode())
12100 N = XORHandle.getValue();
12101 else // Node simplified. Try simplifying again.
12105 if (N.getOpcode() != ISD::XOR)
12108 SDNode *TheXor = N.getNode();
12110 SDValue Op0 = TheXor->getOperand(0);
12111 SDValue Op1 = TheXor->getOperand(1);
12113 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12114 bool Equal = false;
12115 if (isOneConstant(Op0) && Op0.hasOneUse() &&
12116 Op0.getOpcode() == ISD::XOR) {
12117 TheXor = Op0.getNode();
12121 EVT SetCCVT = N.getValueType();
12123 SetCCVT = getSetCCResultType(SetCCVT);
12124 // Replace the uses of XOR with SETCC
12125 return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12126 Equal ? ISD::SETEQ : ISD::SETNE);
12133 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12135 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12136 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12137 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12139 // If N is a constant we could fold this into a fallthrough or unconditional
12140 // branch. However that doesn't happen very often in normal code, because
12141 // Instcombine/SimplifyCFG should have handled the available opportunities.
12142 // If we did this folding here, it would be necessary to update the
12143 // MachineBasicBlock CFG, which is awkward.
12145 // Use SimplifySetCC to simplify SETCC's.
12146 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12147 CondLHS, CondRHS, CC->get(), SDLoc(N),
12149 if (Simp.getNode()) AddToWorklist(Simp.getNode());
12151 // fold to a simpler setcc
12152 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12153 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12154 N->getOperand(0), Simp.getOperand(2),
12155 Simp.getOperand(0), Simp.getOperand(1),
12161 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12162 /// and that N may be folded in the load / store addressing mode.
12163 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12165 const TargetLowering &TLI) {
12169 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
12170 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12172 VT = LD->getMemoryVT();
12173 AS = LD->getAddressSpace();
12174 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
12175 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12177 VT = ST->getMemoryVT();
12178 AS = ST->getAddressSpace();
12182 TargetLowering::AddrMode AM;
12183 if (N->getOpcode() == ISD::ADD) {
12184 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12187 AM.BaseOffs = Offset->getSExtValue();
12191 } else if (N->getOpcode() == ISD::SUB) {
12192 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12195 AM.BaseOffs = -Offset->getSExtValue();
12202 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12203 VT.getTypeForEVT(*DAG.getContext()), AS);
12206 /// Try turning a load/store into a pre-indexed load/store when the base
12207 /// pointer is an add or subtract and it has other uses besides the load/store.
12208 /// After the transformation, the new indexed load/store has effectively folded
12209 /// the add/subtract in and all of its other uses are redirected to the
12210 /// new load/store.
12211 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
12212 if (Level < AfterLegalizeDAG)
12215 bool isLoad = true;
12218 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
12219 if (LD->isIndexed())
12221 VT = LD->getMemoryVT();
12222 if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
12223 !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
12225 Ptr = LD->getBasePtr();
12226 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
12227 if (ST->isIndexed())
12229 VT = ST->getMemoryVT();
12230 if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
12231 !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
12233 Ptr = ST->getBasePtr();
12239 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
12240 // out. There is no reason to make this a preinc/predec.
12241 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
12242 Ptr.getNode()->hasOneUse())
12245 // Ask the target to do addressing mode selection.
12248 ISD::MemIndexedMode AM = ISD::UNINDEXED;
12249 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
12252 // Backends without true r+i pre-indexed forms may need to pass a
12253 // constant base with a variable offset so that constant coercion
12254 // will work with the patterns in canonical form.
12255 bool Swapped = false;
12256 if (isa<ConstantSDNode>(BasePtr)) {
12257 std::swap(BasePtr, Offset);
12261 // Don't create a indexed load / store with zero offset.
12262 if (isNullConstant(Offset))
12265 // Try turning it into a pre-indexed load / store except when:
12266 // 1) The new base ptr is a frame index.
12267 // 2) If N is a store and the new base ptr is either the same as or is a
12268 // predecessor of the value being stored.
12269 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
12270 // that would create a cycle.
12271 // 4) All uses are load / store ops that use it as old base ptr.
12273 // Check #1. Preinc'ing a frame index would require copying the stack pointer
12274 // (plus the implicit offset) to a register to preinc anyway.
12275 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12280 SDValue Val = cast<StoreSDNode>(N)->getValue();
12281 if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
12285 // Caches for hasPredecessorHelper.
12286 SmallPtrSet<const SDNode *, 32> Visited;
12287 SmallVector<const SDNode *, 16> Worklist;
12288 Worklist.push_back(N);
12290 // If the offset is a constant, there may be other adds of constants that
12291 // can be folded with this one. We should do this to avoid having to keep
12292 // a copy of the original base pointer.
12293 SmallVector<SDNode *, 16> OtherUses;
12294 if (isa<ConstantSDNode>(Offset))
12295 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
12296 UE = BasePtr.getNode()->use_end();
12298 SDUse &Use = UI.getUse();
12299 // Skip the use that is Ptr and uses of other results from BasePtr's
12300 // node (important for nodes that return multiple results).
12301 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
12304 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
12307 if (Use.getUser()->getOpcode() != ISD::ADD &&
12308 Use.getUser()->getOpcode() != ISD::SUB) {
12313 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
12314 if (!isa<ConstantSDNode>(Op1)) {
12319 // FIXME: In some cases, we can be smarter about this.
12320 if (Op1.getValueType() != Offset.getValueType()) {
12325 OtherUses.push_back(Use.getUser());
12329 std::swap(BasePtr, Offset);
12331 // Now check for #3 and #4.
12332 bool RealUse = false;
12334 for (SDNode *Use : Ptr.getNode()->uses()) {
12337 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
12340 // If Ptr may be folded in addressing mode of other use, then it's
12341 // not profitable to do this transformation.
12342 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
12351 Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12352 BasePtr, Offset, AM);
12354 Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12355 BasePtr, Offset, AM);
12358 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
12359 Result.getNode()->dump(&DAG); dbgs() << '\n');
12360 WorklistRemover DeadNodes(*this);
12362 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12363 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12365 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12368 // Finally, since the node is now dead, remove it from the graph.
12369 deleteAndRecombine(N);
12372 std::swap(BasePtr, Offset);
12374 // Replace other uses of BasePtr that can be updated to use Ptr
12375 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
12376 unsigned OffsetIdx = 1;
12377 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
12379 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
12380 BasePtr.getNode() && "Expected BasePtr operand");
12382 // We need to replace ptr0 in the following expression:
12383 // x0 * offset0 + y0 * ptr0 = t0
12385 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
12387 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
12388 // indexed load/store and the expression that needs to be re-written.
12390 // Therefore, we have:
12391 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
12393 ConstantSDNode *CN =
12394 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
12395 int X0, X1, Y0, Y1;
12396 const APInt &Offset0 = CN->getAPIntValue();
12397 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
12399 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
12400 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
12401 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
12402 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
12404 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
12406 APInt CNV = Offset0;
12407 if (X0 < 0) CNV = -CNV;
12408 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
12409 else CNV = CNV - Offset1;
12411 SDLoc DL(OtherUses[i]);
12413 // We can now generate the new expression.
12414 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
12415 SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
12417 SDValue NewUse = DAG.getNode(Opcode,
12419 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
12420 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
12421 deleteAndRecombine(OtherUses[i]);
12424 // Replace the uses of Ptr with uses of the updated base value.
12425 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
12426 deleteAndRecombine(Ptr.getNode());
12427 AddToWorklist(Result.getNode());
12432 /// Try to combine a load/store with a add/sub of the base pointer node into a
12433 /// post-indexed load/store. The transformation folded the add/subtract into the
12434 /// new indexed load/store effectively and all of its uses are redirected to the
12435 /// new load/store.
12436 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
12437 if (Level < AfterLegalizeDAG)
12440 bool isLoad = true;
12443 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
12444 if (LD->isIndexed())
12446 VT = LD->getMemoryVT();
12447 if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
12448 !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
12450 Ptr = LD->getBasePtr();
12451 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
12452 if (ST->isIndexed())
12454 VT = ST->getMemoryVT();
12455 if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
12456 !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
12458 Ptr = ST->getBasePtr();
12464 if (Ptr.getNode()->hasOneUse())
12467 for (SDNode *Op : Ptr.getNode()->uses()) {
12469 (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
12474 ISD::MemIndexedMode AM = ISD::UNINDEXED;
12475 if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
12476 // Don't create a indexed load / store with zero offset.
12477 if (isNullConstant(Offset))
12480 // Try turning it into a post-indexed load / store except when
12481 // 1) All uses are load / store ops that use it as base ptr (and
12482 // it may be folded as addressing mmode).
12483 // 2) Op must be independent of N, i.e. Op is neither a predecessor
12484 // nor a successor of N. Otherwise, if Op is folded that would
12487 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
12491 bool TryNext = false;
12492 for (SDNode *Use : BasePtr.getNode()->uses()) {
12493 if (Use == Ptr.getNode())
12496 // If all the uses are load / store addresses, then don't do the
12498 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
12499 bool RealUse = false;
12500 for (SDNode *UseUse : Use->uses()) {
12501 if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
12516 if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
12517 SDValue Result = isLoad
12518 ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
12519 BasePtr, Offset, AM)
12520 : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
12521 BasePtr, Offset, AM);
12522 ++PostIndexedNodes;
12524 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
12525 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
12527 WorklistRemover DeadNodes(*this);
12529 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
12530 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
12532 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
12535 // Finally, since the node is now dead, remove it from the graph.
12536 deleteAndRecombine(N);
12538 // Replace the uses of Use with uses of the updated base value.
12539 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
12540 Result.getValue(isLoad ? 1 : 0));
12541 deleteAndRecombine(Op);
12550 /// Return the base-pointer arithmetic from an indexed \p LD.
12551 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
12552 ISD::MemIndexedMode AM = LD->getAddressingMode();
12553 assert(AM != ISD::UNINDEXED);
12554 SDValue BP = LD->getOperand(1);
12555 SDValue Inc = LD->getOperand(2);
12557 // Some backends use TargetConstants for load offsets, but don't expect
12558 // TargetConstants in general ADD nodes. We can convert these constants into
12559 // regular Constants (if the constant is not opaque).
12560 assert((Inc.getOpcode() != ISD::TargetConstant ||
12561 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
12562 "Cannot split out indexing using opaque target constants");
12563 if (Inc.getOpcode() == ISD::TargetConstant) {
12564 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
12565 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
12566 ConstInc->getValueType(0));
12570 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
12571 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
12574 SDValue DAGCombiner::visitLOAD(SDNode *N) {
12575 LoadSDNode *LD = cast<LoadSDNode>(N);
12576 SDValue Chain = LD->getChain();
12577 SDValue Ptr = LD->getBasePtr();
12579 // If load is not volatile and there are no uses of the loaded value (and
12580 // the updated indexed value in case of indexed loads), change uses of the
12581 // chain value into uses of the chain input (i.e. delete the dead load).
12582 if (!LD->isVolatile()) {
12583 if (N->getValueType(1) == MVT::Other) {
12584 // Unindexed loads.
12585 if (!N->hasAnyUseOfValue(0)) {
12586 // It's not safe to use the two value CombineTo variant here. e.g.
12587 // v1, chain2 = load chain1, loc
12588 // v2, chain3 = load chain2, loc
12590 // Now we replace use of chain2 with chain1. This makes the second load
12591 // isomorphic to the one we are deleting, and thus makes this load live.
12592 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
12593 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
12595 WorklistRemover DeadNodes(*this);
12596 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
12597 AddUsersToWorklist(Chain.getNode());
12598 if (N->use_empty())
12599 deleteAndRecombine(N);
12601 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12605 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
12607 // If this load has an opaque TargetConstant offset, then we cannot split
12608 // the indexing into an add/sub directly (that TargetConstant may not be
12609 // valid for a different type of node, and we cannot convert an opaque
12610 // target constant into a regular constant).
12611 bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
12612 cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
12614 if (!N->hasAnyUseOfValue(0) &&
12615 ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
12616 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
12618 if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
12619 Index = SplitIndexingFromLoad(LD);
12620 // Try to fold the base pointer arithmetic into subsequent loads and
12622 AddUsersToWorklist(N);
12624 Index = DAG.getUNDEF(N->getValueType(1));
12625 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
12626 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
12627 dbgs() << " and 2 other values\n");
12628 WorklistRemover DeadNodes(*this);
12629 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
12630 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
12631 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
12632 deleteAndRecombine(N);
12633 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12638 // If this load is directly stored, replace the load value with the stored
12640 // TODO: Handle store large -> read small portion.
12641 // TODO: Handle TRUNCSTORE/LOADEXT
12642 if (OptLevel != CodeGenOpt::None &&
12643 ISD::isNormalLoad(N) && !LD->isVolatile()) {
12644 if (ISD::isNON_TRUNCStore(Chain.getNode())) {
12645 StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
12646 if (PrevST->getBasePtr() == Ptr &&
12647 PrevST->getValue().getValueType() == N->getValueType(0))
12648 return CombineTo(N, PrevST->getOperand(1), Chain);
12652 // Try to infer better alignment information than the load already has.
12653 if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
12654 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
12655 if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
12656 SDValue NewLoad = DAG.getExtLoad(
12657 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
12658 LD->getPointerInfo(), LD->getMemoryVT(), Align,
12659 LD->getMemOperand()->getFlags(), LD->getAAInfo());
12660 // NewLoad will always be N as we are only refining the alignment
12661 assert(NewLoad.getNode() == N);
12667 if (LD->isUnindexed()) {
12668 // Walk up chain skipping non-aliasing memory nodes.
12669 SDValue BetterChain = FindBetterChain(N, Chain);
12671 // If there is a better chain.
12672 if (Chain != BetterChain) {
12675 // Replace the chain to void dependency.
12676 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
12677 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
12678 BetterChain, Ptr, LD->getMemOperand());
12680 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
12681 LD->getValueType(0),
12682 BetterChain, Ptr, LD->getMemoryVT(),
12683 LD->getMemOperand());
12686 // Create token factor to keep old chain connected.
12687 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
12688 MVT::Other, Chain, ReplLoad.getValue(1));
12690 // Replace uses with load result and token factor
12691 return CombineTo(N, ReplLoad.getValue(0), Token);
12695 // Try transforming N to an indexed load.
12696 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12697 return SDValue(N, 0);
12699 // Try to slice up N to more direct loads if the slices are mapped to
12700 // different register banks or pairing can take place.
12701 if (SliceUpLoad(N))
12702 return SDValue(N, 0);
12709 /// Helper structure used to slice a load in smaller loads.
12710 /// Basically a slice is obtained from the following sequence:
12711 /// Origin = load Ty1, Base
12712 /// Shift = srl Ty1 Origin, CstTy Amount
12713 /// Inst = trunc Shift to Ty2
12715 /// Then, it will be rewritten into:
12716 /// Slice = load SliceTy, Base + SliceOffset
12717 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
12719 /// SliceTy is deduced from the number of bits that are actually used to
12721 struct LoadedSlice {
12722 /// Helper structure used to compute the cost of a slice.
12724 /// Are we optimizing for code size.
12728 unsigned Loads = 0;
12729 unsigned Truncates = 0;
12730 unsigned CrossRegisterBanksCopies = 0;
12731 unsigned ZExts = 0;
12732 unsigned Shift = 0;
12734 Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
12736 /// Get the cost of one isolated slice.
12737 Cost(const LoadedSlice &LS, bool ForCodeSize = false)
12738 : ForCodeSize(ForCodeSize), Loads(1) {
12739 EVT TruncType = LS.Inst->getValueType(0);
12740 EVT LoadedType = LS.getLoadedType();
12741 if (TruncType != LoadedType &&
12742 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
12746 /// Account for slicing gain in the current cost.
12747 /// Slicing provide a few gains like removing a shift or a
12748 /// truncate. This method allows to grow the cost of the original
12749 /// load with the gain from this slice.
12750 void addSliceGain(const LoadedSlice &LS) {
12751 // Each slice saves a truncate.
12752 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
12753 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
12754 LS.Inst->getValueType(0)))
12756 // If there is a shift amount, this slice gets rid of it.
12759 // If this slice can merge a cross register bank copy, account for it.
12760 if (LS.canMergeExpensiveCrossRegisterBankCopy())
12761 ++CrossRegisterBanksCopies;
12764 Cost &operator+=(const Cost &RHS) {
12765 Loads += RHS.Loads;
12766 Truncates += RHS.Truncates;
12767 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
12768 ZExts += RHS.ZExts;
12769 Shift += RHS.Shift;
12773 bool operator==(const Cost &RHS) const {
12774 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
12775 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
12776 ZExts == RHS.ZExts && Shift == RHS.Shift;
12779 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
12781 bool operator<(const Cost &RHS) const {
12782 // Assume cross register banks copies are as expensive as loads.
12783 // FIXME: Do we want some more target hooks?
12784 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
12785 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
12786 // Unless we are optimizing for code size, consider the
12787 // expensive operation first.
12788 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
12789 return ExpensiveOpsLHS < ExpensiveOpsRHS;
12790 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
12791 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
12794 bool operator>(const Cost &RHS) const { return RHS < *this; }
12796 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
12798 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
12801 // The last instruction that represent the slice. This should be a
12802 // truncate instruction.
12805 // The original load instruction.
12806 LoadSDNode *Origin;
12808 // The right shift amount in bits from the original load.
12811 // The DAG from which Origin came from.
12812 // This is used to get some contextual information about legal types, etc.
12815 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
12816 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
12817 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
12819 /// Get the bits used in a chunk of bits \p BitWidth large.
12820 /// \return Result is \p BitWidth and has used bits set to 1 and
12821 /// not used bits set to 0.
12822 APInt getUsedBits() const {
12823 // Reproduce the trunc(lshr) sequence:
12824 // - Start from the truncated value.
12825 // - Zero extend to the desired bit width.
12827 assert(Origin && "No original load to compare against.");
12828 unsigned BitWidth = Origin->getValueSizeInBits(0);
12829 assert(Inst && "This slice is not bound to an instruction");
12830 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
12831 "Extracted slice is bigger than the whole type!");
12832 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
12833 UsedBits.setAllBits();
12834 UsedBits = UsedBits.zext(BitWidth);
12835 UsedBits <<= Shift;
12839 /// Get the size of the slice to be loaded in bytes.
12840 unsigned getLoadedSize() const {
12841 unsigned SliceSize = getUsedBits().countPopulation();
12842 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
12843 return SliceSize / 8;
12846 /// Get the type that will be loaded for this slice.
12847 /// Note: This may not be the final type for the slice.
12848 EVT getLoadedType() const {
12849 assert(DAG && "Missing context");
12850 LLVMContext &Ctxt = *DAG->getContext();
12851 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
12854 /// Get the alignment of the load used for this slice.
12855 unsigned getAlignment() const {
12856 unsigned Alignment = Origin->getAlignment();
12857 unsigned Offset = getOffsetFromBase();
12859 Alignment = MinAlign(Alignment, Alignment + Offset);
12863 /// Check if this slice can be rewritten with legal operations.
12864 bool isLegal() const {
12865 // An invalid slice is not legal.
12866 if (!Origin || !Inst || !DAG)
12869 // Offsets are for indexed load only, we do not handle that.
12870 if (!Origin->getOffset().isUndef())
12873 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12875 // Check that the type is legal.
12876 EVT SliceType = getLoadedType();
12877 if (!TLI.isTypeLegal(SliceType))
12880 // Check that the load is legal for this type.
12881 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
12884 // Check that the offset can be computed.
12885 // 1. Check its type.
12886 EVT PtrType = Origin->getBasePtr().getValueType();
12887 if (PtrType == MVT::Untyped || PtrType.isExtended())
12890 // 2. Check that it fits in the immediate.
12891 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
12894 // 3. Check that the computation is legal.
12895 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
12898 // Check that the zext is legal if it needs one.
12899 EVT TruncateType = Inst->getValueType(0);
12900 if (TruncateType != SliceType &&
12901 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
12907 /// Get the offset in bytes of this slice in the original chunk of
12909 /// \pre DAG != nullptr.
12910 uint64_t getOffsetFromBase() const {
12911 assert(DAG && "Missing context.");
12912 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
12913 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
12914 uint64_t Offset = Shift / 8;
12915 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
12916 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
12917 "The size of the original loaded type is not a multiple of a"
12919 // If Offset is bigger than TySizeInBytes, it means we are loading all
12920 // zeros. This should have been optimized before in the process.
12921 assert(TySizeInBytes > Offset &&
12922 "Invalid shift amount for given loaded size");
12924 Offset = TySizeInBytes - Offset - getLoadedSize();
12928 /// Generate the sequence of instructions to load the slice
12929 /// represented by this object and redirect the uses of this slice to
12930 /// this new sequence of instructions.
12931 /// \pre this->Inst && this->Origin are valid Instructions and this
12932 /// object passed the legal check: LoadedSlice::isLegal returned true.
12933 /// \return The last instruction of the sequence used to load the slice.
12934 SDValue loadSlice() const {
12935 assert(Inst && Origin && "Unable to replace a non-existing slice.");
12936 const SDValue &OldBaseAddr = Origin->getBasePtr();
12937 SDValue BaseAddr = OldBaseAddr;
12938 // Get the offset in that chunk of bytes w.r.t. the endianness.
12939 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
12940 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
12942 // BaseAddr = BaseAddr + Offset.
12943 EVT ArithType = BaseAddr.getValueType();
12945 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
12946 DAG->getConstant(Offset, DL, ArithType));
12949 // Create the type of the loaded slice according to its size.
12950 EVT SliceType = getLoadedType();
12952 // Create the load for the slice.
12954 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
12955 Origin->getPointerInfo().getWithOffset(Offset),
12956 getAlignment(), Origin->getMemOperand()->getFlags());
12957 // If the final type is not the same as the loaded type, this means that
12958 // we have to pad with zero. Create a zero extend for that.
12959 EVT FinalType = Inst->getValueType(0);
12960 if (SliceType != FinalType)
12962 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
12966 /// Check if this slice can be merged with an expensive cross register
12967 /// bank copy. E.g.,
12969 /// f = bitcast i32 i to float
12970 bool canMergeExpensiveCrossRegisterBankCopy() const {
12971 if (!Inst || !Inst->hasOneUse())
12973 SDNode *Use = *Inst->use_begin();
12974 if (Use->getOpcode() != ISD::BITCAST)
12976 assert(DAG && "Missing context");
12977 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
12978 EVT ResVT = Use->getValueType(0);
12979 const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
12980 const TargetRegisterClass *ArgRC =
12981 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
12982 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
12985 // At this point, we know that we perform a cross-register-bank copy.
12986 // Check if it is expensive.
12987 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
12988 // Assume bitcasts are cheap, unless both register classes do not
12989 // explicitly share a common sub class.
12990 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
12993 // Check if it will be merged with the load.
12994 // 1. Check the alignment constraint.
12995 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
12996 ResVT.getTypeForEVT(*DAG->getContext()));
12998 if (RequiredAlignment > getAlignment())
13001 // 2. Check that the load is a legal operation for that type.
13002 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13005 // 3. Check that we do not have a zext in the way.
13006 if (Inst->getValueType(0) != getLoadedType())
13013 } // end anonymous namespace
13015 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13016 /// \p UsedBits looks like 0..0 1..1 0..0.
13017 static bool areUsedBitsDense(const APInt &UsedBits) {
13018 // If all the bits are one, this is dense!
13019 if (UsedBits.isAllOnesValue())
13022 // Get rid of the unused bits on the right.
13023 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13024 // Get rid of the unused bits on the left.
13025 if (NarrowedUsedBits.countLeadingZeros())
13026 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13027 // Check that the chunk of bits is completely used.
13028 return NarrowedUsedBits.isAllOnesValue();
13031 /// Check whether or not \p First and \p Second are next to each other
13032 /// in memory. This means that there is no hole between the bits loaded
13033 /// by \p First and the bits loaded by \p Second.
13034 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13035 const LoadedSlice &Second) {
13036 assert(First.Origin == Second.Origin && First.Origin &&
13037 "Unable to match different memory origins.");
13038 APInt UsedBits = First.getUsedBits();
13039 assert((UsedBits & Second.getUsedBits()) == 0 &&
13040 "Slices are not supposed to overlap.");
13041 UsedBits |= Second.getUsedBits();
13042 return areUsedBitsDense(UsedBits);
13045 /// Adjust the \p GlobalLSCost according to the target
13046 /// paring capabilities and the layout of the slices.
13047 /// \pre \p GlobalLSCost should account for at least as many loads as
13048 /// there is in the slices in \p LoadedSlices.
13049 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13050 LoadedSlice::Cost &GlobalLSCost) {
13051 unsigned NumberOfSlices = LoadedSlices.size();
13052 // If there is less than 2 elements, no pairing is possible.
13053 if (NumberOfSlices < 2)
13056 // Sort the slices so that elements that are likely to be next to each
13057 // other in memory are next to each other in the list.
13058 llvm::sort(LoadedSlices.begin(), LoadedSlices.end(),
13059 [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
13060 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
13061 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
13063 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
13064 // First (resp. Second) is the first (resp. Second) potentially candidate
13065 // to be placed in a paired load.
13066 const LoadedSlice *First = nullptr;
13067 const LoadedSlice *Second = nullptr;
13068 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
13069 // Set the beginning of the pair.
13071 Second = &LoadedSlices[CurrSlice];
13073 // If First is NULL, it means we start a new pair.
13074 // Get to the next slice.
13078 EVT LoadedType = First->getLoadedType();
13080 // If the types of the slices are different, we cannot pair them.
13081 if (LoadedType != Second->getLoadedType())
13084 // Check if the target supplies paired loads for this type.
13085 unsigned RequiredAlignment = 0;
13086 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
13087 // move to the next pair, this type is hopeless.
13091 // Check if we meet the alignment requirement.
13092 if (RequiredAlignment > First->getAlignment())
13095 // Check that both loads are next to each other in memory.
13096 if (!areSlicesNextToEachOther(*First, *Second))
13099 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
13100 --GlobalLSCost.Loads;
13101 // Move to the next pair.
13106 /// Check the profitability of all involved LoadedSlice.
13107 /// Currently, it is considered profitable if there is exactly two
13108 /// involved slices (1) which are (2) next to each other in memory, and
13109 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
13111 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
13112 /// the elements themselves.
13114 /// FIXME: When the cost model will be mature enough, we can relax
13115 /// constraints (1) and (2).
13116 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13117 const APInt &UsedBits, bool ForCodeSize) {
13118 unsigned NumberOfSlices = LoadedSlices.size();
13119 if (StressLoadSlicing)
13120 return NumberOfSlices > 1;
13123 if (NumberOfSlices != 2)
13127 if (!areUsedBitsDense(UsedBits))
13131 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
13132 // The original code has one big load.
13133 OrigCost.Loads = 1;
13134 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
13135 const LoadedSlice &LS = LoadedSlices[CurrSlice];
13136 // Accumulate the cost of all the slices.
13137 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
13138 GlobalSlicingCost += SliceCost;
13140 // Account as cost in the original configuration the gain obtained
13141 // with the current slices.
13142 OrigCost.addSliceGain(LS);
13145 // If the target supports paired load, adjust the cost accordingly.
13146 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
13147 return OrigCost > GlobalSlicingCost;
13150 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
13151 /// operations, split it in the various pieces being extracted.
13153 /// This sort of thing is introduced by SROA.
13154 /// This slicing takes care not to insert overlapping loads.
13155 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
13156 bool DAGCombiner::SliceUpLoad(SDNode *N) {
13157 if (Level < AfterLegalizeDAG)
13160 LoadSDNode *LD = cast<LoadSDNode>(N);
13161 if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
13162 !LD->getValueType(0).isInteger())
13165 // Keep track of already used bits to detect overlapping values.
13166 // In that case, we will just abort the transformation.
13167 APInt UsedBits(LD->getValueSizeInBits(0), 0);
13169 SmallVector<LoadedSlice, 4> LoadedSlices;
13171 // Check if this load is used as several smaller chunks of bits.
13172 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
13173 // of computation for each trunc.
13174 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
13175 UI != UIEnd; ++UI) {
13176 // Skip the uses of the chain.
13177 if (UI.getUse().getResNo() != 0)
13180 SDNode *User = *UI;
13181 unsigned Shift = 0;
13183 // Check if this is a trunc(lshr).
13184 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
13185 isa<ConstantSDNode>(User->getOperand(1))) {
13186 Shift = User->getConstantOperandVal(1);
13187 User = *User->use_begin();
13190 // At this point, User is a Truncate, iff we encountered, trunc or
13192 if (User->getOpcode() != ISD::TRUNCATE)
13195 // The width of the type must be a power of 2 and greater than 8-bits.
13196 // Otherwise the load cannot be represented in LLVM IR.
13197 // Moreover, if we shifted with a non-8-bits multiple, the slice
13198 // will be across several bytes. We do not support that.
13199 unsigned Width = User->getValueSizeInBits(0);
13200 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
13203 // Build the slice for this chain of computations.
13204 LoadedSlice LS(User, LD, Shift, &DAG);
13205 APInt CurrentUsedBits = LS.getUsedBits();
13207 // Check if this slice overlaps with another.
13208 if ((CurrentUsedBits & UsedBits) != 0)
13210 // Update the bits used globally.
13211 UsedBits |= CurrentUsedBits;
13213 // Check if the new slice would be legal.
13217 // Record the slice.
13218 LoadedSlices.push_back(LS);
13221 // Abort slicing if it does not seem to be profitable.
13222 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
13227 // Rewrite each chain to use an independent load.
13228 // By construction, each chain can be represented by a unique load.
13230 // Prepare the argument for the new token factor for all the slices.
13231 SmallVector<SDValue, 8> ArgChains;
13232 for (SmallVectorImpl<LoadedSlice>::const_iterator
13233 LSIt = LoadedSlices.begin(),
13234 LSItEnd = LoadedSlices.end();
13235 LSIt != LSItEnd; ++LSIt) {
13236 SDValue SliceInst = LSIt->loadSlice();
13237 CombineTo(LSIt->Inst, SliceInst, true);
13238 if (SliceInst.getOpcode() != ISD::LOAD)
13239 SliceInst = SliceInst.getOperand(0);
13240 assert(SliceInst->getOpcode() == ISD::LOAD &&
13241 "It takes more than a zext to get to the loaded slice!!");
13242 ArgChains.push_back(SliceInst.getValue(1));
13245 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
13247 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13248 AddToWorklist(Chain.getNode());
13252 /// Check to see if V is (and load (ptr), imm), where the load is having
13253 /// specific bytes cleared out. If so, return the byte size being masked out
13254 /// and the shift amount.
13255 static std::pair<unsigned, unsigned>
13256 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
13257 std::pair<unsigned, unsigned> Result(0, 0);
13259 // Check for the structure we're looking for.
13260 if (V->getOpcode() != ISD::AND ||
13261 !isa<ConstantSDNode>(V->getOperand(1)) ||
13262 !ISD::isNormalLoad(V->getOperand(0).getNode()))
13265 // Check the chain and pointer.
13266 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
13267 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
13269 // This only handles simple types.
13270 if (V.getValueType() != MVT::i16 &&
13271 V.getValueType() != MVT::i32 &&
13272 V.getValueType() != MVT::i64)
13275 // Check the constant mask. Invert it so that the bits being masked out are
13276 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
13277 // follow the sign bit for uniformity.
13278 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
13279 unsigned NotMaskLZ = countLeadingZeros(NotMask);
13280 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
13281 unsigned NotMaskTZ = countTrailingZeros(NotMask);
13282 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
13283 if (NotMaskLZ == 64) return Result; // All zero mask.
13285 // See if we have a continuous run of bits. If so, we have 0*1+0*
13286 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
13289 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
13290 if (V.getValueType() != MVT::i64 && NotMaskLZ)
13291 NotMaskLZ -= 64-V.getValueSizeInBits();
13293 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
13294 switch (MaskedBytes) {
13298 default: return Result; // All one mask, or 5-byte mask.
13301 // Verify that the first bit starts at a multiple of mask so that the access
13302 // is aligned the same as the access width.
13303 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
13305 // For narrowing to be valid, it must be the case that the load the
13306 // immediately preceeding memory operation before the store.
13307 if (LD == Chain.getNode())
13309 else if (Chain->getOpcode() == ISD::TokenFactor &&
13310 SDValue(LD, 1).hasOneUse()) {
13311 // LD has only 1 chain use so they are no indirect dependencies.
13313 for (const SDValue &ChainOp : Chain->op_values())
13314 if (ChainOp.getNode() == LD) {
13321 return Result; // Fail.
13323 Result.first = MaskedBytes;
13324 Result.second = NotMaskTZ/8;
13328 /// Check to see if IVal is something that provides a value as specified by
13329 /// MaskInfo. If so, replace the specified store with a narrower store of
13330 /// truncated IVal.
13332 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
13333 SDValue IVal, StoreSDNode *St,
13335 unsigned NumBytes = MaskInfo.first;
13336 unsigned ByteShift = MaskInfo.second;
13337 SelectionDAG &DAG = DC->getDAG();
13339 // Check to see if IVal is all zeros in the part being masked in by the 'or'
13340 // that uses this. If not, this is not a replacement.
13341 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
13342 ByteShift*8, (ByteShift+NumBytes)*8);
13343 if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
13345 // Check that it is legal on the target to do this. It is legal if the new
13346 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
13348 MVT VT = MVT::getIntegerVT(NumBytes*8);
13349 if (!DC->isTypeLegal(VT))
13352 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
13353 // shifted by ByteShift and truncated down to NumBytes.
13356 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
13357 DAG.getConstant(ByteShift*8, DL,
13358 DC->getShiftAmountTy(IVal.getValueType())));
13361 // Figure out the offset for the store and the alignment of the access.
13363 unsigned NewAlign = St->getAlignment();
13365 if (DAG.getDataLayout().isLittleEndian())
13366 StOffset = ByteShift;
13368 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
13370 SDValue Ptr = St->getBasePtr();
13373 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
13374 Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
13375 NewAlign = MinAlign(NewAlign, StOffset);
13378 // Truncate down to the new size.
13379 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
13383 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
13384 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
13388 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
13389 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
13390 /// narrowing the load and store if it would end up being a win for performance
13392 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
13393 StoreSDNode *ST = cast<StoreSDNode>(N);
13394 if (ST->isVolatile())
13397 SDValue Chain = ST->getChain();
13398 SDValue Value = ST->getValue();
13399 SDValue Ptr = ST->getBasePtr();
13400 EVT VT = Value.getValueType();
13402 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
13405 unsigned Opc = Value.getOpcode();
13407 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
13408 // is a byte mask indicating a consecutive number of bytes, check to see if
13409 // Y is known to provide just those bytes. If so, we try to replace the
13410 // load + replace + store sequence with a single (narrower) store, which makes
13412 if (Opc == ISD::OR) {
13413 std::pair<unsigned, unsigned> MaskedLoad;
13414 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
13415 if (MaskedLoad.first)
13416 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13417 Value.getOperand(1), ST,this))
13418 return SDValue(NewST, 0);
13420 // Or is commutative, so try swapping X and Y.
13421 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
13422 if (MaskedLoad.first)
13423 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
13424 Value.getOperand(0), ST,this))
13425 return SDValue(NewST, 0);
13428 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
13429 Value.getOperand(1).getOpcode() != ISD::Constant)
13432 SDValue N0 = Value.getOperand(0);
13433 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13434 Chain == SDValue(N0.getNode(), 1)) {
13435 LoadSDNode *LD = cast<LoadSDNode>(N0);
13436 if (LD->getBasePtr() != Ptr ||
13437 LD->getPointerInfo().getAddrSpace() !=
13438 ST->getPointerInfo().getAddrSpace())
13441 // Find the type to narrow it the load / op / store to.
13442 SDValue N1 = Value.getOperand(1);
13443 unsigned BitWidth = N1.getValueSizeInBits();
13444 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
13445 if (Opc == ISD::AND)
13446 Imm ^= APInt::getAllOnesValue(BitWidth);
13447 if (Imm == 0 || Imm.isAllOnesValue())
13449 unsigned ShAmt = Imm.countTrailingZeros();
13450 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
13451 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
13452 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13453 // The narrowing should be profitable, the load/store operation should be
13454 // legal (or custom) and the store size should be equal to the NewVT width.
13455 while (NewBW < BitWidth &&
13456 (NewVT.getStoreSizeInBits() != NewBW ||
13457 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
13458 !TLI.isNarrowingProfitable(VT, NewVT))) {
13459 NewBW = NextPowerOf2(NewBW);
13460 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
13462 if (NewBW >= BitWidth)
13465 // If the lsb changed does not start at the type bitwidth boundary,
13466 // start at the previous one.
13468 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
13469 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
13470 std::min(BitWidth, ShAmt + NewBW));
13471 if ((Imm & Mask) == Imm) {
13472 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
13473 if (Opc == ISD::AND)
13474 NewImm ^= APInt::getAllOnesValue(NewBW);
13475 uint64_t PtrOff = ShAmt / 8;
13476 // For big endian targets, we need to adjust the offset to the pointer to
13477 // load the correct bytes.
13478 if (DAG.getDataLayout().isBigEndian())
13479 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
13481 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
13482 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
13483 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
13486 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
13487 Ptr.getValueType(), Ptr,
13488 DAG.getConstant(PtrOff, SDLoc(LD),
13489 Ptr.getValueType()));
13491 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
13492 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
13493 LD->getMemOperand()->getFlags(), LD->getAAInfo());
13494 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
13495 DAG.getConstant(NewImm, SDLoc(Value),
13498 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
13499 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
13501 AddToWorklist(NewPtr.getNode());
13502 AddToWorklist(NewLD.getNode());
13503 AddToWorklist(NewVal.getNode());
13504 WorklistRemover DeadNodes(*this);
13505 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
13514 /// For a given floating point load / store pair, if the load value isn't used
13515 /// by any other operations, then consider transforming the pair to integer
13516 /// load / store operations if the target deems the transformation profitable.
13517 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
13518 StoreSDNode *ST = cast<StoreSDNode>(N);
13519 SDValue Chain = ST->getChain();
13520 SDValue Value = ST->getValue();
13521 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
13522 Value.hasOneUse() &&
13523 Chain == SDValue(Value.getNode(), 1)) {
13524 LoadSDNode *LD = cast<LoadSDNode>(Value);
13525 EVT VT = LD->getMemoryVT();
13526 if (!VT.isFloatingPoint() ||
13527 VT != ST->getMemoryVT() ||
13528 LD->isNonTemporal() ||
13529 ST->isNonTemporal() ||
13530 LD->getPointerInfo().getAddrSpace() != 0 ||
13531 ST->getPointerInfo().getAddrSpace() != 0)
13534 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
13535 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
13536 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
13537 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
13538 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
13541 unsigned LDAlign = LD->getAlignment();
13542 unsigned STAlign = ST->getAlignment();
13543 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
13544 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
13545 if (LDAlign < ABIAlign || STAlign < ABIAlign)
13549 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
13550 LD->getPointerInfo(), LDAlign);
13553 DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
13554 ST->getPointerInfo(), STAlign);
13556 AddToWorklist(NewLD.getNode());
13557 AddToWorklist(NewST.getNode());
13558 WorklistRemover DeadNodes(*this);
13559 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
13567 // This is a helper function for visitMUL to check the profitability
13568 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
13569 // MulNode is the original multiply, AddNode is (add x, c1),
13570 // and ConstNode is c2.
13572 // If the (add x, c1) has multiple uses, we could increase
13573 // the number of adds if we make this transformation.
13574 // It would only be worth doing this if we can remove a
13575 // multiply in the process. Check for that here.
13579 // We're checking for cases where we have common "c3 * A" expressions.
13580 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
13582 SDValue &ConstNode) {
13585 // If the add only has one use, this would be OK to do.
13586 if (AddNode.getNode()->hasOneUse())
13589 // Walk all the users of the constant with which we're multiplying.
13590 for (SDNode *Use : ConstNode->uses()) {
13591 if (Use == MulNode) // This use is the one we're on right now. Skip it.
13594 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
13596 SDNode *MulVar = AddNode.getOperand(0).getNode();
13598 // OtherOp is what we're multiplying against the constant.
13599 if (Use->getOperand(0) == ConstNode)
13600 OtherOp = Use->getOperand(1).getNode();
13602 OtherOp = Use->getOperand(0).getNode();
13604 // Check to see if multiply is with the same operand of our "add".
13606 // ConstNode = CONST
13607 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
13609 // AddNode = (A + c1) <-- MulVar is A.
13610 // = AddNode * ConstNode <-- current visiting instruction.
13612 // If we make this transformation, we will have a common
13613 // multiply (ConstNode * A) that we can save.
13614 if (OtherOp == MulVar)
13617 // Now check to see if a future expansion will give us a common
13620 // ConstNode = CONST
13621 // AddNode = (A + c1)
13622 // ... = AddNode * ConstNode <-- current visiting instruction.
13624 // OtherOp = (A + c2)
13625 // Use = OtherOp * ConstNode <-- visiting Use.
13627 // If we make this transformation, we will have a common
13628 // multiply (CONST * A) after we also do the same transformation
13629 // to the "t2" instruction.
13630 if (OtherOp->getOpcode() == ISD::ADD &&
13631 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
13632 OtherOp->getOperand(0).getNode() == MulVar)
13637 // Didn't find a case where this would be profitable.
13641 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
13642 unsigned NumStores) {
13643 SmallVector<SDValue, 8> Chains;
13644 SmallPtrSet<const SDNode *, 8> Visited;
13645 SDLoc StoreDL(StoreNodes[0].MemNode);
13647 for (unsigned i = 0; i < NumStores; ++i) {
13648 Visited.insert(StoreNodes[i].MemNode);
13651 // don't include nodes that are children
13652 for (unsigned i = 0; i < NumStores; ++i) {
13653 if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
13654 Chains.push_back(StoreNodes[i].MemNode->getChain());
13657 assert(Chains.size() > 0 && "Chain should have generated a chain");
13658 return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
13661 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
13662 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
13663 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
13664 // Make sure we have something to merge.
13668 // The latest Node in the DAG.
13669 SDLoc DL(StoreNodes[0].MemNode);
13671 int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
13672 unsigned SizeInBits = NumStores * ElementSizeBits;
13673 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
13677 unsigned Elts = NumStores * NumMemElts;
13678 // Get the type for the merged vector store.
13679 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
13681 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
13685 if (IsConstantSrc) {
13686 SmallVector<SDValue, 8> BuildVector;
13687 for (unsigned I = 0; I != NumStores; ++I) {
13688 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
13689 SDValue Val = St->getValue();
13690 // If constant is of the wrong type, convert it now.
13691 if (MemVT != Val.getValueType()) {
13692 Val = peekThroughBitcast(Val);
13693 // Deal with constants of wrong size.
13694 if (ElementSizeBits != Val.getValueSizeInBits()) {
13696 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
13697 if (isa<ConstantFPSDNode>(Val)) {
13698 // Not clear how to truncate FP values.
13700 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
13701 Val = DAG.getConstant(C->getAPIntValue()
13702 .zextOrTrunc(Val.getValueSizeInBits())
13703 .zextOrTrunc(ElementSizeBits),
13704 SDLoc(C), IntMemVT);
13706 // Make sure correctly size type is the correct type.
13707 Val = DAG.getBitcast(MemVT, Val);
13709 BuildVector.push_back(Val);
13711 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
13712 : ISD::BUILD_VECTOR,
13713 DL, StoreTy, BuildVector);
13715 SmallVector<SDValue, 8> Ops;
13716 for (unsigned i = 0; i < NumStores; ++i) {
13717 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
13718 SDValue Val = peekThroughBitcast(St->getValue());
13719 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
13720 // type MemVT. If the underlying value is not the correct
13721 // type, but it is an extraction of an appropriate vector we
13722 // can recast Val to be of the correct type. This may require
13723 // converting between EXTRACT_VECTOR_ELT and
13724 // EXTRACT_SUBVECTOR.
13725 if ((MemVT != Val.getValueType()) &&
13726 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
13727 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
13728 SDValue Vec = Val.getOperand(0);
13729 EVT MemVTScalarTy = MemVT.getScalarType();
13730 // We may need to add a bitcast here to get types to line up.
13731 if (MemVTScalarTy != Vec.getValueType()) {
13732 unsigned Elts = Vec.getValueType().getSizeInBits() /
13733 MemVTScalarTy.getSizeInBits();
13735 EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
13736 Vec = DAG.getBitcast(NewVecTy, Vec);
13738 auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
13739 : ISD::EXTRACT_VECTOR_ELT;
13740 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
13742 Ops.push_back(Val);
13745 // Build the extracted vector elements back into a vector.
13746 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
13747 : ISD::BUILD_VECTOR,
13751 // We should always use a vector store when merging extracted vector
13752 // elements, so this path implies a store of constants.
13753 assert(IsConstantSrc && "Merged vector elements should use vector store");
13755 APInt StoreInt(SizeInBits, 0);
13757 // Construct a single integer constant which is made of the smaller
13758 // constant inputs.
13759 bool IsLE = DAG.getDataLayout().isLittleEndian();
13760 for (unsigned i = 0; i < NumStores; ++i) {
13761 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
13762 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
13764 SDValue Val = St->getValue();
13765 Val = peekThroughBitcast(Val);
13766 StoreInt <<= ElementSizeBits;
13767 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
13768 StoreInt |= C->getAPIntValue()
13769 .zextOrTrunc(ElementSizeBits)
13770 .zextOrTrunc(SizeInBits);
13771 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
13772 StoreInt |= C->getValueAPF()
13774 .zextOrTrunc(ElementSizeBits)
13775 .zextOrTrunc(SizeInBits);
13776 // If fp truncation is necessary give up for now.
13777 if (MemVT.getSizeInBits() != ElementSizeBits)
13780 llvm_unreachable("Invalid constant element type");
13784 // Create the new Load and Store operations.
13785 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
13788 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
13789 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
13791 // make sure we use trunc store if it's necessary to be legal.
13794 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
13795 FirstInChain->getPointerInfo(),
13796 FirstInChain->getAlignment());
13797 } else { // Must be realized as a trunc store
13798 EVT LegalizedStoredValTy =
13799 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
13800 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
13801 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
13802 SDValue ExtendedStoreVal =
13803 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
13804 LegalizedStoredValTy);
13805 NewStore = DAG.getTruncStore(
13806 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
13807 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
13808 FirstInChain->getAlignment(),
13809 FirstInChain->getMemOperand()->getFlags());
13812 // Replace all merged stores with the new store.
13813 for (unsigned i = 0; i < NumStores; ++i)
13814 CombineTo(StoreNodes[i].MemNode, NewStore);
13816 AddToWorklist(NewChain.getNode());
13820 void DAGCombiner::getStoreMergeCandidates(
13821 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
13822 SDNode *&RootNode) {
13823 // This holds the base pointer, index, and the offset in bytes from the base
13825 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
13826 EVT MemVT = St->getMemoryVT();
13828 SDValue Val = peekThroughBitcast(St->getValue());
13829 // We must have a base and an offset.
13830 if (!BasePtr.getBase().getNode())
13833 // Do not handle stores to undef base pointers.
13834 if (BasePtr.getBase().isUndef())
13837 bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
13838 bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
13839 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
13840 bool IsLoadSrc = isa<LoadSDNode>(Val);
13841 BaseIndexOffset LBasePtr;
13842 // Match on loadbaseptr if relevant.
13845 auto *Ld = cast<LoadSDNode>(Val);
13846 LBasePtr = BaseIndexOffset::match(Ld, DAG);
13847 LoadVT = Ld->getMemoryVT();
13848 // Load and store should be the same type.
13849 if (MemVT != LoadVT)
13851 // Loads must only have one use.
13852 if (!Ld->hasNUsesOfValue(1, 0))
13854 // The memory operands must not be volatile.
13855 if (Ld->isVolatile() || Ld->isIndexed())
13858 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
13859 int64_t &Offset) -> bool {
13860 if (Other->isVolatile() || Other->isIndexed())
13862 SDValue Val = peekThroughBitcast(Other->getValue());
13863 // Allow merging constants of different types as integers.
13864 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
13865 : Other->getMemoryVT() != MemVT;
13869 // The Load's Base Ptr must also match
13870 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
13871 auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
13872 if (LoadVT != OtherLd->getMemoryVT())
13874 // Loads must only have one use.
13875 if (!OtherLd->hasNUsesOfValue(1, 0))
13877 // The memory operands must not be volatile.
13878 if (OtherLd->isVolatile() || OtherLd->isIndexed())
13880 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
13885 if (IsConstantSrc) {
13888 if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
13891 if (IsExtractVecSrc) {
13892 // Do not merge truncated stores here.
13893 if (Other->isTruncatingStore())
13895 if (!MemVT.bitsEq(Val.getValueType()))
13897 if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
13898 Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13901 Ptr = BaseIndexOffset::match(Other, DAG);
13902 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
13905 // We looking for a root node which is an ancestor to all mergable
13906 // stores. We search up through a load, to our root and then down
13907 // through all children. For instance we will find Store{1,2,3} if
13908 // St is Store1, Store2. or Store3 where the root is not a load
13909 // which always true for nonvolatile ops. TODO: Expand
13910 // the search to find all valid candidates through multiple layers of loads.
13913 // |-------|-------|
13914 // Load Load Store3
13918 // FIXME: We should be able to climb and
13919 // descend TokenFactors to find candidates as well.
13921 RootNode = St->getChain().getNode();
13923 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
13924 RootNode = Ldn->getChain().getNode();
13925 for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
13926 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
13927 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
13928 if (I2.getOperandNo() == 0)
13929 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
13930 BaseIndexOffset Ptr;
13932 if (CandidateMatch(OtherST, Ptr, PtrDiff))
13933 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
13936 for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
13937 if (I.getOperandNo() == 0)
13938 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
13939 BaseIndexOffset Ptr;
13941 if (CandidateMatch(OtherST, Ptr, PtrDiff))
13942 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
13946 // We need to check that merging these stores does not cause a loop in
13947 // the DAG. Any store candidate may depend on another candidate
13948 // indirectly through its operand (we already consider dependencies
13949 // through the chain). Check in parallel by searching up from
13950 // non-chain operands of candidates.
13951 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
13952 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
13953 SDNode *RootNode) {
13954 // FIXME: We should be able to truncate a full search of
13955 // predecessors by doing a BFS and keeping tabs the originating
13956 // stores from which worklist nodes come from in a similar way to
13957 // TokenFactor simplfication.
13959 SmallPtrSet<const SDNode *, 32> Visited;
13960 SmallVector<const SDNode *, 8> Worklist;
13962 // RootNode is a predecessor to all candidates so we need not search
13963 // past it. Add RootNode (peeking through TokenFactors). Do not count
13964 // these towards size check.
13966 Worklist.push_back(RootNode);
13967 while (!Worklist.empty()) {
13968 auto N = Worklist.pop_back_val();
13969 if (N->getOpcode() == ISD::TokenFactor) {
13970 for (SDValue Op : N->ops())
13971 Worklist.push_back(Op.getNode());
13976 // Don't count pruning nodes towards max.
13977 unsigned int Max = 1024 + Visited.size();
13978 // Search Ops of store candidates.
13979 for (unsigned i = 0; i < NumStores; ++i) {
13980 SDNode *N = StoreNodes[i].MemNode;
13981 // Of the 4 Store Operands:
13982 // * Chain (Op 0) -> We have already considered these
13983 // in candidate selection and can be
13985 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
13986 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant
13987 // and so no cycles are possible.
13988 // * (Op 3) -> appears to always be undef. Cannot be source of cycle.
13990 // Thus we need only check predecessors of the value operands.
13991 auto *Op = N->getOperand(1).getNode();
13992 if (Visited.insert(Op).second)
13993 Worklist.push_back(Op);
13995 // Search through DAG. We can stop early if we find a store node.
13996 for (unsigned i = 0; i < NumStores; ++i)
13997 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14003 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14004 if (OptLevel == CodeGenOpt::None)
14007 EVT MemVT = St->getMemoryVT();
14008 int64_t ElementSizeBytes = MemVT.getStoreSize();
14009 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14011 if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14014 bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14015 Attribute::NoImplicitFloat);
14017 // This function cannot currently deal with non-byte-sized memory sizes.
14018 if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14021 if (!MemVT.isSimple())
14024 // Perform an early exit check. Do not bother looking at stored values that
14025 // are not constants, loads, or extracted vector elements.
14026 SDValue StoredVal = peekThroughBitcast(St->getValue());
14027 bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14028 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14029 isa<ConstantFPSDNode>(StoredVal);
14030 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14031 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14033 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14036 SmallVector<MemOpLink, 8> StoreNodes;
14038 // Find potential store merge candidates by searching through chain sub-DAG
14039 getStoreMergeCandidates(St, StoreNodes, RootNode);
14041 // Check if there is anything to merge.
14042 if (StoreNodes.size() < 2)
14045 // Sort the memory operands according to their distance from the
14047 llvm::sort(StoreNodes.begin(), StoreNodes.end(),
14048 [](MemOpLink LHS, MemOpLink RHS) {
14049 return LHS.OffsetFromBase < RHS.OffsetFromBase;
14052 // Store Merge attempts to merge the lowest stores. This generally
14053 // works out as if successful, as the remaining stores are checked
14054 // after the first collection of stores is merged. However, in the
14055 // case that a non-mergeable store is found first, e.g., {p[-2],
14056 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14057 // mergeable cases. To prevent this, we prune such stores from the
14058 // front of StoreNodes here.
14061 while (StoreNodes.size() > 1) {
14062 unsigned StartIdx = 0;
14063 while ((StartIdx + 1 < StoreNodes.size()) &&
14064 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14065 StoreNodes[StartIdx + 1].OffsetFromBase)
14068 // Bail if we don't have enough candidates to merge.
14069 if (StartIdx + 1 >= StoreNodes.size())
14073 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14075 // Scan the memory operations on the chain and find the first
14076 // non-consecutive store memory address.
14077 unsigned NumConsecutiveStores = 1;
14078 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14079 // Check that the addresses are consecutive starting from the second
14080 // element in the list of stores.
14081 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14082 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14083 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14085 NumConsecutiveStores = i + 1;
14088 if (NumConsecutiveStores < 2) {
14089 StoreNodes.erase(StoreNodes.begin(),
14090 StoreNodes.begin() + NumConsecutiveStores);
14094 // The node with the lowest store address.
14095 LLVMContext &Context = *DAG.getContext();
14096 const DataLayout &DL = DAG.getDataLayout();
14098 // Store the constants into memory as one consecutive store.
14099 if (IsConstantSrc) {
14100 while (NumConsecutiveStores >= 2) {
14101 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14102 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14103 unsigned FirstStoreAlign = FirstInChain->getAlignment();
14104 unsigned LastLegalType = 1;
14105 unsigned LastLegalVectorType = 1;
14106 bool LastIntegerTrunc = false;
14107 bool NonZero = false;
14108 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14109 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14110 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14111 SDValue StoredVal = ST->getValue();
14112 bool IsElementZero = false;
14113 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14114 IsElementZero = C->isNullValue();
14115 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14116 IsElementZero = C->getConstantFPValue()->isNullValue();
14117 if (IsElementZero) {
14118 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14119 FirstZeroAfterNonZero = i;
14121 NonZero |= !IsElementZero;
14123 // Find a legal type for the constant store.
14124 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14125 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14126 bool IsFast = false;
14128 // Break early when size is too large to be legal.
14129 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14132 if (TLI.isTypeLegal(StoreTy) &&
14133 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14134 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14135 FirstStoreAlign, &IsFast) &&
14137 LastIntegerTrunc = false;
14138 LastLegalType = i + 1;
14139 // Or check whether a truncstore is legal.
14140 } else if (TLI.getTypeAction(Context, StoreTy) ==
14141 TargetLowering::TypePromoteInteger) {
14142 EVT LegalizedStoredValTy =
14143 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14144 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14145 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14146 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14147 FirstStoreAlign, &IsFast) &&
14149 LastIntegerTrunc = true;
14150 LastLegalType = i + 1;
14154 // We only use vectors if the constant is known to be zero or the
14155 // target allows it and the function is not marked with the
14156 // noimplicitfloat attribute.
14158 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14160 // Find a legal type for the vector store.
14161 unsigned Elts = (i + 1) * NumMemElts;
14162 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14163 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14164 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14165 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14166 FirstStoreAlign, &IsFast) &&
14168 LastLegalVectorType = i + 1;
14172 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14173 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14175 // Check if we found a legal integer type that creates a meaningful
14178 // We know that candidate stores are in order and of correct
14179 // shape. While there is no mergeable sequence from the
14180 // beginning one may start later in the sequence. The only
14181 // reason a merge of size N could have failed where another of
14182 // the same size would not have, is if the alignment has
14183 // improved or we've dropped a non-zero value. Drop as many
14184 // candidates as we can here.
14185 unsigned NumSkip = 1;
14187 (NumSkip < NumConsecutiveStores) &&
14188 (NumSkip < FirstZeroAfterNonZero) &&
14189 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14192 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14193 NumConsecutiveStores -= NumSkip;
14197 // Check that we can merge these candidates without causing a cycle.
14198 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14200 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14201 NumConsecutiveStores -= NumElem;
14205 RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14206 UseVector, LastIntegerTrunc);
14208 // Remove merged stores for next iteration.
14209 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14210 NumConsecutiveStores -= NumElem;
14215 // When extracting multiple vector elements, try to store them
14216 // in one vector store rather than a sequence of scalar stores.
14217 if (IsExtractVecSrc) {
14218 // Loop on Consecutive Stores on success.
14219 while (NumConsecutiveStores >= 2) {
14220 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14221 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14222 unsigned FirstStoreAlign = FirstInChain->getAlignment();
14223 unsigned NumStoresToMerge = 1;
14224 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14225 // Find a legal type for the vector store.
14226 unsigned Elts = (i + 1) * NumMemElts;
14228 EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14231 // Break early when size is too large to be legal.
14232 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14235 if (TLI.isTypeLegal(Ty) &&
14236 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14237 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14238 FirstStoreAlign, &IsFast) &&
14240 NumStoresToMerge = i + 1;
14243 // Check if we found a legal integer type creating a meaningful
14245 if (NumStoresToMerge < 2) {
14246 // We know that candidate stores are in order and of correct
14247 // shape. While there is no mergeable sequence from the
14248 // beginning one may start later in the sequence. The only
14249 // reason a merge of size N could have failed where another of
14250 // the same size would not have, is if the alignment has
14251 // improved. Drop as many candidates as we can here.
14252 unsigned NumSkip = 1;
14254 (NumSkip < NumConsecutiveStores) &&
14255 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14258 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14259 NumConsecutiveStores -= NumSkip;
14263 // Check that we can merge these candidates without causing a cycle.
14264 if (!checkMergeStoreCandidatesForDependencies(
14265 StoreNodes, NumStoresToMerge, RootNode)) {
14266 StoreNodes.erase(StoreNodes.begin(),
14267 StoreNodes.begin() + NumStoresToMerge);
14268 NumConsecutiveStores -= NumStoresToMerge;
14272 RV |= MergeStoresOfConstantsOrVecElts(
14273 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14275 StoreNodes.erase(StoreNodes.begin(),
14276 StoreNodes.begin() + NumStoresToMerge);
14277 NumConsecutiveStores -= NumStoresToMerge;
14282 // Below we handle the case of multiple consecutive stores that
14283 // come from multiple consecutive loads. We merge them into a single
14284 // wide load and a single wide store.
14286 // Look for load nodes which are used by the stored values.
14287 SmallVector<MemOpLink, 8> LoadNodes;
14289 // Find acceptable loads. Loads need to have the same chain (token factor),
14290 // must not be zext, volatile, indexed, and they must be consecutive.
14291 BaseIndexOffset LdBasePtr;
14293 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14294 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14295 SDValue Val = peekThroughBitcast(St->getValue());
14296 LoadSDNode *Ld = cast<LoadSDNode>(Val);
14298 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
14299 // If this is not the first ptr that we check.
14300 int64_t LdOffset = 0;
14301 if (LdBasePtr.getBase().getNode()) {
14302 // The base ptr must be the same.
14303 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
14306 // Check that all other base pointers are the same as this one.
14310 // We found a potential memory operand to merge.
14311 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
14314 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
14315 // If we have load/store pair instructions and we only have two values,
14316 // don't bother merging.
14317 unsigned RequiredAlignment;
14318 if (LoadNodes.size() == 2 &&
14319 TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
14320 StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
14321 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
14322 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
14325 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14326 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14327 unsigned FirstStoreAlign = FirstInChain->getAlignment();
14328 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
14329 unsigned FirstLoadAS = FirstLoad->getAddressSpace();
14330 unsigned FirstLoadAlign = FirstLoad->getAlignment();
14332 // Scan the memory operations on the chain and find the first
14333 // non-consecutive load memory address. These variables hold the index in
14334 // the store node array.
14336 unsigned LastConsecutiveLoad = 1;
14338 // This variable refers to the size and not index in the array.
14339 unsigned LastLegalVectorType = 1;
14340 unsigned LastLegalIntegerType = 1;
14341 bool isDereferenceable = true;
14342 bool DoIntegerTruncate = false;
14343 StartAddress = LoadNodes[0].OffsetFromBase;
14344 SDValue FirstChain = FirstLoad->getChain();
14345 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
14346 // All loads must share the same chain.
14347 if (LoadNodes[i].MemNode->getChain() != FirstChain)
14350 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
14351 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14353 LastConsecutiveLoad = i;
14355 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
14356 isDereferenceable = false;
14358 // Find a legal type for the vector store.
14359 unsigned Elts = (i + 1) * NumMemElts;
14360 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14362 // Break early when size is too large to be legal.
14363 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14366 bool IsFastSt, IsFastLd;
14367 if (TLI.isTypeLegal(StoreTy) &&
14368 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14369 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14370 FirstStoreAlign, &IsFastSt) &&
14372 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14373 FirstLoadAlign, &IsFastLd) &&
14375 LastLegalVectorType = i + 1;
14378 // Find a legal type for the integer store.
14379 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14380 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14381 if (TLI.isTypeLegal(StoreTy) &&
14382 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14383 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14384 FirstStoreAlign, &IsFastSt) &&
14386 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14387 FirstLoadAlign, &IsFastLd) &&
14389 LastLegalIntegerType = i + 1;
14390 DoIntegerTruncate = false;
14391 // Or check whether a truncstore and extload is legal.
14392 } else if (TLI.getTypeAction(Context, StoreTy) ==
14393 TargetLowering::TypePromoteInteger) {
14394 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
14395 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14396 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14397 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
14399 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
14401 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
14402 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14403 FirstStoreAlign, &IsFastSt) &&
14405 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
14406 FirstLoadAlign, &IsFastLd) &&
14408 LastLegalIntegerType = i + 1;
14409 DoIntegerTruncate = true;
14414 // Only use vector types if the vector type is larger than the integer
14415 // type. If they are the same, use integers.
14417 LastLegalVectorType > LastLegalIntegerType && !NoVectors;
14418 unsigned LastLegalType =
14419 std::max(LastLegalVectorType, LastLegalIntegerType);
14421 // We add +1 here because the LastXXX variables refer to location while
14422 // the NumElem refers to array/index size.
14424 std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
14425 NumElem = std::min(LastLegalType, NumElem);
14428 // We know that candidate stores are in order and of correct
14429 // shape. While there is no mergeable sequence from the
14430 // beginning one may start later in the sequence. The only
14431 // reason a merge of size N could have failed where another of
14432 // the same size would not have is if the alignment or either
14433 // the load or store has improved. Drop as many candidates as we
14435 unsigned NumSkip = 1;
14436 while ((NumSkip < LoadNodes.size()) &&
14437 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
14438 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14440 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14441 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
14442 NumConsecutiveStores -= NumSkip;
14446 // Check that we can merge these candidates without causing a cycle.
14447 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14449 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14450 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14451 NumConsecutiveStores -= NumElem;
14455 // Find if it is better to use vectors or integers to load and store
14459 // Find a legal type for the vector store.
14460 unsigned Elts = NumElem * NumMemElts;
14461 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14463 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
14464 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
14467 SDLoc LoadDL(LoadNodes[0].MemNode);
14468 SDLoc StoreDL(StoreNodes[0].MemNode);
14470 // The merged loads are required to have the same incoming chain, so
14471 // using the first's chain is acceptable.
14473 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
14474 AddToWorklist(NewStoreChain.getNode());
14476 MachineMemOperand::Flags MMOFlags =
14477 isDereferenceable ? MachineMemOperand::MODereferenceable
14478 : MachineMemOperand::MONone;
14480 SDValue NewLoad, NewStore;
14481 if (UseVectorTy || !DoIntegerTruncate) {
14483 DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
14484 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14485 FirstLoadAlign, MMOFlags);
14486 NewStore = DAG.getStore(
14487 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
14488 FirstInChain->getPointerInfo(), FirstStoreAlign);
14489 } else { // This must be the truncstore/extload case
14491 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
14492 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
14493 FirstLoad->getChain(), FirstLoad->getBasePtr(),
14494 FirstLoad->getPointerInfo(), JointMemOpVT,
14495 FirstLoadAlign, MMOFlags);
14496 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
14497 FirstInChain->getBasePtr(),
14498 FirstInChain->getPointerInfo(),
14499 JointMemOpVT, FirstInChain->getAlignment(),
14500 FirstInChain->getMemOperand()->getFlags());
14503 // Transfer chain users from old loads to the new load.
14504 for (unsigned i = 0; i < NumElem; ++i) {
14505 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
14506 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
14507 SDValue(NewLoad.getNode(), 1));
14510 // Replace the all stores with the new store. Recursively remove
14511 // corresponding value if its no longer used.
14512 for (unsigned i = 0; i < NumElem; ++i) {
14513 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
14514 CombineTo(StoreNodes[i].MemNode, NewStore);
14515 if (Val.getNode()->use_empty())
14516 recursivelyDeleteUnusedNodes(Val.getNode());
14520 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14521 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
14522 NumConsecutiveStores -= NumElem;
14528 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
14532 // Replace the chain to avoid dependency.
14533 if (ST->isTruncatingStore()) {
14534 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
14535 ST->getBasePtr(), ST->getMemoryVT(),
14536 ST->getMemOperand());
14538 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
14539 ST->getMemOperand());
14542 // Create token to keep both nodes around.
14543 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
14544 MVT::Other, ST->getChain(), ReplStore);
14546 // Make sure the new and old chains are cleaned up.
14547 AddToWorklist(Token.getNode());
14549 // Don't add users to work list.
14550 return CombineTo(ST, Token, false);
14553 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
14554 SDValue Value = ST->getValue();
14555 if (Value.getOpcode() == ISD::TargetConstantFP)
14560 SDValue Chain = ST->getChain();
14561 SDValue Ptr = ST->getBasePtr();
14563 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
14565 // NOTE: If the original store is volatile, this transform must not increase
14566 // the number of stores. For example, on x86-32 an f64 can be stored in one
14567 // processor operation but an i64 (which is not legal) requires two. So the
14568 // transform should not be done in this case.
14571 switch (CFP->getSimpleValueType(0).SimpleTy) {
14573 llvm_unreachable("Unknown FP type");
14574 case MVT::f16: // We don't do this for these yet.
14580 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
14581 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14583 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
14584 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
14586 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
14591 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
14592 !ST->isVolatile()) ||
14593 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
14595 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
14596 getZExtValue(), SDLoc(CFP), MVT::i64);
14597 return DAG.getStore(Chain, DL, Tmp,
14598 Ptr, ST->getMemOperand());
14601 if (!ST->isVolatile() &&
14602 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
14603 // Many FP stores are not made apparent until after legalize, e.g. for
14604 // argument passing. Since this is so common, custom legalize the
14605 // 64-bit integer store into two 32-bit stores.
14606 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
14607 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
14608 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
14609 if (DAG.getDataLayout().isBigEndian())
14612 unsigned Alignment = ST->getAlignment();
14613 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14614 AAMDNodes AAInfo = ST->getAAInfo();
14616 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14617 ST->getAlignment(), MMOFlags, AAInfo);
14618 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14619 DAG.getConstant(4, DL, Ptr.getValueType()));
14620 Alignment = MinAlign(Alignment, 4U);
14621 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
14622 ST->getPointerInfo().getWithOffset(4),
14623 Alignment, MMOFlags, AAInfo);
14624 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14632 SDValue DAGCombiner::visitSTORE(SDNode *N) {
14633 StoreSDNode *ST = cast<StoreSDNode>(N);
14634 SDValue Chain = ST->getChain();
14635 SDValue Value = ST->getValue();
14636 SDValue Ptr = ST->getBasePtr();
14638 // If this is a store of a bit convert, store the input value if the
14639 // resultant store does not need a higher alignment than the original.
14640 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
14641 ST->isUnindexed()) {
14642 EVT SVT = Value.getOperand(0).getValueType();
14643 if (((!LegalOperations && !ST->isVolatile()) ||
14644 TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
14645 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
14646 unsigned OrigAlign = ST->getAlignment();
14648 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
14649 ST->getAddressSpace(), OrigAlign, &Fast) &&
14651 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
14652 ST->getPointerInfo(), OrigAlign,
14653 ST->getMemOperand()->getFlags(), ST->getAAInfo());
14658 // Turn 'store undef, Ptr' -> nothing.
14659 if (Value.isUndef() && ST->isUnindexed())
14662 // Try to infer better alignment information than the store already has.
14663 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
14664 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
14665 if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
14667 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
14668 ST->getMemoryVT(), Align,
14669 ST->getMemOperand()->getFlags(), ST->getAAInfo());
14670 // NewStore will always be N as we are only refining the alignment
14671 assert(NewStore.getNode() == N);
14677 // Try transforming a pair floating point load / store ops to integer
14678 // load / store ops.
14679 if (SDValue NewST = TransformFPLoadStorePair(N))
14682 if (ST->isUnindexed()) {
14683 // Walk up chain skipping non-aliasing memory nodes, on this store and any
14684 // adjacent stores.
14685 if (findBetterNeighborChains(ST)) {
14686 // replaceStoreChain uses CombineTo, which handled all of the worklist
14687 // manipulation. Return the original node to not do anything else.
14688 return SDValue(ST, 0);
14690 Chain = ST->getChain();
14693 // FIXME: is there such a thing as a truncating indexed store?
14694 if (ST->isTruncatingStore() && ST->isUnindexed() &&
14695 Value.getValueType().isInteger()) {
14696 // See if we can simplify the input to this truncstore with knowledge that
14697 // only the low bits are being used. For example:
14698 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
14699 SDValue Shorter = DAG.GetDemandedBits(
14700 Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
14701 ST->getMemoryVT().getScalarSizeInBits()));
14702 AddToWorklist(Value.getNode());
14703 if (Shorter.getNode())
14704 return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
14705 Ptr, ST->getMemoryVT(), ST->getMemOperand());
14707 // Otherwise, see if we can simplify the operation with
14708 // SimplifyDemandedBits, which only works if the value has a single use.
14709 if (SimplifyDemandedBits(
14711 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
14712 ST->getMemoryVT().getScalarSizeInBits()))) {
14713 // Re-visit the store if anything changed and the store hasn't been merged
14714 // with another node (N is deleted) SimplifyDemandedBits will add Value's
14715 // node back to the worklist if necessary, but we also need to re-visit
14716 // the Store node itself.
14717 if (N->getOpcode() != ISD::DELETED_NODE)
14719 return SDValue(N, 0);
14723 // If this is a load followed by a store to the same location, then the store
14725 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
14726 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
14727 ST->isUnindexed() && !ST->isVolatile() &&
14728 // There can't be any side effects between the load and store, such as
14729 // a call or store.
14730 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
14731 // The store is dead, remove it.
14736 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
14737 if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
14738 !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
14739 ST->getMemoryVT() == ST1->getMemoryVT()) {
14740 // If this is a store followed by a store with the same value to the same
14741 // location, then the store is dead/noop.
14742 if (ST1->getValue() == Value) {
14743 // The store is dead, remove it.
14747 // If this is a store who's preceeding store to the same location
14748 // and no one other node is chained to that store we can effectively
14749 // drop the store. Do not remove stores to undef as they may be used as
14751 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
14752 !ST1->getBasePtr().isUndef()) {
14753 // ST1 is fully overwritten and can be elided. Combine with it's chain
14755 CombineTo(ST1, ST1->getChain());
14761 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
14762 // truncating store. We can do this even if this is already a truncstore.
14763 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
14764 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
14765 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
14766 ST->getMemoryVT())) {
14767 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
14768 Ptr, ST->getMemoryVT(), ST->getMemOperand());
14771 // Always perform this optimization before types are legal. If the target
14772 // prefers, also try this after legalization to catch stores that were created
14773 // by intrinsics or other nodes.
14774 if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
14776 // There can be multiple store sequences on the same chain.
14777 // Keep trying to merge store sequences until we are unable to do so
14778 // or until we merge the last store on the chain.
14779 bool Changed = MergeConsecutiveStores(ST);
14780 if (!Changed) break;
14781 // Return N as merge only uses CombineTo and no worklist clean
14782 // up is necessary.
14783 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
14784 return SDValue(N, 0);
14788 // Try transforming N to an indexed store.
14789 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14790 return SDValue(N, 0);
14792 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
14794 // Make sure to do this only after attempting to merge stores in order to
14795 // avoid changing the types of some subset of stores due to visit order,
14796 // preventing their merging.
14797 if (isa<ConstantFPSDNode>(ST->getValue())) {
14798 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
14802 if (SDValue NewSt = splitMergedValStore(ST))
14805 return ReduceLoadOpStoreWidth(N);
14808 /// For the instruction sequence of store below, F and I values
14809 /// are bundled together as an i64 value before being stored into memory.
14810 /// Sometimes it is more efficent to generate separate stores for F and I,
14811 /// which can remove the bitwise instructions or sink them to colder places.
14813 /// (store (or (zext (bitcast F to i32) to i64),
14814 /// (shl (zext I to i64), 32)), addr) -->
14815 /// (store F, addr) and (store I, addr+4)
14817 /// Similarly, splitting for other merged store can also be beneficial, like:
14818 /// For pair of {i32, i32}, i64 store --> two i32 stores.
14819 /// For pair of {i32, i16}, i64 store --> two i32 stores.
14820 /// For pair of {i16, i16}, i32 store --> two i16 stores.
14821 /// For pair of {i16, i8}, i32 store --> two i16 stores.
14822 /// For pair of {i8, i8}, i16 store --> two i8 stores.
14824 /// We allow each target to determine specifically which kind of splitting is
14827 /// The store patterns are commonly seen from the simple code snippet below
14828 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
14829 /// void goo(const std::pair<int, float> &);
14832 /// goo(std::make_pair(tmp, ftmp));
14836 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
14837 if (OptLevel == CodeGenOpt::None)
14840 SDValue Val = ST->getValue();
14843 // Match OR operand.
14844 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
14847 // Match SHL operand and get Lower and Higher parts of Val.
14848 SDValue Op1 = Val.getOperand(0);
14849 SDValue Op2 = Val.getOperand(1);
14851 if (Op1.getOpcode() != ISD::SHL) {
14852 std::swap(Op1, Op2);
14853 if (Op1.getOpcode() != ISD::SHL)
14857 Hi = Op1.getOperand(0);
14858 if (!Op1.hasOneUse())
14861 // Match shift amount to HalfValBitSize.
14862 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
14863 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
14864 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
14867 // Lo and Hi are zero-extended from int with size less equal than 32
14869 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
14870 !Lo.getOperand(0).getValueType().isScalarInteger() ||
14871 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
14872 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
14873 !Hi.getOperand(0).getValueType().isScalarInteger() ||
14874 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
14877 // Use the EVT of low and high parts before bitcast as the input
14878 // of target query.
14879 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
14880 ? Lo.getOperand(0).getValueType()
14881 : Lo.getValueType();
14882 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
14883 ? Hi.getOperand(0).getValueType()
14884 : Hi.getValueType();
14885 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
14888 // Start to split store.
14889 unsigned Alignment = ST->getAlignment();
14890 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
14891 AAMDNodes AAInfo = ST->getAAInfo();
14893 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
14894 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
14895 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
14896 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
14898 SDValue Chain = ST->getChain();
14899 SDValue Ptr = ST->getBasePtr();
14900 // Lower value store.
14901 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
14902 ST->getAlignment(), MMOFlags, AAInfo);
14904 DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
14905 DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
14906 // Higher value store.
14908 DAG.getStore(St0, DL, Hi, Ptr,
14909 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
14910 Alignment / 2, MMOFlags, AAInfo);
14914 /// Convert a disguised subvector insertion into a shuffle:
14915 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
14916 /// bitcast(shuffle (bitcast V), (extended X), Mask)
14917 /// Note: We do not use an insert_subvector node because that requires a legal
14918 /// subvector type.
14919 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
14920 SDValue InsertVal = N->getOperand(1);
14921 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
14922 !InsertVal.getOperand(0).getValueType().isVector())
14925 SDValue SubVec = InsertVal.getOperand(0);
14926 SDValue DestVec = N->getOperand(0);
14927 EVT SubVecVT = SubVec.getValueType();
14928 EVT VT = DestVec.getValueType();
14929 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
14930 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
14931 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
14933 // Step 1: Create a shuffle mask that implements this insert operation. The
14934 // vector that we are inserting into will be operand 0 of the shuffle, so
14935 // those elements are just 'i'. The inserted subvector is in the first
14936 // positions of operand 1 of the shuffle. Example:
14937 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
14938 SmallVector<int, 16> Mask(NumMaskVals);
14939 for (unsigned i = 0; i != NumMaskVals; ++i) {
14940 if (i / NumSrcElts == InsIndex)
14941 Mask[i] = (i % NumSrcElts) + NumMaskVals;
14946 // Bail out if the target can not handle the shuffle we want to create.
14947 EVT SubVecEltVT = SubVecVT.getVectorElementType();
14948 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
14949 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
14952 // Step 2: Create a wide vector from the inserted source vector by appending
14953 // undefined elements. This is the same size as our destination vector.
14955 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
14956 ConcatOps[0] = SubVec;
14957 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
14959 // Step 3: Shuffle in the padded subvector.
14960 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
14961 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
14962 AddToWorklist(PaddedSubV.getNode());
14963 AddToWorklist(DestVecBC.getNode());
14964 AddToWorklist(Shuf.getNode());
14965 return DAG.getBitcast(VT, Shuf);
14968 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
14969 SDValue InVec = N->getOperand(0);
14970 SDValue InVal = N->getOperand(1);
14971 SDValue EltNo = N->getOperand(2);
14974 // If the inserted element is an UNDEF, just use the input vector.
14975 if (InVal.isUndef())
14978 EVT VT = InVec.getValueType();
14980 // Remove redundant insertions:
14981 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
14982 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14983 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
14986 // We must know which element is being inserted for folds below here.
14987 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
14990 unsigned Elt = IndexC->getZExtValue();
14992 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
14995 // Canonicalize insert_vector_elt dag nodes.
14997 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
14998 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
15000 // Do this only if the child insert_vector node has one use; also
15001 // do this only if indices are both constants and Idx1 < Idx0.
15002 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
15003 && isa<ConstantSDNode>(InVec.getOperand(2))) {
15004 unsigned OtherElt = InVec.getConstantOperandVal(2);
15005 if (Elt < OtherElt) {
15007 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15008 InVec.getOperand(0), InVal, EltNo);
15009 AddToWorklist(NewOp.getNode());
15010 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
15011 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
15015 // If we can't generate a legal BUILD_VECTOR, exit
15016 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
15019 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
15020 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
15021 // vector elements.
15022 SmallVector<SDValue, 8> Ops;
15023 // Do not combine these two vectors if the output vector will not replace
15024 // the input vector.
15025 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
15026 Ops.append(InVec.getNode()->op_begin(),
15027 InVec.getNode()->op_end());
15028 } else if (InVec.isUndef()) {
15029 unsigned NElts = VT.getVectorNumElements();
15030 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
15035 // Insert the element
15036 if (Elt < Ops.size()) {
15037 // All the operands of BUILD_VECTOR must have the same type;
15038 // we enforce that here.
15039 EVT OpVT = Ops[0].getValueType();
15040 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
15043 // Return the new vector
15044 return DAG.getBuildVector(VT, DL, Ops);
15047 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
15048 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
15049 assert(!OriginalLoad->isVolatile());
15051 EVT ResultVT = EVE->getValueType(0);
15052 EVT VecEltVT = InVecVT.getVectorElementType();
15053 unsigned Align = OriginalLoad->getAlignment();
15054 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
15055 VecEltVT.getTypeForEVT(*DAG.getContext()));
15057 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
15060 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
15061 ISD::NON_EXTLOAD : ISD::EXTLOAD;
15062 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
15067 SDValue NewPtr = OriginalLoad->getBasePtr();
15069 EVT PtrType = NewPtr.getValueType();
15070 MachinePointerInfo MPI;
15072 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
15073 int Elt = ConstEltNo->getZExtValue();
15074 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
15075 Offset = DAG.getConstant(PtrOff, DL, PtrType);
15076 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
15078 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
15079 Offset = DAG.getNode(
15080 ISD::MUL, DL, PtrType, Offset,
15081 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
15082 MPI = OriginalLoad->getPointerInfo();
15084 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
15086 // The replacement we need to do here is a little tricky: we need to
15087 // replace an extractelement of a load with a load.
15088 // Use ReplaceAllUsesOfValuesWith to do the replacement.
15089 // Note that this replacement assumes that the extractvalue is the only
15090 // use of the load; that's okay because we don't want to perform this
15091 // transformation in other cases anyway.
15094 if (ResultVT.bitsGT(VecEltVT)) {
15095 // If the result type of vextract is wider than the load, then issue an
15096 // extending load instead.
15097 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
15101 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
15102 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
15103 Align, OriginalLoad->getMemOperand()->getFlags(),
15104 OriginalLoad->getAAInfo());
15105 Chain = Load.getValue(1);
15107 Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
15108 MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
15109 OriginalLoad->getAAInfo());
15110 Chain = Load.getValue(1);
15111 if (ResultVT.bitsLT(VecEltVT))
15112 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
15114 Load = DAG.getBitcast(ResultVT, Load);
15116 WorklistRemover DeadNodes(*this);
15117 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
15118 SDValue To[] = { Load, Chain };
15119 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
15120 // Since we're explicitly calling ReplaceAllUses, add the new node to the
15121 // worklist explicitly as well.
15122 AddToWorklist(Load.getNode());
15123 AddUsersToWorklist(Load.getNode()); // Add users too
15124 // Make sure to revisit this node to clean it up; it will usually be dead.
15125 AddToWorklist(EVE);
15127 return SDValue(EVE, 0);
15130 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
15131 // (vextract (scalar_to_vector val, 0) -> val
15132 SDValue InVec = N->getOperand(0);
15133 EVT VT = InVec.getValueType();
15134 EVT NVT = N->getValueType(0);
15136 if (InVec.isUndef())
15137 return DAG.getUNDEF(NVT);
15139 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15140 // Check if the result type doesn't match the inserted element type. A
15141 // SCALAR_TO_VECTOR may truncate the inserted element and the
15142 // EXTRACT_VECTOR_ELT may widen the extracted vector.
15143 SDValue InOp = InVec.getOperand(0);
15144 if (InOp.getValueType() != NVT) {
15145 assert(InOp.getValueType().isInteger() && NVT.isInteger());
15146 return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
15151 SDValue EltNo = N->getOperand(1);
15152 ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
15154 // extract_vector_elt of out-of-bounds element -> UNDEF
15155 if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
15156 return DAG.getUNDEF(NVT);
15158 // extract_vector_elt (build_vector x, y), 1 -> y
15160 InVec.getOpcode() == ISD::BUILD_VECTOR &&
15161 TLI.isTypeLegal(VT) &&
15162 (InVec.hasOneUse() ||
15163 TLI.aggressivelyPreferBuildVectorSources(VT))) {
15164 SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
15165 EVT InEltVT = Elt.getValueType();
15167 // Sometimes build_vector's scalar input types do not match result type.
15168 if (NVT == InEltVT)
15171 // TODO: It may be useful to truncate if free if the build_vector implicitly
15175 // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
15176 bool isLE = DAG.getDataLayout().isLittleEndian();
15177 unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
15178 if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
15179 ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
15180 SDValue BCSrc = InVec.getOperand(0);
15181 if (BCSrc.getValueType().isScalarInteger())
15182 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
15185 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
15187 // This only really matters if the index is non-constant since other combines
15188 // on the constant elements already work.
15189 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
15190 EltNo == InVec.getOperand(2)) {
15191 SDValue Elt = InVec.getOperand(1);
15192 return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
15195 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
15196 // We only perform this optimization before the op legalization phase because
15197 // we may introduce new vector instructions which are not backed by TD
15198 // patterns. For example on AVX, extracting elements from a wide vector
15199 // without using extract_subvector. However, if we can find an underlying
15200 // scalar value, then we can always use that.
15201 if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
15202 int NumElem = VT.getVectorNumElements();
15203 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
15204 // Find the new index to extract from.
15205 int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
15207 // Extracting an undef index is undef.
15209 return DAG.getUNDEF(NVT);
15211 // Select the right vector half to extract from.
15213 if (OrigElt < NumElem) {
15214 SVInVec = InVec->getOperand(0);
15216 SVInVec = InVec->getOperand(1);
15217 OrigElt -= NumElem;
15220 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
15221 SDValue InOp = SVInVec.getOperand(OrigElt);
15222 if (InOp.getValueType() != NVT) {
15223 assert(InOp.getValueType().isInteger() && NVT.isInteger());
15224 InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
15230 // FIXME: We should handle recursing on other vector shuffles and
15231 // scalar_to_vector here as well.
15233 if (!LegalOperations ||
15234 // FIXME: Should really be just isOperationLegalOrCustom.
15235 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
15236 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
15237 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15238 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
15239 DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
15243 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
15244 // simplify it based on the (valid) extraction indices.
15245 if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
15246 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15247 Use->getOperand(0) == InVec &&
15248 isa<ConstantSDNode>(Use->getOperand(1));
15250 APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
15251 for (SDNode *Use : InVec->uses()) {
15252 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
15253 if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
15254 DemandedElts.setBit(CstElt->getZExtValue());
15256 if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
15257 return SDValue(N, 0);
15260 bool BCNumEltsChanged = false;
15261 EVT ExtVT = VT.getVectorElementType();
15264 // If the result of load has to be truncated, then it's not necessarily
15266 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
15269 if (InVec.getOpcode() == ISD::BITCAST) {
15270 // Don't duplicate a load with other uses.
15271 if (!InVec.hasOneUse())
15274 EVT BCVT = InVec.getOperand(0).getValueType();
15275 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
15277 if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
15278 BCNumEltsChanged = true;
15279 InVec = InVec.getOperand(0);
15280 ExtVT = BCVT.getVectorElementType();
15283 // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
15284 if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
15285 ISD::isNormalLoad(InVec.getNode()) &&
15286 !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
15287 SDValue Index = N->getOperand(1);
15288 if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
15289 if (!OrigLoad->isVolatile()) {
15290 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
15296 // Perform only after legalization to ensure build_vector / vector_shuffle
15297 // optimizations have already been done.
15298 if (!LegalOperations) return SDValue();
15300 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
15301 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
15302 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
15305 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
15307 LoadSDNode *LN0 = nullptr;
15308 const ShuffleVectorSDNode *SVN = nullptr;
15309 if (ISD::isNormalLoad(InVec.getNode())) {
15310 LN0 = cast<LoadSDNode>(InVec);
15311 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15312 InVec.getOperand(0).getValueType() == ExtVT &&
15313 ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
15314 // Don't duplicate a load with other uses.
15315 if (!InVec.hasOneUse())
15318 LN0 = cast<LoadSDNode>(InVec.getOperand(0));
15319 } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
15320 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
15322 // (load $addr+1*size)
15324 // Don't duplicate a load with other uses.
15325 if (!InVec.hasOneUse())
15328 // If the bit convert changed the number of elements, it is unsafe
15329 // to examine the mask.
15330 if (BCNumEltsChanged)
15333 // Select the input vector, guarding against out of range extract vector.
15334 unsigned NumElems = VT.getVectorNumElements();
15335 int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
15336 InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
15338 if (InVec.getOpcode() == ISD::BITCAST) {
15339 // Don't duplicate a load with other uses.
15340 if (!InVec.hasOneUse())
15343 InVec = InVec.getOperand(0);
15345 if (ISD::isNormalLoad(InVec.getNode())) {
15346 LN0 = cast<LoadSDNode>(InVec);
15347 Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
15348 EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
15352 // Make sure we found a non-volatile load and the extractelement is
15354 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
15357 // If Idx was -1 above, Elt is going to be -1, so just return undef.
15359 return DAG.getUNDEF(LVT);
15361 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
15367 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
15368 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
15369 // We perform this optimization post type-legalization because
15370 // the type-legalizer often scalarizes integer-promoted vectors.
15371 // Performing this optimization before may create bit-casts which
15372 // will be type-legalized to complex code sequences.
15373 // We perform this optimization only before the operation legalizer because we
15374 // may introduce illegal operations.
15375 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
15378 unsigned NumInScalars = N->getNumOperands();
15380 EVT VT = N->getValueType(0);
15382 // Check to see if this is a BUILD_VECTOR of a bunch of values
15383 // which come from any_extend or zero_extend nodes. If so, we can create
15384 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
15385 // optimizations. We do not handle sign-extend because we can't fill the sign
15387 EVT SourceType = MVT::Other;
15388 bool AllAnyExt = true;
15390 for (unsigned i = 0; i != NumInScalars; ++i) {
15391 SDValue In = N->getOperand(i);
15392 // Ignore undef inputs.
15393 if (In.isUndef()) continue;
15395 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
15396 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
15398 // Abort if the element is not an extension.
15399 if (!ZeroExt && !AnyExt) {
15400 SourceType = MVT::Other;
15404 // The input is a ZeroExt or AnyExt. Check the original type.
15405 EVT InTy = In.getOperand(0).getValueType();
15407 // Check that all of the widened source types are the same.
15408 if (SourceType == MVT::Other)
15411 else if (InTy != SourceType) {
15412 // Multiple income types. Abort.
15413 SourceType = MVT::Other;
15417 // Check if all of the extends are ANY_EXTENDs.
15418 AllAnyExt &= AnyExt;
15421 // In order to have valid types, all of the inputs must be extended from the
15422 // same source type and all of the inputs must be any or zero extend.
15423 // Scalar sizes must be a power of two.
15424 EVT OutScalarTy = VT.getScalarType();
15425 bool ValidTypes = SourceType != MVT::Other &&
15426 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
15427 isPowerOf2_32(SourceType.getSizeInBits());
15429 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
15430 // turn into a single shuffle instruction.
15434 bool isLE = DAG.getDataLayout().isLittleEndian();
15435 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
15436 assert(ElemRatio > 1 && "Invalid element size ratio");
15437 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
15438 DAG.getConstant(0, DL, SourceType);
15440 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
15441 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
15443 // Populate the new build_vector
15444 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
15445 SDValue Cast = N->getOperand(i);
15446 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
15447 Cast.getOpcode() == ISD::ZERO_EXTEND ||
15448 Cast.isUndef()) && "Invalid cast opcode");
15450 if (Cast.isUndef())
15451 In = DAG.getUNDEF(SourceType);
15453 In = Cast->getOperand(0);
15454 unsigned Index = isLE ? (i * ElemRatio) :
15455 (i * ElemRatio + (ElemRatio - 1));
15457 assert(Index < Ops.size() && "Invalid index");
15461 // The type of the new BUILD_VECTOR node.
15462 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
15463 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
15464 "Invalid vector size");
15465 // Check if the new vector type is legal.
15466 if (!isTypeLegal(VecVT) ||
15467 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
15468 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
15471 // Make the new BUILD_VECTOR.
15472 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
15474 // The new BUILD_VECTOR node has the potential to be further optimized.
15475 AddToWorklist(BV.getNode());
15476 // Bitcast to the desired type.
15477 return DAG.getBitcast(VT, BV);
15480 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
15481 EVT VT = N->getValueType(0);
15483 unsigned NumInScalars = N->getNumOperands();
15486 EVT SrcVT = MVT::Other;
15487 unsigned Opcode = ISD::DELETED_NODE;
15488 unsigned NumDefs = 0;
15490 for (unsigned i = 0; i != NumInScalars; ++i) {
15491 SDValue In = N->getOperand(i);
15492 unsigned Opc = In.getOpcode();
15494 if (Opc == ISD::UNDEF)
15497 // If all scalar values are floats and converted from integers.
15498 if (Opcode == ISD::DELETED_NODE &&
15499 (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
15506 EVT InVT = In.getOperand(0).getValueType();
15508 // If all scalar values are typed differently, bail out. It's chosen to
15509 // simplify BUILD_VECTOR of integer types.
15510 if (SrcVT == MVT::Other)
15517 // If the vector has just one element defined, it's not worth to fold it into
15518 // a vectorized one.
15522 assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
15523 && "Should only handle conversion from integer to float.");
15524 assert(SrcVT != MVT::Other && "Cannot determine source type!");
15526 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
15528 if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
15531 // Just because the floating-point vector type is legal does not necessarily
15532 // mean that the corresponding integer vector type is.
15533 if (!isTypeLegal(NVT))
15536 SmallVector<SDValue, 8> Opnds;
15537 for (unsigned i = 0; i != NumInScalars; ++i) {
15538 SDValue In = N->getOperand(i);
15541 Opnds.push_back(DAG.getUNDEF(SrcVT));
15543 Opnds.push_back(In.getOperand(0));
15545 SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
15546 AddToWorklist(BV.getNode());
15548 return DAG.getNode(Opcode, DL, VT, BV);
15551 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
15552 ArrayRef<int> VectorMask,
15553 SDValue VecIn1, SDValue VecIn2,
15554 unsigned LeftIdx) {
15555 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15556 SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
15558 EVT VT = N->getValueType(0);
15559 EVT InVT1 = VecIn1.getValueType();
15560 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
15562 unsigned Vec2Offset = 0;
15563 unsigned NumElems = VT.getVectorNumElements();
15564 unsigned ShuffleNumElems = NumElems;
15566 // In case both the input vectors are extracted from same base
15567 // vector we do not need extra addend (Vec2Offset) while
15568 // computing shuffle mask.
15569 if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15570 !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
15571 !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
15572 Vec2Offset = InVT1.getVectorNumElements();
15574 // We can't generate a shuffle node with mismatched input and output types.
15575 // Try to make the types match the type of the output.
15576 if (InVT1 != VT || InVT2 != VT) {
15577 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
15578 // If the output vector length is a multiple of both input lengths,
15579 // we can concatenate them and pad the rest with undefs.
15580 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
15581 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
15582 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
15583 ConcatOps[0] = VecIn1;
15584 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
15585 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15586 VecIn2 = SDValue();
15587 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
15588 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
15591 if (!VecIn2.getNode()) {
15592 // If we only have one input vector, and it's twice the size of the
15593 // output, split it in two.
15594 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
15595 DAG.getConstant(NumElems, DL, IdxTy));
15596 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
15597 // Since we now have shorter input vectors, adjust the offset of the
15598 // second vector's start.
15599 Vec2Offset = NumElems;
15600 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
15601 // VecIn1 is wider than the output, and we have another, possibly
15602 // smaller input. Pad the smaller input with undefs, shuffle at the
15603 // input vector width, and extract the output.
15604 // The shuffle type is different than VT, so check legality again.
15605 if (LegalOperations &&
15606 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
15609 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
15610 // lower it back into a BUILD_VECTOR. So if the inserted type is
15611 // illegal, don't even try.
15612 if (InVT1 != InVT2) {
15613 if (!TLI.isTypeLegal(InVT2))
15615 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
15616 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
15618 ShuffleNumElems = NumElems * 2;
15620 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
15621 // than VecIn1. We can't handle this for now - this case will disappear
15622 // when we start sorting the vectors by type.
15625 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
15626 InVT1.getSizeInBits() == VT.getSizeInBits()) {
15627 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
15628 ConcatOps[0] = VecIn2;
15629 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
15631 // TODO: Support cases where the length mismatch isn't exactly by a
15633 // TODO: Move this check upwards, so that if we have bad type
15634 // mismatches, we don't create any DAG nodes.
15639 // Initialize mask to undef.
15640 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
15642 // Only need to run up to the number of elements actually used, not the
15643 // total number of elements in the shuffle - if we are shuffling a wider
15644 // vector, the high lanes should be set to undef.
15645 for (unsigned i = 0; i != NumElems; ++i) {
15646 if (VectorMask[i] <= 0)
15649 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
15650 if (VectorMask[i] == (int)LeftIdx) {
15651 Mask[i] = ExtIndex;
15652 } else if (VectorMask[i] == (int)LeftIdx + 1) {
15653 Mask[i] = Vec2Offset + ExtIndex;
15657 // The type the input vectors may have changed above.
15658 InVT1 = VecIn1.getValueType();
15660 // If we already have a VecIn2, it should have the same type as VecIn1.
15661 // If we don't, get an undef/zero vector of the appropriate type.
15662 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
15663 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
15665 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
15666 if (ShuffleNumElems > NumElems)
15667 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
15672 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
15673 // operations. If the types of the vectors we're extracting from allow it,
15674 // turn this into a vector_shuffle node.
15675 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
15677 EVT VT = N->getValueType(0);
15679 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
15680 if (!isTypeLegal(VT))
15683 // May only combine to shuffle after legalize if shuffle is legal.
15684 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
15687 bool UsesZeroVector = false;
15688 unsigned NumElems = N->getNumOperands();
15690 // Record, for each element of the newly built vector, which input vector
15691 // that element comes from. -1 stands for undef, 0 for the zero vector,
15692 // and positive values for the input vectors.
15693 // VectorMask maps each element to its vector number, and VecIn maps vector
15694 // numbers to their initial SDValues.
15696 SmallVector<int, 8> VectorMask(NumElems, -1);
15697 SmallVector<SDValue, 8> VecIn;
15698 VecIn.push_back(SDValue());
15700 for (unsigned i = 0; i != NumElems; ++i) {
15701 SDValue Op = N->getOperand(i);
15706 // See if we can use a blend with a zero vector.
15707 // TODO: Should we generalize this to a blend with an arbitrary constant
15709 if (isNullConstant(Op) || isNullFPConstant(Op)) {
15710 UsesZeroVector = true;
15715 // Not an undef or zero. If the input is something other than an
15716 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
15717 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15718 !isa<ConstantSDNode>(Op.getOperand(1)))
15720 SDValue ExtractedFromVec = Op.getOperand(0);
15722 APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
15723 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
15726 // All inputs must have the same element type as the output.
15727 if (VT.getVectorElementType() !=
15728 ExtractedFromVec.getValueType().getVectorElementType())
15731 // Have we seen this input vector before?
15732 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
15733 // a map back from SDValues to numbers isn't worth it.
15734 unsigned Idx = std::distance(
15735 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
15736 if (Idx == VecIn.size())
15737 VecIn.push_back(ExtractedFromVec);
15739 VectorMask[i] = Idx;
15742 // If we didn't find at least one input vector, bail out.
15743 if (VecIn.size() < 2)
15746 // If all the Operands of BUILD_VECTOR extract from same
15747 // vector, then split the vector efficiently based on the maximum
15748 // vector access index and adjust the VectorMask and
15749 // VecIn accordingly.
15750 if (VecIn.size() == 2) {
15751 unsigned MaxIndex = 0;
15752 unsigned NearestPow2 = 0;
15753 SDValue Vec = VecIn.back();
15754 EVT InVT = Vec.getValueType();
15755 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
15756 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
15758 for (unsigned i = 0; i < NumElems; i++) {
15759 if (VectorMask[i] <= 0)
15761 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
15762 IndexVec[i] = Index;
15763 MaxIndex = std::max(MaxIndex, Index);
15766 NearestPow2 = PowerOf2Ceil(MaxIndex);
15767 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
15768 NumElems * 2 < NearestPow2) {
15769 unsigned SplitSize = NearestPow2 / 2;
15770 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
15771 InVT.getVectorElementType(), SplitSize);
15772 if (TLI.isTypeLegal(SplitVT)) {
15773 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
15774 DAG.getConstant(SplitSize, DL, IdxTy));
15775 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
15776 DAG.getConstant(0, DL, IdxTy));
15778 VecIn.push_back(VecIn1);
15779 VecIn.push_back(VecIn2);
15781 for (unsigned i = 0; i < NumElems; i++) {
15782 if (VectorMask[i] <= 0)
15784 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
15790 // TODO: We want to sort the vectors by descending length, so that adjacent
15791 // pairs have similar length, and the longer vector is always first in the
15794 // TODO: Should this fire if some of the input vectors has illegal type (like
15795 // it does now), or should we let legalization run its course first?
15798 // Take pairs of vectors, and shuffle them so that the result has elements
15799 // from these vectors in the correct places.
15800 // For example, given:
15801 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
15802 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
15803 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
15804 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
15805 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
15806 // We will generate:
15807 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
15808 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
15809 SmallVector<SDValue, 4> Shuffles;
15810 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
15811 unsigned LeftIdx = 2 * In + 1;
15812 SDValue VecLeft = VecIn[LeftIdx];
15814 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
15816 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
15817 VecRight, LeftIdx))
15818 Shuffles.push_back(Shuffle);
15823 // If we need the zero vector as an "ingredient" in the blend tree, add it
15824 // to the list of shuffles.
15825 if (UsesZeroVector)
15826 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
15827 : DAG.getConstantFP(0.0, DL, VT));
15829 // If we only have one shuffle, we're done.
15830 if (Shuffles.size() == 1)
15831 return Shuffles[0];
15833 // Update the vector mask to point to the post-shuffle vectors.
15834 for (int &Vec : VectorMask)
15836 Vec = Shuffles.size() - 1;
15838 Vec = (Vec - 1) / 2;
15840 // More than one shuffle. Generate a binary tree of blends, e.g. if from
15841 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
15843 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
15844 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
15845 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
15846 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
15847 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
15848 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
15849 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
15851 // Make sure the initial size of the shuffle list is even.
15852 if (Shuffles.size() % 2)
15853 Shuffles.push_back(DAG.getUNDEF(VT));
15855 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
15857 Shuffles[CurSize] = DAG.getUNDEF(VT);
15860 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
15862 int Right = 2 * In + 1;
15863 SmallVector<int, 8> Mask(NumElems, -1);
15864 for (unsigned i = 0; i != NumElems; ++i) {
15865 if (VectorMask[i] == Left) {
15867 VectorMask[i] = In;
15868 } else if (VectorMask[i] == Right) {
15869 Mask[i] = i + NumElems;
15870 VectorMask[i] = In;
15875 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
15878 return Shuffles[0];
15881 // Try to turn a build vector of zero extends of extract vector elts into a
15882 // a vector zero extend and possibly an extract subvector.
15883 // TODO: Support sign extend or any extend?
15884 // TODO: Allow undef elements?
15885 // TODO: Don't require the extracts to start at element 0.
15886 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
15887 if (LegalOperations)
15890 EVT VT = N->getValueType(0);
15892 SDValue Op0 = N->getOperand(0);
15893 auto checkElem = [&](SDValue Op) -> int64_t {
15894 if (Op.getOpcode() == ISD::ZERO_EXTEND &&
15895 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15896 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
15897 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
15898 return C->getZExtValue();
15902 // Make sure the first element matches
15903 // (zext (extract_vector_elt X, C))
15904 int64_t Offset = checkElem(Op0);
15908 unsigned NumElems = N->getNumOperands();
15909 SDValue In = Op0.getOperand(0).getOperand(0);
15910 EVT InSVT = In.getValueType().getScalarType();
15911 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
15913 // Don't create an illegal input type after type legalization.
15914 if (LegalTypes && !TLI.isTypeLegal(InVT))
15917 // Ensure all the elements come from the same vector and are adjacent.
15918 for (unsigned i = 1; i != NumElems; ++i) {
15919 if ((Offset + i) != checkElem(N->getOperand(i)))
15924 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
15925 Op0.getOperand(0).getOperand(1));
15926 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
15929 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
15930 EVT VT = N->getValueType(0);
15932 // A vector built entirely of undefs is undef.
15933 if (ISD::allOperandsUndef(N))
15934 return DAG.getUNDEF(VT);
15936 // If this is a splat of a bitcast from another vector, change to a
15939 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
15940 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
15942 // If X is a build_vector itself, the concat can become a larger build_vector.
15943 // TODO: Maybe this is useful for non-splat too?
15944 if (!LegalOperations) {
15945 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
15946 Splat = peekThroughBitcast(Splat);
15947 EVT SrcVT = Splat.getValueType();
15948 if (SrcVT.isVector()) {
15949 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
15950 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
15951 SrcVT.getVectorElementType(), NumElts);
15952 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
15953 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
15954 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
15956 return DAG.getBitcast(VT, Concat);
15962 // Check if we can express BUILD VECTOR via subvector extract.
15963 if (!LegalTypes && (N->getNumOperands() > 1)) {
15964 SDValue Op0 = N->getOperand(0);
15965 auto checkElem = [&](SDValue Op) -> uint64_t {
15966 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
15967 (Op0.getOperand(0) == Op.getOperand(0)))
15968 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
15969 return CNode->getZExtValue();
15973 int Offset = checkElem(Op0);
15974 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
15975 if (Offset + i != checkElem(N->getOperand(i))) {
15981 if ((Offset == 0) &&
15982 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
15983 return Op0.getOperand(0);
15984 if ((Offset != -1) &&
15985 ((Offset % N->getValueType(0).getVectorNumElements()) ==
15986 0)) // IDX must be multiple of output size.
15987 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
15988 Op0.getOperand(0), Op0.getOperand(1));
15991 if (SDValue V = convertBuildVecZextToZext(N))
15994 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
15997 if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
16000 if (SDValue V = reduceBuildVecToShuffle(N))
16006 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
16007 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16008 EVT OpVT = N->getOperand(0).getValueType();
16010 // If the operands are legal vectors, leave them alone.
16011 if (TLI.isTypeLegal(OpVT))
16015 EVT VT = N->getValueType(0);
16016 SmallVector<SDValue, 8> Ops;
16018 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
16019 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16021 // Keep track of what we encounter.
16022 bool AnyInteger = false;
16023 bool AnyFP = false;
16024 for (const SDValue &Op : N->ops()) {
16025 if (ISD::BITCAST == Op.getOpcode() &&
16026 !Op.getOperand(0).getValueType().isVector())
16027 Ops.push_back(Op.getOperand(0));
16028 else if (ISD::UNDEF == Op.getOpcode())
16029 Ops.push_back(ScalarUndef);
16033 // Note whether we encounter an integer or floating point scalar.
16034 // If it's neither, bail out, it could be something weird like x86mmx.
16035 EVT LastOpVT = Ops.back().getValueType();
16036 if (LastOpVT.isFloatingPoint())
16038 else if (LastOpVT.isInteger())
16044 // If any of the operands is a floating point scalar bitcast to a vector,
16045 // use floating point types throughout, and bitcast everything.
16046 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
16048 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
16049 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16051 for (SDValue &Op : Ops) {
16052 if (Op.getValueType() == SVT)
16057 Op = DAG.getBitcast(SVT, Op);
16062 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
16063 VT.getSizeInBits() / SVT.getSizeInBits());
16064 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
16067 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16068 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16069 // most two distinct vectors the same size as the result, attempt to turn this
16070 // into a legal shuffle.
16071 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
16072 EVT VT = N->getValueType(0);
16073 EVT OpVT = N->getOperand(0).getValueType();
16074 int NumElts = VT.getVectorNumElements();
16075 int NumOpElts = OpVT.getVectorNumElements();
16077 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
16078 SmallVector<int, 8> Mask;
16080 for (SDValue Op : N->ops()) {
16081 // Peek through any bitcast.
16082 Op = peekThroughBitcast(Op);
16084 // UNDEF nodes convert to UNDEF shuffle mask values.
16085 if (Op.isUndef()) {
16086 Mask.append((unsigned)NumOpElts, -1);
16090 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16093 // What vector are we extracting the subvector from and at what index?
16094 SDValue ExtVec = Op.getOperand(0);
16096 // We want the EVT of the original extraction to correctly scale the
16097 // extraction index.
16098 EVT ExtVT = ExtVec.getValueType();
16100 // Peek through any bitcast.
16101 ExtVec = peekThroughBitcast(ExtVec);
16103 // UNDEF nodes convert to UNDEF shuffle mask values.
16104 if (ExtVec.isUndef()) {
16105 Mask.append((unsigned)NumOpElts, -1);
16109 if (!isa<ConstantSDNode>(Op.getOperand(1)))
16111 int ExtIdx = Op.getConstantOperandVal(1);
16113 // Ensure that we are extracting a subvector from a vector the same
16114 // size as the result.
16115 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
16118 // Scale the subvector index to account for any bitcast.
16119 int NumExtElts = ExtVT.getVectorNumElements();
16120 if (0 == (NumExtElts % NumElts))
16121 ExtIdx /= (NumExtElts / NumElts);
16122 else if (0 == (NumElts % NumExtElts))
16123 ExtIdx *= (NumElts / NumExtElts);
16127 // At most we can reference 2 inputs in the final shuffle.
16128 if (SV0.isUndef() || SV0 == ExtVec) {
16130 for (int i = 0; i != NumOpElts; ++i)
16131 Mask.push_back(i + ExtIdx);
16132 } else if (SV1.isUndef() || SV1 == ExtVec) {
16134 for (int i = 0; i != NumOpElts; ++i)
16135 Mask.push_back(i + ExtIdx + NumElts);
16141 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
16144 return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
16145 DAG.getBitcast(VT, SV1), Mask);
16148 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
16149 // If we only have one input vector, we don't need to do any concatenation.
16150 if (N->getNumOperands() == 1)
16151 return N->getOperand(0);
16153 // Check if all of the operands are undefs.
16154 EVT VT = N->getValueType(0);
16155 if (ISD::allOperandsUndef(N))
16156 return DAG.getUNDEF(VT);
16158 // Optimize concat_vectors where all but the first of the vectors are undef.
16159 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
16160 return Op.isUndef();
16162 SDValue In = N->getOperand(0);
16163 assert(In.getValueType().isVector() && "Must concat vectors");
16165 // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
16166 if (In->getOpcode() == ISD::BITCAST &&
16167 !In->getOperand(0).getValueType().isVector()) {
16168 SDValue Scalar = In->getOperand(0);
16170 // If the bitcast type isn't legal, it might be a trunc of a legal type;
16171 // look through the trunc so we can still do the transform:
16172 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
16173 if (Scalar->getOpcode() == ISD::TRUNCATE &&
16174 !TLI.isTypeLegal(Scalar.getValueType()) &&
16175 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
16176 Scalar = Scalar->getOperand(0);
16178 EVT SclTy = Scalar->getValueType(0);
16180 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
16183 // Bail out if the vector size is not a multiple of the scalar size.
16184 if (VT.getSizeInBits() % SclTy.getSizeInBits())
16187 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
16188 if (VNTNumElms < 2)
16191 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
16192 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
16195 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
16196 return DAG.getBitcast(VT, Res);
16200 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
16201 // We have already tested above for an UNDEF only concatenation.
16202 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
16203 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
16204 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
16205 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
16207 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
16208 SmallVector<SDValue, 8> Opnds;
16209 EVT SVT = VT.getScalarType();
16212 if (!SVT.isFloatingPoint()) {
16213 // If BUILD_VECTOR are from built from integer, they may have different
16214 // operand types. Get the smallest type and truncate all operands to it.
16215 bool FoundMinVT = false;
16216 for (const SDValue &Op : N->ops())
16217 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16218 EVT OpSVT = Op.getOperand(0).getValueType();
16219 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
16222 assert(FoundMinVT && "Concat vector type mismatch");
16225 for (const SDValue &Op : N->ops()) {
16226 EVT OpVT = Op.getValueType();
16227 unsigned NumElts = OpVT.getVectorNumElements();
16229 if (ISD::UNDEF == Op.getOpcode())
16230 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
16232 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
16233 if (SVT.isFloatingPoint()) {
16234 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
16235 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
16237 for (unsigned i = 0; i != NumElts; ++i)
16239 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
16244 assert(VT.getVectorNumElements() == Opnds.size() &&
16245 "Concat vector type mismatch");
16246 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
16249 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
16250 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
16253 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
16254 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
16255 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
16258 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
16259 // nodes often generate nop CONCAT_VECTOR nodes.
16260 // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
16261 // place the incoming vectors at the exact same location.
16262 SDValue SingleSource = SDValue();
16263 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
16265 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16266 SDValue Op = N->getOperand(i);
16271 // Check if this is the identity extract:
16272 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16275 // Find the single incoming vector for the extract_subvector.
16276 if (SingleSource.getNode()) {
16277 if (Op.getOperand(0) != SingleSource)
16280 SingleSource = Op.getOperand(0);
16282 // Check the source type is the same as the type of the result.
16283 // If not, this concat may extend the vector, so we can not
16284 // optimize it away.
16285 if (SingleSource.getValueType() != N->getValueType(0))
16289 unsigned IdentityIndex = i * PartNumElem;
16290 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
16291 // The extract index must be constant.
16295 // Check that we are reading from the identity index.
16296 if (CS->getZExtValue() != IdentityIndex)
16300 if (SingleSource.getNode())
16301 return SingleSource;
16306 /// If we are extracting a subvector produced by a wide binary operator with at
16307 /// at least one operand that was the result of a vector concatenation, then try
16308 /// to use the narrow vector operands directly to avoid the concatenation and
16310 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
16311 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
16312 // some of these bailouts with other transforms.
16314 // The extract index must be a constant, so we can map it to a concat operand.
16315 auto *ExtractIndex = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16319 // Only handle the case where we are doubling and then halving. A larger ratio
16320 // may require more than two narrow binops to replace the wide binop.
16321 EVT VT = Extract->getValueType(0);
16322 unsigned NumElems = VT.getVectorNumElements();
16323 assert((ExtractIndex->getZExtValue() % NumElems) == 0 &&
16324 "Extract index is not a multiple of the vector length.");
16325 if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2)
16328 // We are looking for an optionally bitcasted wide vector binary operator
16329 // feeding an extract subvector.
16330 SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
16332 // TODO: The motivating case for this transform is an x86 AVX1 target. That
16333 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
16334 // flavors, but no other 256-bit integer support. This could be extended to
16335 // handle any binop, but that may require fixing/adding other folds to avoid
16336 // codegen regressions.
16337 unsigned BOpcode = BinOp.getOpcode();
16338 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
16341 // The binop must be a vector type, so we can chop it in half.
16342 EVT WideBVT = BinOp.getValueType();
16343 if (!WideBVT.isVector())
16346 // Bail out if the target does not support a narrower version of the binop.
16347 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
16348 WideBVT.getVectorNumElements() / 2);
16349 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16350 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
16353 // Peek through bitcasts of the binary operator operands if needed.
16354 SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
16355 SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
16357 // We need at least one concatenation operation of a binop operand to make
16358 // this transform worthwhile. The concat must double the input vector sizes.
16359 // TODO: Should we also handle INSERT_SUBVECTOR patterns?
16361 LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
16363 RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
16364 if (!ConcatL && !ConcatR)
16367 // If one of the binop operands was not the result of a concat, we must
16368 // extract a half-sized operand for our new narrow binop. We can't just reuse
16369 // the original extract index operand because we may have bitcasted.
16370 unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems;
16371 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
16372 EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
16375 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
16376 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
16377 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
16378 SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
16379 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16380 BinOp.getOperand(0),
16381 DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
16383 SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
16384 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
16385 BinOp.getOperand(1),
16386 DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
16388 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
16389 return DAG.getBitcast(VT, NarrowBinOp);
16392 /// If we are extracting a subvector from a wide vector load, convert to a
16393 /// narrow load to eliminate the extraction:
16394 /// (extract_subvector (load wide vector)) --> (load narrow vector)
16395 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
16396 // TODO: Add support for big-endian. The offset calculation must be adjusted.
16397 if (DAG.getDataLayout().isBigEndian())
16400 // TODO: The one-use check is overly conservative. Check the cost of the
16401 // extract instead or remove that condition entirely.
16402 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
16403 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
16404 if (!Ld || !Ld->hasOneUse() || Ld->getExtensionType() || Ld->isVolatile() ||
16408 // The narrow load will be offset from the base address of the old load if
16409 // we are extracting from something besides index 0 (little-endian).
16410 EVT VT = Extract->getValueType(0);
16412 SDValue BaseAddr = Ld->getOperand(1);
16413 unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
16415 // TODO: Use "BaseIndexOffset" to make this more effective.
16416 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
16417 MachineFunction &MF = DAG.getMachineFunction();
16418 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
16419 VT.getStoreSize());
16420 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
16421 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
16425 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
16426 EVT NVT = N->getValueType(0);
16427 SDValue V = N->getOperand(0);
16429 // Extract from UNDEF is UNDEF.
16431 return DAG.getUNDEF(NVT);
16433 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
16434 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
16438 // (extract_subvec (concat V1, V2, ...), i)
16441 // Only operand 0 is checked as 'concat' assumes all inputs of the same
16443 if (V->getOpcode() == ISD::CONCAT_VECTORS &&
16444 isa<ConstantSDNode>(N->getOperand(1)) &&
16445 V->getOperand(0).getValueType() == NVT) {
16446 unsigned Idx = N->getConstantOperandVal(1);
16447 unsigned NumElems = NVT.getVectorNumElements();
16448 assert((Idx % NumElems) == 0 &&
16449 "IDX in concat is not a multiple of the result vector length.");
16450 return V->getOperand(Idx / NumElems);
16454 V = peekThroughBitcast(V);
16456 // If the input is a build vector. Try to make a smaller build vector.
16457 if (V->getOpcode() == ISD::BUILD_VECTOR) {
16458 if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
16459 EVT InVT = V->getValueType(0);
16460 unsigned ExtractSize = NVT.getSizeInBits();
16461 unsigned EltSize = InVT.getScalarSizeInBits();
16462 // Only do this if we won't split any elements.
16463 if (ExtractSize % EltSize == 0) {
16464 unsigned NumElems = ExtractSize / EltSize;
16465 EVT EltVT = InVT.getVectorElementType();
16466 EVT ExtractVT = NumElems == 1 ? EltVT :
16467 EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
16468 if ((Level < AfterLegalizeDAG ||
16470 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
16471 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
16472 unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
16474 if (NumElems == 1) {
16475 SDValue Src = V->getOperand(IdxVal);
16476 if (EltVT != Src.getValueType())
16477 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
16479 return DAG.getBitcast(NVT, Src);
16482 // Extract the pieces from the original build_vector.
16483 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
16484 makeArrayRef(V->op_begin() + IdxVal,
16486 return DAG.getBitcast(NVT, BuildVec);
16492 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
16493 // Handle only simple case where vector being inserted and vector
16494 // being extracted are of same size.
16495 EVT SmallVT = V->getOperand(1).getValueType();
16496 if (!NVT.bitsEq(SmallVT))
16499 // Only handle cases where both indexes are constants.
16500 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
16501 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
16503 if (InsIdx && ExtIdx) {
16505 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
16507 // indices are equal or bit offsets are equal => V1
16508 // otherwise => (extract_subvec V1, ExtIdx)
16509 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
16510 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
16511 return DAG.getBitcast(NVT, V->getOperand(1));
16512 return DAG.getNode(
16513 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
16514 DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
16519 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
16522 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
16523 return SDValue(N, 0);
16528 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
16529 // or turn a shuffle of a single concat into simpler shuffle then concat.
16530 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
16531 EVT VT = N->getValueType(0);
16532 unsigned NumElts = VT.getVectorNumElements();
16534 SDValue N0 = N->getOperand(0);
16535 SDValue N1 = N->getOperand(1);
16536 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
16538 SmallVector<SDValue, 4> Ops;
16539 EVT ConcatVT = N0.getOperand(0).getValueType();
16540 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
16541 unsigned NumConcats = NumElts / NumElemsPerConcat;
16543 // Special case: shuffle(concat(A,B)) can be more efficiently represented
16544 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
16545 // half vector elements.
16546 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
16547 std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
16548 SVN->getMask().end(), [](int i) { return i == -1; })) {
16549 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
16550 makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
16551 N1 = DAG.getUNDEF(ConcatVT);
16552 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
16555 // Look at every vector that's inserted. We're looking for exact
16556 // subvector-sized copies from a concatenated vector
16557 for (unsigned I = 0; I != NumConcats; ++I) {
16558 // Make sure we're dealing with a copy.
16559 unsigned Begin = I * NumElemsPerConcat;
16560 bool AllUndef = true, NoUndef = true;
16561 for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
16562 if (SVN->getMaskElt(J) >= 0)
16569 if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
16572 for (unsigned J = 1; J != NumElemsPerConcat; ++J)
16573 if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
16576 unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
16577 if (FirstElt < N0.getNumOperands())
16578 Ops.push_back(N0.getOperand(FirstElt));
16580 Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
16582 } else if (AllUndef) {
16583 Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
16584 } else { // Mixed with general masks and undefs, can't do optimization.
16589 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
16592 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
16593 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
16595 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
16596 // a simplification in some sense, but it isn't appropriate in general: some
16597 // BUILD_VECTORs are substantially cheaper than others. The general case
16598 // of a BUILD_VECTOR requires inserting each element individually (or
16599 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
16600 // all constants is a single constant pool load. A BUILD_VECTOR where each
16601 // element is identical is a splat. A BUILD_VECTOR where most of the operands
16602 // are undef lowers to a small number of element insertions.
16604 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
16605 // We don't fold shuffles where one side is a non-zero constant, and we don't
16606 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
16607 // non-constant operands. This seems to work out reasonably well in practice.
16608 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
16610 const TargetLowering &TLI) {
16611 EVT VT = SVN->getValueType(0);
16612 unsigned NumElts = VT.getVectorNumElements();
16613 SDValue N0 = SVN->getOperand(0);
16614 SDValue N1 = SVN->getOperand(1);
16616 if (!N0->hasOneUse() || !N1->hasOneUse())
16619 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
16620 // discussed above.
16621 if (!N1.isUndef()) {
16622 bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
16623 bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
16624 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
16626 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
16630 // If both inputs are splats of the same value then we can safely merge this
16631 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
16632 bool IsSplat = false;
16633 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
16634 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
16636 if (SDValue Splat0 = BV0->getSplatValue())
16637 IsSplat = (Splat0 == BV1->getSplatValue());
16639 SmallVector<SDValue, 8> Ops;
16640 SmallSet<SDValue, 16> DuplicateOps;
16641 for (int M : SVN->getMask()) {
16642 SDValue Op = DAG.getUNDEF(VT.getScalarType());
16644 int Idx = M < (int)NumElts ? M : M - NumElts;
16645 SDValue &S = (M < (int)NumElts ? N0 : N1);
16646 if (S.getOpcode() == ISD::BUILD_VECTOR) {
16647 Op = S.getOperand(Idx);
16648 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16649 assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
16650 Op = S.getOperand(0);
16652 // Operand can't be combined - bail out.
16657 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
16658 // generating a splat; semantically, this is fine, but it's likely to
16659 // generate low-quality code if the target can't reconstruct an appropriate
16661 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
16662 if (!IsSplat && !DuplicateOps.insert(Op).second)
16668 // BUILD_VECTOR requires all inputs to be of the same type, find the
16669 // maximum type and extend them all.
16670 EVT SVT = VT.getScalarType();
16671 if (SVT.isInteger())
16672 for (SDValue &Op : Ops)
16673 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
16674 if (SVT != VT.getScalarType())
16675 for (SDValue &Op : Ops)
16676 Op = TLI.isZExtFree(Op.getValueType(), SVT)
16677 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
16678 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
16679 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
16682 // Match shuffles that can be converted to any_vector_extend_in_reg.
16683 // This is often generated during legalization.
16684 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
16685 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
16686 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
16688 const TargetLowering &TLI,
16689 bool LegalOperations,
16691 EVT VT = SVN->getValueType(0);
16692 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
16694 // TODO Add support for big-endian when we have a test case.
16695 if (!VT.isInteger() || IsBigEndian)
16698 unsigned NumElts = VT.getVectorNumElements();
16699 unsigned EltSizeInBits = VT.getScalarSizeInBits();
16700 ArrayRef<int> Mask = SVN->getMask();
16701 SDValue N0 = SVN->getOperand(0);
16703 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
16704 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
16705 for (unsigned i = 0; i != NumElts; ++i) {
16708 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
16715 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
16716 // power-of-2 extensions as they are the most likely.
16717 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
16718 // Check for non power of 2 vector sizes
16719 if (NumElts % Scale != 0)
16721 if (!isAnyExtend(Scale))
16724 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
16725 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
16726 if (!LegalTypes || TLI.isTypeLegal(OutVT))
16727 if (!LegalOperations ||
16728 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
16729 return DAG.getBitcast(VT,
16730 DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
16736 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
16737 // each source element of a large type into the lowest elements of a smaller
16738 // destination type. This is often generated during legalization.
16739 // If the source node itself was a '*_extend_vector_inreg' node then we should
16740 // then be able to remove it.
16741 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
16742 SelectionDAG &DAG) {
16743 EVT VT = SVN->getValueType(0);
16744 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
16746 // TODO Add support for big-endian when we have a test case.
16747 if (!VT.isInteger() || IsBigEndian)
16750 SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
16752 unsigned Opcode = N0.getOpcode();
16753 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
16754 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
16755 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
16758 SDValue N00 = N0.getOperand(0);
16759 ArrayRef<int> Mask = SVN->getMask();
16760 unsigned NumElts = VT.getVectorNumElements();
16761 unsigned EltSizeInBits = VT.getScalarSizeInBits();
16762 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
16763 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
16765 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
16767 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
16769 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
16770 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
16771 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
16772 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
16773 for (unsigned i = 0; i != NumElts; ++i) {
16776 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
16783 // At the moment we just handle the case where we've truncated back to the
16784 // same size as before the extension.
16785 // TODO: handle more extension/truncation cases as cases arise.
16786 if (EltSizeInBits != ExtSrcSizeInBits)
16789 // We can remove *extend_vector_inreg only if the truncation happens at
16790 // the same scale as the extension.
16791 if (isTruncate(ExtScale))
16792 return DAG.getBitcast(VT, N00);
16797 // Combine shuffles of splat-shuffles of the form:
16798 // shuffle (shuffle V, undef, splat-mask), undef, M
16799 // If splat-mask contains undef elements, we need to be careful about
16800 // introducing undef's in the folded mask which are not the result of composing
16801 // the masks of the shuffles.
16802 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
16803 ShuffleVectorSDNode *Splat,
16804 SelectionDAG &DAG) {
16805 ArrayRef<int> SplatMask = Splat->getMask();
16806 assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
16808 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
16809 // every undef mask element in the splat-shuffle has a corresponding undef
16810 // element in the user-shuffle's mask or if the composition of mask elements
16811 // would result in undef.
16812 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
16813 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
16814 // In this case it is not legal to simplify to the splat-shuffle because we
16815 // may be exposing the users of the shuffle an undef element at index 1
16816 // which was not there before the combine.
16817 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
16818 // In this case the composition of masks yields SplatMask, so it's ok to
16819 // simplify to the splat-shuffle.
16820 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
16821 // In this case the composed mask includes all undef elements of SplatMask
16822 // and in addition sets element zero to undef. It is safe to simplify to
16823 // the splat-shuffle.
16824 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
16825 ArrayRef<int> SplatMask) {
16826 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
16827 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
16828 SplatMask[UserMask[i]] != -1)
16832 if (CanSimplifyToExistingSplat(UserMask, SplatMask))
16833 return SDValue(Splat, 0);
16835 // Create a new shuffle with a mask that is composed of the two shuffles'
16837 SmallVector<int, 32> NewMask;
16838 for (int Idx : UserMask)
16839 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
16841 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
16842 Splat->getOperand(0), Splat->getOperand(1),
16846 /// If the shuffle mask is taking exactly one element from the first vector
16847 /// operand and passing through all other elements from the second vector
16848 /// operand, return the index of the mask element that is choosing an element
16849 /// from the first operand. Otherwise, return -1.
16850 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
16851 int MaskSize = Mask.size();
16852 int EltFromOp0 = -1;
16853 // TODO: This does not match if there are undef elements in the shuffle mask.
16854 // Should we ignore undefs in the shuffle mask instead? The trade-off is
16855 // removing an instruction (a shuffle), but losing the knowledge that some
16856 // vector lanes are not needed.
16857 for (int i = 0; i != MaskSize; ++i) {
16858 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
16859 // We're looking for a shuffle of exactly one element from operand 0.
16860 if (EltFromOp0 != -1)
16863 } else if (Mask[i] != i + MaskSize) {
16864 // Nothing from operand 1 can change lanes.
16871 /// If a shuffle inserts exactly one element from a source vector operand into
16872 /// another vector operand and we can access the specified element as a scalar,
16873 /// then we can eliminate the shuffle.
16874 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
16875 SelectionDAG &DAG) {
16876 // First, check if we are taking one element of a vector and shuffling that
16877 // element into another vector.
16878 ArrayRef<int> Mask = Shuf->getMask();
16879 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
16880 SDValue Op0 = Shuf->getOperand(0);
16881 SDValue Op1 = Shuf->getOperand(1);
16882 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
16883 if (ShufOp0Index == -1) {
16884 // Commute mask and check again.
16885 ShuffleVectorSDNode::commuteMask(CommutedMask);
16886 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
16887 if (ShufOp0Index == -1)
16889 // Commute operands to match the commuted shuffle mask.
16890 std::swap(Op0, Op1);
16891 Mask = CommutedMask;
16894 // The shuffle inserts exactly one element from operand 0 into operand 1.
16895 // Now see if we can access that element as a scalar via a real insert element
16897 // TODO: We can try harder to locate the element as a scalar. Examples: it
16898 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
16899 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
16900 "Shuffle mask value must be from operand 0");
16901 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
16904 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
16905 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
16908 // There's an existing insertelement with constant insertion index, so we
16909 // don't need to check the legality/profitability of a replacement operation
16910 // that differs at most in the constant value. The target should be able to
16911 // lower any of those in a similar way. If not, legalization will expand this
16912 // to a scalar-to-vector plus shuffle.
16914 // Note that the shuffle may move the scalar from the position that the insert
16915 // element used. Therefore, our new insert element occurs at the shuffle's
16916 // mask index value, not the insert's index value.
16917 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
16918 SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
16919 Op0.getOperand(2).getValueType());
16920 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
16921 Op1, Op0.getOperand(1), NewInsIndex);
16924 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
16925 EVT VT = N->getValueType(0);
16926 unsigned NumElts = VT.getVectorNumElements();
16928 SDValue N0 = N->getOperand(0);
16929 SDValue N1 = N->getOperand(1);
16931 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
16933 // Canonicalize shuffle undef, undef -> undef
16934 if (N0.isUndef() && N1.isUndef())
16935 return DAG.getUNDEF(VT);
16937 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
16939 // Canonicalize shuffle v, v -> v, undef
16941 SmallVector<int, 8> NewMask;
16942 for (unsigned i = 0; i != NumElts; ++i) {
16943 int Idx = SVN->getMaskElt(i);
16944 if (Idx >= (int)NumElts) Idx -= NumElts;
16945 NewMask.push_back(Idx);
16947 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
16950 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
16952 return DAG.getCommutedVectorShuffle(*SVN);
16954 // Remove references to rhs if it is undef
16955 if (N1.isUndef()) {
16956 bool Changed = false;
16957 SmallVector<int, 8> NewMask;
16958 for (unsigned i = 0; i != NumElts; ++i) {
16959 int Idx = SVN->getMaskElt(i);
16960 if (Idx >= (int)NumElts) {
16964 NewMask.push_back(Idx);
16967 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
16970 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
16973 // A shuffle of a single vector that is a splat can always be folded.
16974 if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
16975 if (N1->isUndef() && N0Shuf->isSplat())
16976 return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
16978 // If it is a splat, check if the argument vector is another splat or a
16980 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
16981 SDNode *V = N0.getNode();
16983 // If this is a bit convert that changes the element type of the vector but
16984 // not the number of vector elements, look through it. Be careful not to
16985 // look though conversions that change things like v4f32 to v2f64.
16986 if (V->getOpcode() == ISD::BITCAST) {
16987 SDValue ConvInput = V->getOperand(0);
16988 if (ConvInput.getValueType().isVector() &&
16989 ConvInput.getValueType().getVectorNumElements() == NumElts)
16990 V = ConvInput.getNode();
16993 if (V->getOpcode() == ISD::BUILD_VECTOR) {
16994 assert(V->getNumOperands() == NumElts &&
16995 "BUILD_VECTOR has wrong number of operands");
16997 bool AllSame = true;
16998 for (unsigned i = 0; i != NumElts; ++i) {
16999 if (!V->getOperand(i).isUndef()) {
17000 Base = V->getOperand(i);
17004 // Splat of <u, u, u, u>, return <u, u, u, u>
17005 if (!Base.getNode())
17007 for (unsigned i = 0; i != NumElts; ++i) {
17008 if (V->getOperand(i) != Base) {
17013 // Splat of <x, x, x, x>, return <x, x, x, x>
17017 // Canonicalize any other splat as a build_vector.
17018 const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
17019 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
17020 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
17022 // We may have jumped through bitcasts, so the type of the
17023 // BUILD_VECTOR may not match the type of the shuffle.
17024 if (V->getValueType(0) != VT)
17025 NewBV = DAG.getBitcast(VT, NewBV);
17030 // Simplify source operands based on shuffle mask.
17031 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17032 return SDValue(N, 0);
17034 // Match shuffles that can be converted to any_vector_extend_in_reg.
17035 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
17038 // Combine "truncate_vector_in_reg" style shuffles.
17039 if (SDValue V = combineTruncationShuffle(SVN, DAG))
17042 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
17043 Level < AfterLegalizeVectorOps &&
17045 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
17046 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
17047 if (SDValue V = partitionShuffleOfConcats(N, DAG))
17051 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17052 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17053 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17054 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
17057 // If this shuffle only has a single input that is a bitcasted shuffle,
17058 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
17059 // back to their original types.
17060 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
17061 N1.isUndef() && Level < AfterLegalizeVectorOps &&
17062 TLI.isTypeLegal(VT)) {
17064 // Peek through the bitcast only if there is one user.
17066 while (BC0.getOpcode() == ISD::BITCAST) {
17067 if (!BC0.hasOneUse())
17069 BC0 = BC0.getOperand(0);
17072 auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
17074 return SmallVector<int, 8>(Mask.begin(), Mask.end());
17076 SmallVector<int, 8> NewMask;
17078 for (int s = 0; s != Scale; ++s)
17079 NewMask.push_back(M < 0 ? -1 : Scale * M + s);
17083 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
17084 EVT SVT = VT.getScalarType();
17085 EVT InnerVT = BC0->getValueType(0);
17086 EVT InnerSVT = InnerVT.getScalarType();
17088 // Determine which shuffle works with the smaller scalar type.
17089 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
17090 EVT ScaleSVT = ScaleVT.getScalarType();
17092 if (TLI.isTypeLegal(ScaleVT) &&
17093 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
17094 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
17095 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17096 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17098 // Scale the shuffle masks to the smaller scalar type.
17099 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
17100 SmallVector<int, 8> InnerMask =
17101 ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
17102 SmallVector<int, 8> OuterMask =
17103 ScaleShuffleMask(SVN->getMask(), OuterScale);
17105 // Merge the shuffle masks.
17106 SmallVector<int, 8> NewMask;
17107 for (int M : OuterMask)
17108 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
17110 // Test for shuffle mask legality over both commutations.
17111 SDValue SV0 = BC0->getOperand(0);
17112 SDValue SV1 = BC0->getOperand(1);
17113 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17115 std::swap(SV0, SV1);
17116 ShuffleVectorSDNode::commuteMask(NewMask);
17117 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17121 SV0 = DAG.getBitcast(ScaleVT, SV0);
17122 SV1 = DAG.getBitcast(ScaleVT, SV1);
17123 return DAG.getBitcast(
17124 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
17130 // Canonicalize shuffles according to rules:
17131 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
17132 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
17133 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
17134 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
17135 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
17136 TLI.isTypeLegal(VT)) {
17137 // The incoming shuffle must be of the same type as the result of the
17138 // current shuffle.
17139 assert(N1->getOperand(0).getValueType() == VT &&
17140 "Shuffle types don't match");
17142 SDValue SV0 = N1->getOperand(0);
17143 SDValue SV1 = N1->getOperand(1);
17144 bool HasSameOp0 = N0 == SV0;
17145 bool IsSV1Undef = SV1.isUndef();
17146 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
17147 // Commute the operands of this shuffle so that next rule
17149 return DAG.getCommutedVectorShuffle(*SVN);
17152 // Try to fold according to rules:
17153 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17154 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17155 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17156 // Don't try to fold shuffles with illegal type.
17157 // Only fold if this shuffle is the only user of the other shuffle.
17158 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
17159 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
17160 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
17162 // Don't try to fold splats; they're likely to simplify somehow, or they
17164 if (OtherSV->isSplat())
17167 // The incoming shuffle must be of the same type as the result of the
17168 // current shuffle.
17169 assert(OtherSV->getOperand(0).getValueType() == VT &&
17170 "Shuffle types don't match");
17173 SmallVector<int, 4> Mask;
17174 // Compute the combined shuffle mask for a shuffle with SV0 as the first
17175 // operand, and SV1 as the second operand.
17176 for (unsigned i = 0; i != NumElts; ++i) {
17177 int Idx = SVN->getMaskElt(i);
17179 // Propagate Undef.
17180 Mask.push_back(Idx);
17184 SDValue CurrentVec;
17185 if (Idx < (int)NumElts) {
17186 // This shuffle index refers to the inner shuffle N0. Lookup the inner
17187 // shuffle mask to identify which vector is actually referenced.
17188 Idx = OtherSV->getMaskElt(Idx);
17190 // Propagate Undef.
17191 Mask.push_back(Idx);
17195 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
17196 : OtherSV->getOperand(1);
17198 // This shuffle index references an element within N1.
17202 // Simple case where 'CurrentVec' is UNDEF.
17203 if (CurrentVec.isUndef()) {
17204 Mask.push_back(-1);
17208 // Canonicalize the shuffle index. We don't know yet if CurrentVec
17209 // will be the first or second operand of the combined shuffle.
17210 Idx = Idx % NumElts;
17211 if (!SV0.getNode() || SV0 == CurrentVec) {
17212 // Ok. CurrentVec is the left hand side.
17213 // Update the mask accordingly.
17215 Mask.push_back(Idx);
17219 // Bail out if we cannot convert the shuffle pair into a single shuffle.
17220 if (SV1.getNode() && SV1 != CurrentVec)
17223 // Ok. CurrentVec is the right hand side.
17224 // Update the mask accordingly.
17226 Mask.push_back(Idx + NumElts);
17229 // Check if all indices in Mask are Undef. In case, propagate Undef.
17230 bool isUndefMask = true;
17231 for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
17232 isUndefMask &= Mask[i] < 0;
17235 return DAG.getUNDEF(VT);
17237 if (!SV0.getNode())
17238 SV0 = DAG.getUNDEF(VT);
17239 if (!SV1.getNode())
17240 SV1 = DAG.getUNDEF(VT);
17242 // Avoid introducing shuffles with illegal mask.
17243 if (!TLI.isShuffleMaskLegal(Mask, VT)) {
17244 ShuffleVectorSDNode::commuteMask(Mask);
17246 if (!TLI.isShuffleMaskLegal(Mask, VT))
17249 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
17250 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
17251 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
17252 std::swap(SV0, SV1);
17255 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
17256 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
17257 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
17258 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
17264 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
17265 SDValue InVal = N->getOperand(0);
17266 EVT VT = N->getValueType(0);
17268 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
17269 // with a VECTOR_SHUFFLE and possible truncate.
17270 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17271 SDValue InVec = InVal->getOperand(0);
17272 SDValue EltNo = InVal->getOperand(1);
17273 auto InVecT = InVec.getValueType();
17274 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
17275 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
17276 int Elt = C0->getZExtValue();
17279 // If we have an implict truncate do truncate here as long as it's legal.
17280 // if it's not legal, this should
17281 if (VT.getScalarType() != InVal.getValueType() &&
17282 InVal.getValueType().isScalarInteger() &&
17283 isTypeLegal(VT.getScalarType())) {
17285 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
17286 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
17288 if (VT.getScalarType() == InVecT.getScalarType() &&
17289 VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
17290 TLI.isShuffleMaskLegal(NewMask, VT)) {
17291 Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
17292 DAG.getUNDEF(InVecT), NewMask);
17293 // If the initial vector is the correct size this shuffle is a
17297 // If not we must truncate the vector.
17298 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
17299 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17300 SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
17302 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
17303 VT.getVectorNumElements());
17304 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
17315 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
17316 EVT VT = N->getValueType(0);
17317 SDValue N0 = N->getOperand(0);
17318 SDValue N1 = N->getOperand(1);
17319 SDValue N2 = N->getOperand(2);
17321 // If inserting an UNDEF, just return the original vector.
17325 // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
17326 // us to pull BITCASTs from input to output.
17327 if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
17328 if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
17329 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
17331 // If this is an insert of an extracted vector into an undef vector, we can
17332 // just use the input to the extract.
17333 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17334 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
17335 return N1.getOperand(0);
17337 // If we are inserting a bitcast value into an undef, with the same
17338 // number of elements, just use the bitcast input of the extract.
17339 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
17340 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
17341 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
17342 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17343 N1.getOperand(0).getOperand(1) == N2 &&
17344 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
17345 VT.getVectorNumElements() &&
17346 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
17347 VT.getSizeInBits()) {
17348 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
17351 // If both N1 and N2 are bitcast values on which insert_subvector
17352 // would makes sense, pull the bitcast through.
17353 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
17354 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
17355 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
17356 SDValue CN0 = N0.getOperand(0);
17357 SDValue CN1 = N1.getOperand(0);
17358 EVT CN0VT = CN0.getValueType();
17359 EVT CN1VT = CN1.getValueType();
17360 if (CN0VT.isVector() && CN1VT.isVector() &&
17361 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
17362 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
17363 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
17364 CN0.getValueType(), CN0, CN1, N2);
17365 return DAG.getBitcast(VT, NewINSERT);
17369 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
17370 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
17371 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
17372 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
17373 N0.getOperand(1).getValueType() == N1.getValueType() &&
17374 N0.getOperand(2) == N2)
17375 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
17378 if (!isa<ConstantSDNode>(N2))
17381 unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
17383 // Canonicalize insert_subvector dag nodes.
17385 // (insert_subvector (insert_subvector A, Idx0), Idx1)
17386 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
17387 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
17388 N1.getValueType() == N0.getOperand(1).getValueType() &&
17389 isa<ConstantSDNode>(N0.getOperand(2))) {
17390 unsigned OtherIdx = N0.getConstantOperandVal(2);
17391 if (InsIdx < OtherIdx) {
17393 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
17394 N0.getOperand(0), N1, N2);
17395 AddToWorklist(NewOp.getNode());
17396 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
17397 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
17401 // If the input vector is a concatenation, and the insert replaces
17402 // one of the pieces, we can optimize into a single concat_vectors.
17403 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
17404 N0.getOperand(0).getValueType() == N1.getValueType()) {
17405 unsigned Factor = N1.getValueType().getVectorNumElements();
17407 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
17408 Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
17410 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17416 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
17417 SDValue N0 = N->getOperand(0);
17419 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
17420 if (N0->getOpcode() == ISD::FP16_TO_FP)
17421 return N0->getOperand(0);
17426 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
17427 SDValue N0 = N->getOperand(0);
17429 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
17430 if (N0->getOpcode() == ISD::AND) {
17431 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
17432 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
17433 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
17441 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
17442 /// with the destination vector and a zero vector.
17443 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
17444 /// vector_shuffle V, Zero, <0, 4, 2, 4>
17445 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
17446 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
17448 EVT VT = N->getValueType(0);
17449 SDValue LHS = N->getOperand(0);
17450 SDValue RHS = peekThroughBitcast(N->getOperand(1));
17453 // Make sure we're not running after operation legalization where it
17454 // may have custom lowered the vector shuffles.
17455 if (LegalOperations)
17458 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
17461 EVT RVT = RHS.getValueType();
17462 unsigned NumElts = RHS.getNumOperands();
17464 // Attempt to create a valid clear mask, splitting the mask into
17465 // sub elements and checking to see if each is
17466 // all zeros or all ones - suitable for shuffle masking.
17467 auto BuildClearMask = [&](int Split) {
17468 int NumSubElts = NumElts * Split;
17469 int NumSubBits = RVT.getScalarSizeInBits() / Split;
17471 SmallVector<int, 8> Indices;
17472 for (int i = 0; i != NumSubElts; ++i) {
17473 int EltIdx = i / Split;
17474 int SubIdx = i % Split;
17475 SDValue Elt = RHS.getOperand(EltIdx);
17476 if (Elt.isUndef()) {
17477 Indices.push_back(-1);
17482 if (isa<ConstantSDNode>(Elt))
17483 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
17484 else if (isa<ConstantFPSDNode>(Elt))
17485 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
17489 // Extract the sub element from the constant bit mask.
17490 if (DAG.getDataLayout().isBigEndian()) {
17491 Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
17493 Bits.lshrInPlace(SubIdx * NumSubBits);
17497 Bits = Bits.trunc(NumSubBits);
17499 if (Bits.isAllOnesValue())
17500 Indices.push_back(i);
17501 else if (Bits == 0)
17502 Indices.push_back(i + NumSubElts);
17507 // Let's see if the target supports this vector_shuffle.
17508 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
17509 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
17510 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
17513 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
17514 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
17515 DAG.getBitcast(ClearVT, LHS),
17519 // Determine maximum split level (byte level masking).
17521 if (RVT.getScalarSizeInBits() % 8 == 0)
17522 MaxSplit = RVT.getScalarSizeInBits() / 8;
17524 for (int Split = 1; Split <= MaxSplit; ++Split)
17525 if (RVT.getScalarSizeInBits() % Split == 0)
17526 if (SDValue S = BuildClearMask(Split))
17532 /// Visit a binary vector operation, like ADD.
17533 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
17534 assert(N->getValueType(0).isVector() &&
17535 "SimplifyVBinOp only works on vectors!");
17537 SDValue LHS = N->getOperand(0);
17538 SDValue RHS = N->getOperand(1);
17539 SDValue Ops[] = {LHS, RHS};
17541 // See if we can constant fold the vector operation.
17542 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
17543 N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
17546 // Type legalization might introduce new shuffles in the DAG.
17547 // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
17548 // -> (shuffle (VBinOp (A, B)), Undef, Mask).
17549 if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
17550 isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
17551 LHS.getOperand(1).isUndef() &&
17552 RHS.getOperand(1).isUndef()) {
17553 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
17554 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
17556 if (SVN0->getMask().equals(SVN1->getMask())) {
17557 EVT VT = N->getValueType(0);
17558 SDValue UndefVector = LHS.getOperand(1);
17559 SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
17560 LHS.getOperand(0), RHS.getOperand(0),
17562 AddUsersToWorklist(N);
17563 return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
17571 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
17573 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
17575 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
17576 cast<CondCodeSDNode>(N0.getOperand(2))->get());
17578 // If we got a simplified select_cc node back from SimplifySelectCC, then
17579 // break it down into a new SETCC node, and a new SELECT node, and then return
17580 // the SELECT node, since we were called with a SELECT node.
17581 if (SCC.getNode()) {
17582 // Check to see if we got a select_cc back (to turn into setcc/select).
17583 // Otherwise, just return whatever node we got back, like fabs.
17584 if (SCC.getOpcode() == ISD::SELECT_CC) {
17585 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
17587 SCC.getOperand(0), SCC.getOperand(1),
17588 SCC.getOperand(4));
17589 AddToWorklist(SETCC.getNode());
17590 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
17591 SCC.getOperand(2), SCC.getOperand(3));
17599 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
17600 /// being selected between, see if we can simplify the select. Callers of this
17601 /// should assume that TheSelect is deleted if this returns true. As such, they
17602 /// should return the appropriate thing (e.g. the node) back to the top-level of
17603 /// the DAG combiner loop to avoid it being looked at.
17604 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
17606 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
17607 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
17608 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
17609 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
17610 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
17611 SDValue Sqrt = RHS;
17614 const ConstantFPSDNode *Zero = nullptr;
17616 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
17617 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
17618 CmpLHS = TheSelect->getOperand(0);
17619 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
17621 // SELECT or VSELECT
17622 SDValue Cmp = TheSelect->getOperand(0);
17623 if (Cmp.getOpcode() == ISD::SETCC) {
17624 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
17625 CmpLHS = Cmp.getOperand(0);
17626 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
17629 if (Zero && Zero->isZero() &&
17630 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
17631 CC == ISD::SETULT || CC == ISD::SETLT)) {
17632 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
17633 CombineTo(TheSelect, Sqrt);
17638 // Cannot simplify select with vector condition
17639 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
17641 // If this is a select from two identical things, try to pull the operation
17642 // through the select.
17643 if (LHS.getOpcode() != RHS.getOpcode() ||
17644 !LHS.hasOneUse() || !RHS.hasOneUse())
17647 // If this is a load and the token chain is identical, replace the select
17648 // of two loads with a load through a select of the address to load from.
17649 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
17650 // constants have been dropped into the constant pool.
17651 if (LHS.getOpcode() == ISD::LOAD) {
17652 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
17653 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
17655 // Token chains must be identical.
17656 if (LHS.getOperand(0) != RHS.getOperand(0) ||
17657 // Do not let this transformation reduce the number of volatile loads.
17658 LLD->isVolatile() || RLD->isVolatile() ||
17659 // FIXME: If either is a pre/post inc/dec load,
17660 // we'd need to split out the address adjustment.
17661 LLD->isIndexed() || RLD->isIndexed() ||
17662 // If this is an EXTLOAD, the VT's must match.
17663 LLD->getMemoryVT() != RLD->getMemoryVT() ||
17664 // If this is an EXTLOAD, the kind of extension must match.
17665 (LLD->getExtensionType() != RLD->getExtensionType() &&
17666 // The only exception is if one of the extensions is anyext.
17667 LLD->getExtensionType() != ISD::EXTLOAD &&
17668 RLD->getExtensionType() != ISD::EXTLOAD) ||
17669 // FIXME: this discards src value information. This is
17670 // over-conservative. It would be beneficial to be able to remember
17671 // both potential memory locations. Since we are discarding
17672 // src value info, don't do the transformation if the memory
17673 // locations are not in the default address space.
17674 LLD->getPointerInfo().getAddrSpace() != 0 ||
17675 RLD->getPointerInfo().getAddrSpace() != 0 ||
17676 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
17677 LLD->getBasePtr().getValueType()))
17680 // Check that the select condition doesn't reach either load. If so,
17681 // folding this will induce a cycle into the DAG. If not, this is safe to
17682 // xform, so create a select of the addresses.
17684 if (TheSelect->getOpcode() == ISD::SELECT) {
17685 SDNode *CondNode = TheSelect->getOperand(0).getNode();
17686 if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
17687 (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
17689 // The loads must not depend on one another.
17690 if (LLD->isPredecessorOf(RLD) ||
17691 RLD->isPredecessorOf(LLD))
17693 Addr = DAG.getSelect(SDLoc(TheSelect),
17694 LLD->getBasePtr().getValueType(),
17695 TheSelect->getOperand(0), LLD->getBasePtr(),
17696 RLD->getBasePtr());
17697 } else { // Otherwise SELECT_CC
17698 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
17699 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
17701 if ((LLD->hasAnyUseOfValue(1) &&
17702 (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
17703 (RLD->hasAnyUseOfValue(1) &&
17704 (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
17707 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
17708 LLD->getBasePtr().getValueType(),
17709 TheSelect->getOperand(0),
17710 TheSelect->getOperand(1),
17711 LLD->getBasePtr(), RLD->getBasePtr(),
17712 TheSelect->getOperand(4));
17716 // It is safe to replace the two loads if they have different alignments,
17717 // but the new load must be the minimum (most restrictive) alignment of the
17719 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
17720 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
17721 if (!RLD->isInvariant())
17722 MMOFlags &= ~MachineMemOperand::MOInvariant;
17723 if (!RLD->isDereferenceable())
17724 MMOFlags &= ~MachineMemOperand::MODereferenceable;
17725 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
17726 // FIXME: Discards pointer and AA info.
17727 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
17728 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
17731 // FIXME: Discards pointer and AA info.
17732 Load = DAG.getExtLoad(
17733 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
17734 : LLD->getExtensionType(),
17735 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
17736 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
17739 // Users of the select now use the result of the load.
17740 CombineTo(TheSelect, Load);
17742 // Users of the old loads now use the new load's chain. We know the
17743 // old-load value is dead now.
17744 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
17745 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
17752 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
17754 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
17755 SDValue N1, SDValue N2, SDValue N3,
17756 ISD::CondCode CC) {
17757 // If this is a select where the false operand is zero and the compare is a
17758 // check of the sign bit, see if we can perform the "gzip trick":
17759 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
17760 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
17761 EVT XType = N0.getValueType();
17762 EVT AType = N2.getValueType();
17763 if (!isNullConstant(N3) || !XType.bitsGE(AType))
17766 // If the comparison is testing for a positive value, we have to invert
17767 // the sign bit mask, so only do that transform if the target has a bitwise
17768 // 'and not' instruction (the invert is free).
17769 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
17770 // (X > -1) ? A : 0
17771 // (X > 0) ? X : 0 <-- This is canonical signed max.
17772 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
17774 } else if (CC == ISD::SETLT) {
17776 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
17777 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
17783 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
17785 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
17786 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
17787 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
17788 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
17789 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
17790 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
17791 AddToWorklist(Shift.getNode());
17793 if (XType.bitsGT(AType)) {
17794 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
17795 AddToWorklist(Shift.getNode());
17798 if (CC == ISD::SETGT)
17799 Shift = DAG.getNOT(DL, Shift, AType);
17801 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
17804 SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
17805 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
17806 AddToWorklist(Shift.getNode());
17808 if (XType.bitsGT(AType)) {
17809 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
17810 AddToWorklist(Shift.getNode());
17813 if (CC == ISD::SETGT)
17814 Shift = DAG.getNOT(DL, Shift, AType);
17816 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
17819 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
17820 /// where 'cond' is the comparison specified by CC.
17821 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
17822 SDValue N2, SDValue N3, ISD::CondCode CC,
17823 bool NotExtCompare) {
17824 // (x ? y : y) -> y.
17825 if (N2 == N3) return N2;
17827 EVT VT = N2.getValueType();
17828 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
17829 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
17831 // Determine if the condition we're dealing with is constant
17832 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
17833 N0, N1, CC, DL, false);
17834 if (SCC.getNode()) AddToWorklist(SCC.getNode());
17836 if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
17837 // fold select_cc true, x, y -> x
17838 // fold select_cc false, x, y -> y
17839 return !SCCC->isNullValue() ? N2 : N3;
17842 // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
17843 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
17844 // in it. This is a win when the constant is not otherwise available because
17845 // it replaces two constant pool loads with one. We only do this if the FP
17846 // type is known to be legal, because if it isn't, then we are before legalize
17847 // types an we want the other legalization to happen first (e.g. to avoid
17848 // messing with soft float) and if the ConstantFP is not legal, because if
17849 // it is legal, we may not need to store the FP constant in a constant pool.
17850 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
17851 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
17852 if (TLI.isTypeLegal(N2.getValueType()) &&
17853 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
17854 TargetLowering::Legal &&
17855 !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
17856 !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
17857 // If both constants have multiple uses, then we won't need to do an
17858 // extra load, they are likely around in registers for other users.
17859 (TV->hasOneUse() || FV->hasOneUse())) {
17860 Constant *Elts[] = {
17861 const_cast<ConstantFP*>(FV->getConstantFPValue()),
17862 const_cast<ConstantFP*>(TV->getConstantFPValue())
17864 Type *FPTy = Elts[0]->getType();
17865 const DataLayout &TD = DAG.getDataLayout();
17867 // Create a ConstantArray of the two constants.
17868 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
17870 DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
17871 TD.getPrefTypeAlignment(FPTy));
17872 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
17874 // Get the offsets to the 0 and 1 element of the array so that we can
17875 // select between them.
17876 SDValue Zero = DAG.getIntPtrConstant(0, DL);
17877 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
17878 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
17880 SDValue Cond = DAG.getSetCC(DL,
17881 getSetCCResultType(N0.getValueType()),
17883 AddToWorklist(Cond.getNode());
17884 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
17886 AddToWorklist(CstOffset.getNode());
17887 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
17889 AddToWorklist(CPIdx.getNode());
17890 return DAG.getLoad(
17891 TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
17892 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
17897 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
17900 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
17901 // where y is has a single bit set.
17902 // A plaintext description would be, we can turn the SELECT_CC into an AND
17903 // when the condition can be materialized as an all-ones register. Any
17904 // single bit-test can be materialized as an all-ones register with
17905 // shift-left and shift-right-arith.
17906 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
17907 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
17908 SDValue AndLHS = N0->getOperand(0);
17909 ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17910 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
17911 // Shift the tested bit over the sign bit.
17912 const APInt &AndMask = ConstAndRHS->getAPIntValue();
17914 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
17915 getShiftAmountTy(AndLHS.getValueType()));
17916 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
17918 // Now arithmetic right shift it all the way over, so the result is either
17919 // all-ones, or zero.
17921 DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
17922 getShiftAmountTy(Shl.getValueType()));
17923 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
17925 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
17929 // fold select C, 16, 0 -> shl C, 4
17930 if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
17931 TLI.getBooleanContents(N0.getValueType()) ==
17932 TargetLowering::ZeroOrOneBooleanContent) {
17934 // If the caller doesn't want us to simplify this into a zext of a compare,
17936 if (NotExtCompare && N2C->isOne())
17939 // Get a SetCC of the condition
17940 // NOTE: Don't create a SETCC if it's not legal on this target.
17941 if (!LegalOperations ||
17942 TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
17944 // cast from setcc result type to select result type
17946 SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
17948 if (N2.getValueType().bitsLT(SCC.getValueType()))
17949 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
17950 N2.getValueType());
17952 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
17953 N2.getValueType(), SCC);
17955 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
17956 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
17957 N2.getValueType(), SCC);
17960 AddToWorklist(SCC.getNode());
17961 AddToWorklist(Temp.getNode());
17966 // shl setcc result by log2 n2c
17967 return DAG.getNode(
17968 ISD::SHL, DL, N2.getValueType(), Temp,
17969 DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
17970 getShiftAmountTy(Temp.getValueType())));
17974 // Check to see if this is an integer abs.
17975 // select_cc setg[te] X, 0, X, -X ->
17976 // select_cc setgt X, -1, X, -X ->
17977 // select_cc setl[te] X, 0, -X, X ->
17978 // select_cc setlt X, 1, -X, X ->
17979 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
17981 ConstantSDNode *SubC = nullptr;
17982 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
17983 (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
17984 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
17985 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
17986 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
17987 (N1C->isOne() && CC == ISD::SETLT)) &&
17988 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
17989 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
17991 EVT XType = N0.getValueType();
17992 if (SubC && SubC->isNullValue() && XType.isInteger()) {
17994 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
17996 DAG.getConstant(XType.getSizeInBits() - 1, DL,
17997 getShiftAmountTy(N0.getValueType())));
17998 SDValue Add = DAG.getNode(ISD::ADD, DL,
18000 AddToWorklist(Shift.getNode());
18001 AddToWorklist(Add.getNode());
18002 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
18006 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
18007 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
18008 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
18009 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
18010 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
18011 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
18012 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
18013 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
18014 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
18015 SDValue ValueOnZero = N2;
18016 SDValue Count = N3;
18017 // If the condition is NE instead of E, swap the operands.
18018 if (CC == ISD::SETNE)
18019 std::swap(ValueOnZero, Count);
18020 // Check if the value on zero is a constant equal to the bits in the type.
18021 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
18022 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
18023 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
18024 // legal, combine to just cttz.
18025 if ((Count.getOpcode() == ISD::CTTZ ||
18026 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
18027 N0 == Count.getOperand(0) &&
18028 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
18029 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
18030 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
18031 // legal, combine to just ctlz.
18032 if ((Count.getOpcode() == ISD::CTLZ ||
18033 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
18034 N0 == Count.getOperand(0) &&
18035 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
18036 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
18044 /// This is a stub for TargetLowering::SimplifySetCC.
18045 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18046 ISD::CondCode Cond, const SDLoc &DL,
18047 bool foldBooleans) {
18048 TargetLowering::DAGCombinerInfo
18049 DagCombineInfo(DAG, Level, false, this);
18050 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18053 /// Given an ISD::SDIV node expressing a divide by constant, return
18054 /// a DAG expression to select that will generate the same value by multiplying
18055 /// by a magic number.
18056 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18057 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18058 // when optimising for minimum size, we don't want to expand a div to a mul
18060 if (DAG.getMachineFunction().getFunction().optForMinSize())
18063 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18067 // Avoid division by zero.
18068 if (C->isNullValue())
18071 SmallVector<SDNode *, 8> Built;
18073 TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
18075 for (SDNode *N : Built)
18080 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
18081 /// DAG expression that will generate the same value by right shifting.
18082 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
18083 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18087 // Avoid division by zero.
18088 if (C->isNullValue())
18091 SmallVector<SDNode *, 8> Built;
18092 SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built);
18094 for (SDNode *N : Built)
18099 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
18100 /// expression that will generate the same value by multiplying by a magic
18102 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18103 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
18104 // when optimising for minimum size, we don't want to expand a div to a mul
18106 if (DAG.getMachineFunction().getFunction().optForMinSize())
18109 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
18113 // Avoid division by zero.
18114 if (C->isNullValue())
18117 SmallVector<SDNode *, 8> Built;
18119 TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, Built);
18121 for (SDNode *N : Built)
18126 /// Determines the LogBase2 value for a non-null input value using the
18127 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
18128 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
18129 EVT VT = V.getValueType();
18130 unsigned EltBits = VT.getScalarSizeInBits();
18131 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
18132 SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
18133 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
18137 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18138 /// For the reciprocal, we need to find the zero of the function:
18139 /// F(X) = A X - 1 [which has a zero at X = 1/A]
18141 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
18142 /// does not require additional intermediate precision]
18143 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
18144 if (Level >= AfterLegalizeDAG)
18147 // TODO: Handle half and/or extended types?
18148 EVT VT = Op.getValueType();
18149 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18152 // If estimates are explicitly disabled for this function, we're done.
18153 MachineFunction &MF = DAG.getMachineFunction();
18154 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
18155 if (Enabled == TLI.ReciprocalEstimate::Disabled)
18158 // Estimates may be explicitly enabled for this type with a custom number of
18159 // refinement steps.
18160 int Iterations = TLI.getDivRefinementSteps(VT, MF);
18161 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
18162 AddToWorklist(Est.getNode());
18165 EVT VT = Op.getValueType();
18167 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
18169 // Newton iterations: Est = Est + Est (1 - Arg * Est)
18170 for (int i = 0; i < Iterations; ++i) {
18171 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
18172 AddToWorklist(NewEst.getNode());
18174 NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
18175 AddToWorklist(NewEst.getNode());
18177 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18178 AddToWorklist(NewEst.getNode());
18180 Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
18181 AddToWorklist(Est.getNode());
18190 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18191 /// For the reciprocal sqrt, we need to find the zero of the function:
18192 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18194 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
18195 /// As a result, we precompute A/2 prior to the iteration loop.
18196 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
18197 unsigned Iterations,
18198 SDNodeFlags Flags, bool Reciprocal) {
18199 EVT VT = Arg.getValueType();
18201 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
18203 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
18204 // this entire sequence requires only one FP constant.
18205 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
18206 AddToWorklist(HalfArg.getNode());
18208 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
18209 AddToWorklist(HalfArg.getNode());
18211 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
18212 for (unsigned i = 0; i < Iterations; ++i) {
18213 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
18214 AddToWorklist(NewEst.getNode());
18216 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
18217 AddToWorklist(NewEst.getNode());
18219 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
18220 AddToWorklist(NewEst.getNode());
18222 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
18223 AddToWorklist(Est.getNode());
18226 // If non-reciprocal square root is requested, multiply the result by Arg.
18228 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
18229 AddToWorklist(Est.getNode());
18235 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
18236 /// For the reciprocal sqrt, we need to find the zero of the function:
18237 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
18239 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
18240 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
18241 unsigned Iterations,
18242 SDNodeFlags Flags, bool Reciprocal) {
18243 EVT VT = Arg.getValueType();
18245 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
18246 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
18248 // This routine must enter the loop below to work correctly
18249 // when (Reciprocal == false).
18250 assert(Iterations > 0);
18252 // Newton iterations for reciprocal square root:
18253 // E = (E * -0.5) * ((A * E) * E + -3.0)
18254 for (unsigned i = 0; i < Iterations; ++i) {
18255 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
18256 AddToWorklist(AE.getNode());
18258 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
18259 AddToWorklist(AEE.getNode());
18261 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
18262 AddToWorklist(RHS.getNode());
18264 // When calculating a square root at the last iteration build:
18265 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
18266 // (notice a common subexpression)
18268 if (Reciprocal || (i + 1) < Iterations) {
18269 // RSQRT: LHS = (E * -0.5)
18270 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
18272 // SQRT: LHS = (A * E) * -0.5
18273 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
18275 AddToWorklist(LHS.getNode());
18277 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
18278 AddToWorklist(Est.getNode());
18284 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
18285 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
18286 /// Op can be zero.
18287 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
18289 if (Level >= AfterLegalizeDAG)
18292 // TODO: Handle half and/or extended types?
18293 EVT VT = Op.getValueType();
18294 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
18297 // If estimates are explicitly disabled for this function, we're done.
18298 MachineFunction &MF = DAG.getMachineFunction();
18299 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
18300 if (Enabled == TLI.ReciprocalEstimate::Disabled)
18303 // Estimates may be explicitly enabled for this type with a custom number of
18304 // refinement steps.
18305 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
18307 bool UseOneConstNR = false;
18309 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
18311 AddToWorklist(Est.getNode());
18314 Est = UseOneConstNR
18315 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
18316 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
18319 // The estimate is now completely wrong if the input was exactly 0.0 or
18320 // possibly a denormal. Force the answer to 0.0 for those cases.
18321 EVT VT = Op.getValueType();
18323 EVT CCVT = getSetCCResultType(VT);
18324 ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
18325 const Function &F = DAG.getMachineFunction().getFunction();
18326 Attribute Denorms = F.getFnAttribute("denormal-fp-math");
18327 if (Denorms.getValueAsString().equals("ieee")) {
18328 // fabs(X) < SmallestNormal ? 0.0 : Est
18329 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
18330 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
18331 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
18332 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18333 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
18334 SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
18335 Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
18336 AddToWorklist(Fabs.getNode());
18337 AddToWorklist(IsDenorm.getNode());
18338 AddToWorklist(Est.getNode());
18340 // X == 0.0 ? 0.0 : Est
18341 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
18342 SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
18343 Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
18344 AddToWorklist(IsZero.getNode());
18345 AddToWorklist(Est.getNode());
18355 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
18356 return buildSqrtEstimateImpl(Op, Flags, true);
18359 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
18360 return buildSqrtEstimateImpl(Op, Flags, false);
18363 /// Return true if there is any possibility that the two addresses overlap.
18364 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
18365 // If they are the same then they must be aliases.
18366 if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
18368 // If they are both volatile then they cannot be reordered.
18369 if (Op0->isVolatile() && Op1->isVolatile()) return true;
18371 // If one operation reads from invariant memory, and the other may store, they
18372 // cannot alias. These should really be checking the equivalent of mayWrite,
18373 // but it only matters for memory nodes other than load /store.
18374 if (Op0->isInvariant() && Op1->writeMem())
18377 if (Op1->isInvariant() && Op0->writeMem())
18380 unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
18381 unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
18383 // Check for BaseIndexOffset matching.
18384 BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
18385 BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
18387 if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
18388 if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
18389 return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
18391 // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
18392 // able to calculate their relative offset if at least one arises
18393 // from an alloca. However, these allocas cannot overlap and we
18394 // can infer there is no alias.
18395 if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
18396 if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
18397 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18398 // If the base are the same frame index but the we couldn't find a
18399 // constant offset, (indices are different) be conservative.
18400 if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
18401 !MFI.isFixedObjectIndex(B->getIndex())))
18405 bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
18406 bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
18407 bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
18408 bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
18409 bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
18410 bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
18412 // If of mismatched base types or checkable indices we can check
18413 // they do not alias.
18414 if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
18415 (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
18416 (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
18420 // If we know required SrcValue1 and SrcValue2 have relatively large
18421 // alignment compared to the size and offset of the access, we may be able
18422 // to prove they do not alias. This check is conservative for now to catch
18423 // cases created by splitting vector types.
18424 int64_t SrcValOffset0 = Op0->getSrcValueOffset();
18425 int64_t SrcValOffset1 = Op1->getSrcValueOffset();
18426 unsigned OrigAlignment0 = Op0->getOriginalAlignment();
18427 unsigned OrigAlignment1 = Op1->getOriginalAlignment();
18428 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
18429 NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
18430 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
18431 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
18433 // There is no overlap between these relatively aligned accesses of
18434 // similar size. Return no alias.
18435 if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
18436 (OffAlign1 + NumBytes1) <= OffAlign0)
18440 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
18442 : DAG.getSubtarget().useAA();
18444 if (CombinerAAOnlyFunc.getNumOccurrences() &&
18445 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
18450 Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
18451 // Use alias analysis information.
18452 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
18453 int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
18454 int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
18455 AliasResult AAResult =
18456 AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
18457 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
18458 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
18459 UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
18460 if (AAResult == NoAlias)
18464 // Otherwise we have to assume they alias.
18468 /// Walk up chain skipping non-aliasing memory nodes,
18469 /// looking for aliasing nodes and adding them to the Aliases vector.
18470 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
18471 SmallVectorImpl<SDValue> &Aliases) {
18472 SmallVector<SDValue, 8> Chains; // List of chains to visit.
18473 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
18475 // Get alias information for node.
18476 bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
18479 Chains.push_back(OriginalChain);
18480 unsigned Depth = 0;
18482 // Look at each chain and determine if it is an alias. If so, add it to the
18483 // aliases list. If not, then continue up the chain looking for the next
18485 while (!Chains.empty()) {
18486 SDValue Chain = Chains.pop_back_val();
18488 // For TokenFactor nodes, look at each operand and only continue up the
18489 // chain until we reach the depth limit.
18491 // FIXME: The depth check could be made to return the last non-aliasing
18492 // chain we found before we hit a tokenfactor rather than the original
18494 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
18496 Aliases.push_back(OriginalChain);
18500 // Don't bother if we've been before.
18501 if (!Visited.insert(Chain.getNode()).second)
18504 switch (Chain.getOpcode()) {
18505 case ISD::EntryToken:
18506 // Entry token is ideal chain operand, but handled in FindBetterChain.
18511 // Get alias information for Chain.
18512 bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
18513 !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
18515 // If chain is alias then stop here.
18516 if (!(IsLoad && IsOpLoad) &&
18517 isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
18518 Aliases.push_back(Chain);
18520 // Look further up the chain.
18521 Chains.push_back(Chain.getOperand(0));
18527 case ISD::TokenFactor:
18528 // We have to check each of the operands of the token factor for "small"
18529 // token factors, so we queue them up. Adding the operands to the queue
18530 // (stack) in reverse order maintains the original order and increases the
18531 // likelihood that getNode will find a matching token factor (CSE.)
18532 if (Chain.getNumOperands() > 16) {
18533 Aliases.push_back(Chain);
18536 for (unsigned n = Chain.getNumOperands(); n;)
18537 Chains.push_back(Chain.getOperand(--n));
18541 case ISD::CopyFromReg:
18542 // Forward past CopyFromReg.
18543 Chains.push_back(Chain.getOperand(0));
18548 // For all other instructions we will just have to take what we can get.
18549 Aliases.push_back(Chain);
18555 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
18556 /// (aliasing node.)
18557 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
18558 if (OptLevel == CodeGenOpt::None)
18561 // Ops for replacing token factor.
18562 SmallVector<SDValue, 8> Aliases;
18564 // Accumulate all the aliases to this node.
18565 GatherAllAliases(N, OldChain, Aliases);
18567 // If no operands then chain to entry token.
18568 if (Aliases.size() == 0)
18569 return DAG.getEntryNode();
18571 // If a single operand then chain to it. We don't need to revisit it.
18572 if (Aliases.size() == 1)
18575 // Construct a custom tailored token factor.
18576 return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
18579 // This function tries to collect a bunch of potentially interesting
18580 // nodes to improve the chains of, all at once. This might seem
18581 // redundant, as this function gets called when visiting every store
18582 // node, so why not let the work be done on each store as it's visited?
18584 // I believe this is mainly important because MergeConsecutiveStores
18585 // is unable to deal with merging stores of different sizes, so unless
18586 // we improve the chains of all the potential candidates up-front
18587 // before running MergeConsecutiveStores, it might only see some of
18588 // the nodes that will eventually be candidates, and then not be able
18589 // to go from a partially-merged state to the desired final
18590 // fully-merged state.
18591 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
18592 if (OptLevel == CodeGenOpt::None)
18595 // This holds the base pointer, index, and the offset in bytes from the base
18597 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
18599 // We must have a base and an offset.
18600 if (!BasePtr.getBase().getNode())
18603 // Do not handle stores to undef base pointers.
18604 if (BasePtr.getBase().isUndef())
18607 SmallVector<StoreSDNode *, 8> ChainedStores;
18608 ChainedStores.push_back(St);
18610 // Walk up the chain and look for nodes with offsets from the same
18611 // base pointer. Stop when reaching an instruction with a different kind
18612 // or instruction which has a different base pointer.
18613 StoreSDNode *Index = St;
18615 // If the chain has more than one use, then we can't reorder the mem ops.
18616 if (Index != St && !SDValue(Index, 0)->hasOneUse())
18619 if (Index->isVolatile() || Index->isIndexed())
18622 // Find the base pointer and offset for this memory node.
18623 BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG);
18625 // Check that the base pointer is the same as the original one.
18626 if (!BasePtr.equalBaseIndex(Ptr, DAG))
18629 // Walk up the chain to find the next store node, ignoring any
18630 // intermediate loads. Any other kind of node will halt the loop.
18631 SDNode *NextInChain = Index->getChain().getNode();
18633 if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
18634 // We found a store node. Use it for the next iteration.
18635 if (STn->isVolatile() || STn->isIndexed()) {
18639 ChainedStores.push_back(STn);
18642 } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
18643 NextInChain = Ldn->getChain().getNode();
18652 // At this point, ChainedStores lists all of the Store nodes
18653 // reachable by iterating up through chain nodes matching the above
18654 // conditions. For each such store identified, try to find an
18655 // earlier chain to attach the store to which won't violate the
18656 // required ordering.
18657 bool MadeChangeToSt = false;
18658 SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
18660 for (StoreSDNode *ChainedStore : ChainedStores) {
18661 SDValue Chain = ChainedStore->getChain();
18662 SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
18664 if (Chain != BetterChain) {
18665 if (ChainedStore == St)
18666 MadeChangeToSt = true;
18667 BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
18671 // Do all replacements after finding the replacements to make to avoid making
18672 // the chains more complicated by introducing new TokenFactors.
18673 for (auto Replacement : BetterChains)
18674 replaceStoreChain(Replacement.first, Replacement.second);
18676 return MadeChangeToSt;
18679 /// This is the entry point for the file.
18680 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
18681 CodeGenOpt::Level OptLevel) {
18682 /// This is the main entry point to this class.
18683 DAGCombiner(*this, AA, OptLevel).Run(Level);