1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //===----------------------------------------------------------------------===//
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallPtrSet.h"
28 #include "llvm/ADT/SmallSet.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/MemoryLocation.h"
33 #include "llvm/Analysis/VectorUtils.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/SelectionDAG.h"
41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42 #include "llvm/CodeGen/SelectionDAGNodes.h"
43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
78 #define DEBUG_TYPE "dagcombine"
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90 cl::desc("Enable DAG combiner's use of IR alias analysis"));
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94 cl::desc("Enable DAG combiner's use of TBAA"));
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99 cl::desc("Only use DAG-combiner alias analysis in this"
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107 cl::desc("Bypass the profitability model of load slicing"),
111 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112 cl::desc("DAG combiner may split indexing from loads"));
115 EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
116 cl::desc("DAG combiner enable merging multiple stores "
117 "into a wider store"));
119 static cl::opt<unsigned> TokenFactorInlineLimit(
120 "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
121 cl::desc("Limit the number of operands to inline for Token Factors"));
123 static cl::opt<unsigned> StoreMergeDependenceLimit(
124 "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
125 cl::desc("Limit the number of times for the same StoreNode and RootNode "
126 "to bail out in store merging dependence check"));
128 static cl::opt<bool> EnableReduceLoadOpStoreWidth(
129 "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
130 cl::desc("DAG cominber enable reducing the width of load/op/store "
133 static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
134 "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
135 cl::desc("DAG cominber enable load/<replace bytes>/store with "
136 "a narrower store"));
142 const TargetLowering &TLI;
143 const SelectionDAGTargetInfo *STI;
145 CodeGenOpt::Level OptLevel;
146 bool LegalDAG = false;
147 bool LegalOperations = false;
148 bool LegalTypes = false;
150 bool DisableGenericCombines;
152 /// Worklist of all of the nodes that need to be simplified.
154 /// This must behave as a stack -- new nodes to process are pushed onto the
155 /// back and when processing we pop off of the back.
157 /// The worklist will not contain duplicates but may contain null entries
158 /// due to nodes being deleted from the underlying DAG.
159 SmallVector<SDNode *, 64> Worklist;
161 /// Mapping from an SDNode to its position on the worklist.
163 /// This is used to find and remove nodes from the worklist (by nulling
164 /// them) when they are deleted from the underlying DAG. It relies on
165 /// stable indices of nodes within the worklist.
166 DenseMap<SDNode *, unsigned> WorklistMap;
167 /// This records all nodes attempted to add to the worklist since we
168 /// considered a new worklist entry. As we keep do not add duplicate nodes
169 /// in the worklist, this is different from the tail of the worklist.
170 SmallSetVector<SDNode *, 32> PruningList;
172 /// Set of nodes which have been combined (at least once).
174 /// This is used to allow us to reliably add any operands of a DAG node
175 /// which have not yet been combined to the worklist.
176 SmallPtrSet<SDNode *, 32> CombinedNodes;
178 /// Map from candidate StoreNode to the pair of RootNode and count.
179 /// The count is used to track how many times we have seen the StoreNode
180 /// with the same RootNode bail out in dependence check. If we have seen
181 /// the bail out for the same pair many times over a limit, we won't
182 /// consider the StoreNode with the same RootNode as store merging
184 DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
186 // AA - Used for DAG load/store alias analysis.
189 /// When an instruction is simplified, add all users of the instruction to
190 /// the work lists because they might get more simplified now.
191 void AddUsersToWorklist(SDNode *N) {
192 for (SDNode *Node : N->uses())
196 /// Convenient shorthand to add a node and all of its user to the worklist.
197 void AddToWorklistWithUsers(SDNode *N) {
198 AddUsersToWorklist(N);
202 // Prune potentially dangling nodes. This is called after
203 // any visit to a node, but should also be called during a visit after any
204 // failed combine which may have created a DAG node.
205 void clearAddedDanglingWorklistEntries() {
206 // Check any nodes added to the worklist to see if they are prunable.
207 while (!PruningList.empty()) {
208 auto *N = PruningList.pop_back_val();
210 recursivelyDeleteUnusedNodes(N);
214 SDNode *getNextWorklistEntry() {
215 // Before we do any work, remove nodes that are not in use.
216 clearAddedDanglingWorklistEntries();
218 // The Worklist holds the SDNodes in order, but it may contain null
220 while (!N && !Worklist.empty()) {
221 N = Worklist.pop_back_val();
225 bool GoodWorklistEntry = WorklistMap.erase(N);
226 (void)GoodWorklistEntry;
227 assert(GoodWorklistEntry &&
228 "Found a worklist entry without a corresponding map entry!");
233 /// Call the node-specific routine that folds each particular type of node.
234 SDValue visit(SDNode *N);
237 DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
238 : DAG(D), TLI(D.getTargetLoweringInfo()),
239 STI(D.getSubtarget().getSelectionDAGInfo()),
240 Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
241 ForCodeSize = DAG.shouldOptForSize();
242 DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);
244 MaximumLegalStoreInBits = 0;
245 // We use the minimum store size here, since that's all we can guarantee
246 // for the scalable vector types.
247 for (MVT VT : MVT::all_valuetypes())
248 if (EVT(VT).isSimple() && VT != MVT::Other &&
249 TLI.isTypeLegal(EVT(VT)) &&
250 VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
251 MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
254 void ConsiderForPruning(SDNode *N) {
255 // Mark this for potential pruning.
256 PruningList.insert(N);
259 /// Add to the worklist making sure its instance is at the back (next to be
261 void AddToWorklist(SDNode *N) {
262 assert(N->getOpcode() != ISD::DELETED_NODE &&
263 "Deleted Node added to Worklist");
265 // Skip handle nodes as they can't usefully be combined and confuse the
266 // zero-use deletion strategy.
267 if (N->getOpcode() == ISD::HANDLENODE)
270 ConsiderForPruning(N);
272 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
273 Worklist.push_back(N);
276 /// Remove all instances of N from the worklist.
277 void removeFromWorklist(SDNode *N) {
278 CombinedNodes.erase(N);
279 PruningList.remove(N);
280 StoreRootCountMap.erase(N);
282 auto It = WorklistMap.find(N);
283 if (It == WorklistMap.end())
284 return; // Not in the worklist.
286 // Null out the entry rather than erasing it to avoid a linear operation.
287 Worklist[It->second] = nullptr;
288 WorklistMap.erase(It);
291 void deleteAndRecombine(SDNode *N);
292 bool recursivelyDeleteUnusedNodes(SDNode *N);
294 /// Replaces all uses of the results of one DAG node with new values.
295 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
298 /// Replaces all uses of the results of one DAG node with new values.
299 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
300 return CombineTo(N, &Res, 1, AddTo);
303 /// Replaces all uses of the results of one DAG node with new values.
304 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
306 SDValue To[] = { Res0, Res1 };
307 return CombineTo(N, To, 2, AddTo);
310 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
313 unsigned MaximumLegalStoreInBits;
315 /// Check the specified integer node value to see if it can be simplified or
316 /// if things it uses can be simplified by bit propagation.
317 /// If so, return true.
318 bool SimplifyDemandedBits(SDValue Op) {
319 unsigned BitWidth = Op.getScalarValueSizeInBits();
320 APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
321 return SimplifyDemandedBits(Op, DemandedBits);
324 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
325 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
327 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
331 AddToWorklist(Op.getNode());
333 CommitTargetLoweringOpt(TLO);
337 /// Check the specified vector node value to see if it can be simplified or
338 /// if things it uses can be simplified as it only uses some of the
339 /// elements. If so, return true.
340 bool SimplifyDemandedVectorElts(SDValue Op) {
341 // TODO: For now just pretend it cannot be simplified.
342 if (Op.getValueType().isScalableVector())
345 unsigned NumElts = Op.getValueType().getVectorNumElements();
346 APInt DemandedElts = APInt::getAllOnesValue(NumElts);
347 return SimplifyDemandedVectorElts(Op, DemandedElts);
350 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
351 const APInt &DemandedElts,
352 bool AssumeSingleUse = false);
353 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
354 bool AssumeSingleUse = false);
356 bool CombineToPreIndexedLoadStore(SDNode *N);
357 bool CombineToPostIndexedLoadStore(SDNode *N);
358 SDValue SplitIndexingFromLoad(LoadSDNode *LD);
359 bool SliceUpLoad(SDNode *N);
361 // Scalars have size 0 to distinguish from singleton vectors.
362 SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
363 bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
364 bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
366 /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
369 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
370 /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
371 /// \param EltNo index of the vector element to load.
372 /// \param OriginalLoad load that EVE came from to be replaced.
373 /// \returns EVE on success SDValue() on failure.
374 SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
376 LoadSDNode *OriginalLoad);
377 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
378 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
379 SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
380 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
381 SDValue PromoteIntBinOp(SDValue Op);
382 SDValue PromoteIntShiftOp(SDValue Op);
383 SDValue PromoteExtend(SDValue Op);
384 bool PromoteLoad(SDValue Op);
386 /// Call the node-specific routine that knows how to fold each
387 /// particular type of node. If that doesn't do anything, try the
388 /// target-specific DAG combines.
389 SDValue combine(SDNode *N);
391 // Visitation implementation - Implement dag node combining for different
392 // node types. The semantics are as follows:
394 // SDValue.getNode() == 0 - No change was made
395 // SDValue.getNode() == N - N was replaced, is dead and has been handled.
396 // otherwise - N should be replaced by the returned Operand.
398 SDValue visitTokenFactor(SDNode *N);
399 SDValue visitMERGE_VALUES(SDNode *N);
400 SDValue visitADD(SDNode *N);
401 SDValue visitADDLike(SDNode *N);
402 SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
403 SDValue visitSUB(SDNode *N);
404 SDValue visitADDSAT(SDNode *N);
405 SDValue visitSUBSAT(SDNode *N);
406 SDValue visitADDC(SDNode *N);
407 SDValue visitADDO(SDNode *N);
408 SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
409 SDValue visitSUBC(SDNode *N);
410 SDValue visitSUBO(SDNode *N);
411 SDValue visitADDE(SDNode *N);
412 SDValue visitADDCARRY(SDNode *N);
413 SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
414 SDValue visitSUBE(SDNode *N);
415 SDValue visitSUBCARRY(SDNode *N);
416 SDValue visitMUL(SDNode *N);
417 SDValue visitMULFIX(SDNode *N);
418 SDValue useDivRem(SDNode *N);
419 SDValue visitSDIV(SDNode *N);
420 SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
421 SDValue visitUDIV(SDNode *N);
422 SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
423 SDValue visitREM(SDNode *N);
424 SDValue visitMULHU(SDNode *N);
425 SDValue visitMULHS(SDNode *N);
426 SDValue visitSMUL_LOHI(SDNode *N);
427 SDValue visitUMUL_LOHI(SDNode *N);
428 SDValue visitMULO(SDNode *N);
429 SDValue visitIMINMAX(SDNode *N);
430 SDValue visitAND(SDNode *N);
431 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
432 SDValue visitOR(SDNode *N);
433 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
434 SDValue visitXOR(SDNode *N);
435 SDValue SimplifyVBinOp(SDNode *N);
436 SDValue visitSHL(SDNode *N);
437 SDValue visitSRA(SDNode *N);
438 SDValue visitSRL(SDNode *N);
439 SDValue visitFunnelShift(SDNode *N);
440 SDValue visitRotate(SDNode *N);
441 SDValue visitABS(SDNode *N);
442 SDValue visitBSWAP(SDNode *N);
443 SDValue visitBITREVERSE(SDNode *N);
444 SDValue visitCTLZ(SDNode *N);
445 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
446 SDValue visitCTTZ(SDNode *N);
447 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
448 SDValue visitCTPOP(SDNode *N);
449 SDValue visitSELECT(SDNode *N);
450 SDValue visitVSELECT(SDNode *N);
451 SDValue visitSELECT_CC(SDNode *N);
452 SDValue visitSETCC(SDNode *N);
453 SDValue visitSETCCCARRY(SDNode *N);
454 SDValue visitSIGN_EXTEND(SDNode *N);
455 SDValue visitZERO_EXTEND(SDNode *N);
456 SDValue visitANY_EXTEND(SDNode *N);
457 SDValue visitAssertExt(SDNode *N);
458 SDValue visitAssertAlign(SDNode *N);
459 SDValue visitSIGN_EXTEND_INREG(SDNode *N);
460 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
461 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
462 SDValue visitTRUNCATE(SDNode *N);
463 SDValue visitBITCAST(SDNode *N);
464 SDValue visitFREEZE(SDNode *N);
465 SDValue visitBUILD_PAIR(SDNode *N);
466 SDValue visitFADD(SDNode *N);
467 SDValue visitFSUB(SDNode *N);
468 SDValue visitFMUL(SDNode *N);
469 SDValue visitFMA(SDNode *N);
470 SDValue visitFDIV(SDNode *N);
471 SDValue visitFREM(SDNode *N);
472 SDValue visitFSQRT(SDNode *N);
473 SDValue visitFCOPYSIGN(SDNode *N);
474 SDValue visitFPOW(SDNode *N);
475 SDValue visitSINT_TO_FP(SDNode *N);
476 SDValue visitUINT_TO_FP(SDNode *N);
477 SDValue visitFP_TO_SINT(SDNode *N);
478 SDValue visitFP_TO_UINT(SDNode *N);
479 SDValue visitFP_ROUND(SDNode *N);
480 SDValue visitFP_EXTEND(SDNode *N);
481 SDValue visitFNEG(SDNode *N);
482 SDValue visitFABS(SDNode *N);
483 SDValue visitFCEIL(SDNode *N);
484 SDValue visitFTRUNC(SDNode *N);
485 SDValue visitFFLOOR(SDNode *N);
486 SDValue visitFMINNUM(SDNode *N);
487 SDValue visitFMAXNUM(SDNode *N);
488 SDValue visitFMINIMUM(SDNode *N);
489 SDValue visitFMAXIMUM(SDNode *N);
490 SDValue visitBRCOND(SDNode *N);
491 SDValue visitBR_CC(SDNode *N);
492 SDValue visitLOAD(SDNode *N);
494 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
495 SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
497 SDValue visitSTORE(SDNode *N);
498 SDValue visitLIFETIME_END(SDNode *N);
499 SDValue visitINSERT_VECTOR_ELT(SDNode *N);
500 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
501 SDValue visitBUILD_VECTOR(SDNode *N);
502 SDValue visitCONCAT_VECTORS(SDNode *N);
503 SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
504 SDValue visitVECTOR_SHUFFLE(SDNode *N);
505 SDValue visitSCALAR_TO_VECTOR(SDNode *N);
506 SDValue visitINSERT_SUBVECTOR(SDNode *N);
507 SDValue visitMLOAD(SDNode *N);
508 SDValue visitMSTORE(SDNode *N);
509 SDValue visitMGATHER(SDNode *N);
510 SDValue visitMSCATTER(SDNode *N);
511 SDValue visitFP_TO_FP16(SDNode *N);
512 SDValue visitFP16_TO_FP(SDNode *N);
513 SDValue visitVECREDUCE(SDNode *N);
515 SDValue visitFADDForFMACombine(SDNode *N);
516 SDValue visitFSUBForFMACombine(SDNode *N);
517 SDValue visitFMULForFMADistributiveCombine(SDNode *N);
519 SDValue XformToShuffleWithZero(SDNode *N);
520 bool reassociationCanBreakAddressingModePattern(unsigned Opc,
521 const SDLoc &DL, SDValue N0,
523 SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
525 SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
526 SDValue N1, SDNodeFlags Flags);
528 SDValue visitShiftByConstant(SDNode *N);
530 SDValue foldSelectOfConstants(SDNode *N);
531 SDValue foldVSelectOfConstants(SDNode *N);
532 SDValue foldBinOpIntoSelect(SDNode *BO);
533 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
534 SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
535 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
536 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
537 SDValue N2, SDValue N3, ISD::CondCode CC,
538 bool NotExtCompare = false);
539 SDValue convertSelectOfFPConstantsToLoadOffset(
540 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
542 SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
543 SDValue N2, SDValue N3, ISD::CondCode CC);
544 SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
546 SDValue unfoldMaskedMerge(SDNode *N);
547 SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
548 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
549 const SDLoc &DL, bool foldBooleans);
550 SDValue rebuildSetCC(SDValue N);
552 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
553 SDValue &CC, bool MatchStrict = false) const;
554 bool isOneUseSetCC(SDValue N) const;
556 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
558 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
559 SDValue CombineExtLoad(SDNode *N);
560 SDValue CombineZExtLogicopShiftLoad(SDNode *N);
561 SDValue combineRepeatedFPDivisors(SDNode *N);
562 SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
563 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
564 SDValue BuildSDIV(SDNode *N);
565 SDValue BuildSDIVPow2(SDNode *N);
566 SDValue BuildUDIV(SDNode *N);
567 SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
568 SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
569 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
570 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
571 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
572 SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
573 SDNodeFlags Flags, bool Reciprocal);
574 SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
575 SDNodeFlags Flags, bool Reciprocal);
576 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
577 bool DemandHighBits = true);
578 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
579 SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
580 SDValue InnerPos, SDValue InnerNeg,
581 unsigned PosOpcode, unsigned NegOpcode,
583 SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
584 SDValue InnerPos, SDValue InnerNeg,
585 unsigned PosOpcode, unsigned NegOpcode,
587 SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
588 SDValue MatchLoadCombine(SDNode *N);
589 SDValue MatchStoreCombine(StoreSDNode *N);
590 SDValue ReduceLoadWidth(SDNode *N);
591 SDValue ReduceLoadOpStoreWidth(SDNode *N);
592 SDValue splitMergedValStore(StoreSDNode *ST);
593 SDValue TransformFPLoadStorePair(SDNode *N);
594 SDValue convertBuildVecZextToZext(SDNode *N);
595 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
596 SDValue reduceBuildVecTruncToBitCast(SDNode *N);
597 SDValue reduceBuildVecToShuffle(SDNode *N);
598 SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
599 ArrayRef<int> VectorMask, SDValue VecIn1,
600 SDValue VecIn2, unsigned LeftIdx,
602 SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
604 /// Walk up chain skipping non-aliasing memory nodes,
605 /// looking for aliasing nodes and adding them to the Aliases vector.
606 void GatherAllAliases(SDNode *N, SDValue OriginalChain,
607 SmallVectorImpl<SDValue> &Aliases);
609 /// Return true if there is any possibility that the two addresses overlap.
610 bool isAlias(SDNode *Op0, SDNode *Op1) const;
612 /// Walk up chain skipping non-aliasing memory nodes, looking for a better
613 /// chain (aliasing node.)
614 SDValue FindBetterChain(SDNode *N, SDValue Chain);
616 /// Try to replace a store and any possibly adjacent stores on
617 /// consecutive chains with better chains. Return true only if St is
620 /// Notice that other chains may still be replaced even if the function
622 bool findBetterNeighborChains(StoreSDNode *St);
624 // Helper for findBetterNeighborChains. Walk up store chain add additional
625 // chained stores that do not overlap and can be parallelized.
626 bool parallelizeChainedStores(StoreSDNode *St);
628 /// Holds a pointer to an LSBaseSDNode as well as information on where it
629 /// is located in a sequence of memory operations connected by a chain.
631 // Ptr to the mem node.
632 LSBaseSDNode *MemNode;
634 // Offset from the base ptr.
635 int64_t OffsetFromBase;
637 MemOpLink(LSBaseSDNode *N, int64_t Offset)
638 : MemNode(N), OffsetFromBase(Offset) {}
641 // Classify the origin of a stored value.
642 enum class StoreSource { Unknown, Constant, Extract, Load };
643 StoreSource getStoreSource(SDValue StoreVal) {
644 if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal))
645 return StoreSource::Constant;
646 if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
647 StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR)
648 return StoreSource::Extract;
649 if (isa<LoadSDNode>(StoreVal))
650 return StoreSource::Load;
651 return StoreSource::Unknown;
654 /// This is a helper function for visitMUL to check the profitability
655 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
656 /// MulNode is the original multiply, AddNode is (add x, c1),
657 /// and ConstNode is c2.
658 bool isMulAddWithConstProfitable(SDNode *MulNode,
662 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
663 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
664 /// the type of the loaded value to be extended.
665 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
666 EVT LoadResultTy, EVT &ExtVT);
668 /// Helper function to calculate whether the given Load/Store can have its
669 /// width reduced to ExtVT.
670 bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
671 EVT &MemVT, unsigned ShAmt = 0);
673 /// Used by BackwardsPropagateMask to find suitable loads.
674 bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
675 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
676 ConstantSDNode *Mask, SDNode *&NodeToMask);
677 /// Attempt to propagate a given AND node back to load leaves so that they
678 /// can be combined into narrow loads.
679 bool BackwardsPropagateMask(SDNode *N);
681 /// Helper function for mergeConsecutiveStores which merges the component
683 SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
686 /// This is a helper function for mergeConsecutiveStores. When the source
687 /// elements of the consecutive stores are all constants or all extracted
688 /// vector elements, try to merge them into one larger store introducing
689 /// bitcasts if necessary. \return True if a merged store was created.
690 bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
691 EVT MemVT, unsigned NumStores,
692 bool IsConstantSrc, bool UseVector,
695 /// This is a helper function for mergeConsecutiveStores. Stores that
696 /// potentially may be merged with St are placed in StoreNodes. RootNode is
697 /// a chain predecessor to all store candidates.
698 void getStoreMergeCandidates(StoreSDNode *St,
699 SmallVectorImpl<MemOpLink> &StoreNodes,
702 /// Helper function for mergeConsecutiveStores. Checks if candidate stores
703 /// have indirect dependency through their operands. RootNode is the
704 /// predecessor to all stores calculated by getStoreMergeCandidates and is
705 /// used to prune the dependency check. \return True if safe to merge.
706 bool checkMergeStoreCandidatesForDependencies(
707 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
710 /// This is a helper function for mergeConsecutiveStores. Given a list of
711 /// store candidates, find the first N that are consecutive in memory.
712 /// Returns 0 if there are not at least 2 consecutive stores to try merging.
713 unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
714 int64_t ElementSizeBytes) const;
716 /// This is a helper function for mergeConsecutiveStores. It is used for
717 /// store chains that are composed entirely of constant values.
718 bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
719 unsigned NumConsecutiveStores,
720 EVT MemVT, SDNode *Root, bool AllowVectors);
722 /// This is a helper function for mergeConsecutiveStores. It is used for
723 /// store chains that are composed entirely of extracted vector elements.
724 /// When extracting multiple vector elements, try to store them in one
725 /// vector store rather than a sequence of scalar stores.
726 bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
727 unsigned NumConsecutiveStores, EVT MemVT,
730 /// This is a helper function for mergeConsecutiveStores. It is used for
731 /// store chains that are composed entirely of loaded values.
732 bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
733 unsigned NumConsecutiveStores, EVT MemVT,
734 SDNode *Root, bool AllowVectors,
735 bool IsNonTemporalStore, bool IsNonTemporalLoad);
737 /// Merge consecutive store operations into a wide store.
738 /// This optimization uses wide integers or vectors when possible.
739 /// \return true if stores were merged.
740 bool mergeConsecutiveStores(StoreSDNode *St);
742 /// Try to transform a truncation where C is a constant:
743 /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
745 /// \p N needs to be a truncation and its first operand an AND. Other
746 /// requirements are checked by the function (e.g. that trunc is
747 /// single-use) and if missed an empty SDValue is returned.
748 SDValue distributeTruncateThroughAnd(SDNode *N);
750 /// Helper function to determine whether the target supports operation
751 /// given by \p Opcode for type \p VT, that is, whether the operation
752 /// is legal or custom before legalizing operations, and whether is
753 /// legal (but not custom) after legalization.
754 bool hasOperation(unsigned Opcode, EVT VT) {
756 return TLI.isOperationLegal(Opcode, VT);
757 return TLI.isOperationLegalOrCustom(Opcode, VT);
761 /// Runs the dag combiner on all nodes in the work list
762 void Run(CombineLevel AtLevel);
764 SelectionDAG &getDAG() const { return DAG; }
766 /// Returns a type large enough to hold any valid shift amount - before type
767 /// legalization these can be huge.
768 EVT getShiftAmountTy(EVT LHSTy) {
769 assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
770 return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
773 /// This method returns true if we are running before type legalization or
774 /// if the specified VT is legal.
775 bool isTypeLegal(const EVT &VT) {
776 if (!LegalTypes) return true;
777 return TLI.isTypeLegal(VT);
780 /// Convenience wrapper around TargetLowering::getSetCCResultType
781 EVT getSetCCResultType(EVT VT) const {
782 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
785 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
786 SDValue OrigLoad, SDValue ExtLoad,
787 ISD::NodeType ExtType);
790 /// This class is a DAGUpdateListener that removes any deleted
791 /// nodes from the worklist.
792 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
796 explicit WorklistRemover(DAGCombiner &dc)
797 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
799 void NodeDeleted(SDNode *N, SDNode *E) override {
800 DC.removeFromWorklist(N);
804 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
808 explicit WorklistInserter(DAGCombiner &dc)
809 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
811 // FIXME: Ideally we could add N to the worklist, but this causes exponential
812 // compile time costs in large DAGs, e.g. Halide.
813 void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
816 } // end anonymous namespace
818 //===----------------------------------------------------------------------===//
819 // TargetLowering::DAGCombinerInfo implementation
820 //===----------------------------------------------------------------------===//
822 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
823 ((DAGCombiner*)DC)->AddToWorklist(N);
826 SDValue TargetLowering::DAGCombinerInfo::
827 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
828 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
831 SDValue TargetLowering::DAGCombinerInfo::
832 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
833 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
836 SDValue TargetLowering::DAGCombinerInfo::
837 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
838 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
841 bool TargetLowering::DAGCombinerInfo::
842 recursivelyDeleteUnusedNodes(SDNode *N) {
843 return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
846 void TargetLowering::DAGCombinerInfo::
847 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
848 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
851 //===----------------------------------------------------------------------===//
853 //===----------------------------------------------------------------------===//
855 void DAGCombiner::deleteAndRecombine(SDNode *N) {
856 removeFromWorklist(N);
858 // If the operands of this node are only used by the node, they will now be
859 // dead. Make sure to re-visit them and recursively delete dead nodes.
860 for (const SDValue &Op : N->ops())
861 // For an operand generating multiple values, one of the values may
862 // become dead allowing further simplification (e.g. split index
863 // arithmetic from an indexed load).
864 if (Op->hasOneUse() || Op->getNumValues() > 1)
865 AddToWorklist(Op.getNode());
870 // APInts must be the same size for most operations, this helper
871 // function zero extends the shorter of the pair so that they match.
872 // We provide an Offset so that we can create bitwidths that won't overflow.
873 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
874 unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
875 LHS = LHS.zextOrSelf(Bits);
876 RHS = RHS.zextOrSelf(Bits);
879 // Return true if this node is a setcc, or is a select_cc
880 // that selects between the target values used for true and false, making it
881 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
882 // the appropriate nodes based on the type of node we are checking. This
883 // simplifies life a bit for the callers.
884 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
885 SDValue &CC, bool MatchStrict) const {
886 if (N.getOpcode() == ISD::SETCC) {
887 LHS = N.getOperand(0);
888 RHS = N.getOperand(1);
889 CC = N.getOperand(2);
894 (N.getOpcode() == ISD::STRICT_FSETCC ||
895 N.getOpcode() == ISD::STRICT_FSETCCS)) {
896 LHS = N.getOperand(1);
897 RHS = N.getOperand(2);
898 CC = N.getOperand(3);
902 if (N.getOpcode() != ISD::SELECT_CC ||
903 !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
904 !TLI.isConstFalseVal(N.getOperand(3).getNode()))
907 if (TLI.getBooleanContents(N.getValueType()) ==
908 TargetLowering::UndefinedBooleanContent)
911 LHS = N.getOperand(0);
912 RHS = N.getOperand(1);
913 CC = N.getOperand(4);
917 /// Return true if this is a SetCC-equivalent operation with only one use.
918 /// If this is true, it allows the users to invert the operation for free when
919 /// it is profitable to do so.
920 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
922 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
927 // Returns the SDNode if it is a constant float BuildVector
928 // or constant float.
929 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
930 if (isa<ConstantFPSDNode>(N))
932 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
937 // Determines if it is a constant integer or a build vector of constant
938 // integers (and undefs).
939 // Do not permit build vector implicit truncation.
940 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
941 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
942 return !(Const->isOpaque() && NoOpaques);
943 if (N.getOpcode() != ISD::BUILD_VECTOR)
945 unsigned BitWidth = N.getScalarValueSizeInBits();
946 for (const SDValue &Op : N->op_values()) {
949 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
950 if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
951 (Const->isOpaque() && NoOpaques))
957 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
959 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
960 if (V.getOpcode() != ISD::BUILD_VECTOR)
962 return isConstantOrConstantVector(V, NoOpaques) ||
963 ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
966 // Determine if this an indexed load with an opaque target constant index.
967 static bool canSplitIdx(LoadSDNode *LD) {
968 return MaySplitLoadIndex &&
969 (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
970 !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
973 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
977 // Currently this only tries to ensure we don't undo the GEP splits done by
978 // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
979 // we check if the following transformation would be problematic:
980 // (load/store (add, (add, x, offset1), offset2)) ->
981 // (load/store (add, x, offset1+offset2)).
983 if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
989 auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
990 auto *C2 = dyn_cast<ConstantSDNode>(N1);
994 const APInt &C1APIntVal = C1->getAPIntValue();
995 const APInt &C2APIntVal = C2->getAPIntValue();
996 if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
999 const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1000 if (CombinedValueIntVal.getBitWidth() > 64)
1002 const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1004 for (SDNode *Node : N0->uses()) {
1005 auto LoadStore = dyn_cast<MemSDNode>(Node);
1007 // Is x[offset2] already not a legal addressing mode? If so then
1008 // reassociating the constants breaks nothing (we test offset2 because
1009 // that's the one we hope to fold into the load or store).
1010 TargetLoweringBase::AddrMode AM;
1011 AM.HasBaseReg = true;
1012 AM.BaseOffs = C2APIntVal.getSExtValue();
1013 EVT VT = LoadStore->getMemoryVT();
1014 unsigned AS = LoadStore->getAddressSpace();
1015 Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1016 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1019 // Would x[offset1+offset2] still be a legal addressing mode?
1020 AM.BaseOffs = CombinedValue;
1021 if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1029 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1030 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1031 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1032 SDValue N0, SDValue N1) {
1033 EVT VT = N0.getValueType();
1035 if (N0.getOpcode() != Opc)
1038 if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1039 if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1040 // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1041 if (SDValue OpNode =
1042 DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1043 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1046 if (N0.hasOneUse()) {
1047 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1048 // iff (op x, c1) has one use
1049 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1050 if (!OpNode.getNode())
1052 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1058 // Try to reassociate commutative binops.
1059 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1060 SDValue N1, SDNodeFlags Flags) {
1061 assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1063 // Floating-point reassociation is not allowed without loose FP math.
1064 if (N0.getValueType().isFloatingPoint() ||
1065 N1.getValueType().isFloatingPoint())
1066 if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1069 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1071 if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1076 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1078 assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1080 LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1081 To[0].getNode()->dump(&DAG);
1082 dbgs() << " and " << NumTo - 1 << " other values\n");
1083 for (unsigned i = 0, e = NumTo; i != e; ++i)
1084 assert((!To[i].getNode() ||
1085 N->getValueType(i) == To[i].getValueType()) &&
1086 "Cannot combine value to value of different type!");
1088 WorklistRemover DeadNodes(*this);
1089 DAG.ReplaceAllUsesWith(N, To);
1091 // Push the new nodes and any users onto the worklist
1092 for (unsigned i = 0, e = NumTo; i != e; ++i) {
1093 if (To[i].getNode()) {
1094 AddToWorklist(To[i].getNode());
1095 AddUsersToWorklist(To[i].getNode());
1100 // Finally, if the node is now dead, remove it from the graph. The node
1101 // may not be dead if the replacement process recursively simplified to
1102 // something else needing this node.
1104 deleteAndRecombine(N);
1105 return SDValue(N, 0);
1109 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1110 // Replace the old value with the new one.
1112 LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1113 dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1116 // Replace all uses. If any nodes become isomorphic to other nodes and
1117 // are deleted, make sure to remove them from our worklist.
1118 WorklistRemover DeadNodes(*this);
1119 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1121 // Push the new node and any (possibly new) users onto the worklist.
1122 AddToWorklistWithUsers(TLO.New.getNode());
1124 // Finally, if the node is now dead, remove it from the graph. The node
1125 // may not be dead if the replacement process recursively simplified to
1126 // something else needing this node.
1127 if (TLO.Old.getNode()->use_empty())
1128 deleteAndRecombine(TLO.Old.getNode());
1131 /// Check the specified integer node value to see if it can be simplified or if
1132 /// things it uses can be simplified by bit propagation. If so, return true.
1133 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1134 const APInt &DemandedElts,
1135 bool AssumeSingleUse) {
1136 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1138 if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1142 // Revisit the node.
1143 AddToWorklist(Op.getNode());
1145 CommitTargetLoweringOpt(TLO);
1149 /// Check the specified vector node value to see if it can be simplified or
1150 /// if things it uses can be simplified as it only uses some of the elements.
1151 /// If so, return true.
1152 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1153 const APInt &DemandedElts,
1154 bool AssumeSingleUse) {
1155 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1156 APInt KnownUndef, KnownZero;
1157 if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1158 TLO, 0, AssumeSingleUse))
1161 // Revisit the node.
1162 AddToWorklist(Op.getNode());
1164 CommitTargetLoweringOpt(TLO);
1168 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1170 EVT VT = Load->getValueType(0);
1171 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1173 LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1174 Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1175 WorklistRemover DeadNodes(*this);
1176 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1177 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1178 deleteAndRecombine(Load);
1179 AddToWorklist(Trunc.getNode());
1182 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1185 if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1186 LoadSDNode *LD = cast<LoadSDNode>(Op);
1187 EVT MemVT = LD->getMemoryVT();
1188 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1189 : LD->getExtensionType();
1191 return DAG.getExtLoad(ExtType, DL, PVT,
1192 LD->getChain(), LD->getBasePtr(),
1193 MemVT, LD->getMemOperand());
1196 unsigned Opc = Op.getOpcode();
1199 case ISD::AssertSext:
1200 if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1201 return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1203 case ISD::AssertZext:
1204 if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1205 return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1207 case ISD::Constant: {
1209 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1210 return DAG.getNode(ExtOpc, DL, PVT, Op);
1214 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1216 return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1219 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1220 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1222 EVT OldVT = Op.getValueType();
1224 bool Replace = false;
1225 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1226 if (!NewOp.getNode())
1228 AddToWorklist(NewOp.getNode());
1231 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1232 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1233 DAG.getValueType(OldVT));
1236 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1237 EVT OldVT = Op.getValueType();
1239 bool Replace = false;
1240 SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1241 if (!NewOp.getNode())
1243 AddToWorklist(NewOp.getNode());
1246 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1247 return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1250 /// Promote the specified integer binary operation if the target indicates it is
1251 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1252 /// i32 since i16 instructions are longer.
1253 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1254 if (!LegalOperations)
1257 EVT VT = Op.getValueType();
1258 if (VT.isVector() || !VT.isInteger())
1261 // If operation type is 'undesirable', e.g. i16 on x86, consider
1263 unsigned Opc = Op.getOpcode();
1264 if (TLI.isTypeDesirableForOp(Opc, VT))
1268 // Consult target whether it is a good idea to promote this operation and
1269 // what's the right type to promote it to.
1270 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1271 assert(PVT != VT && "Don't know what type to promote to!");
1273 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1275 bool Replace0 = false;
1276 SDValue N0 = Op.getOperand(0);
1277 SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1279 bool Replace1 = false;
1280 SDValue N1 = Op.getOperand(1);
1281 SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1285 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1287 // We are always replacing N0/N1's use in N and only need additional
1288 // replacements if there are additional uses.
1289 // Note: We are checking uses of the *nodes* (SDNode) rather than values
1290 // (SDValue) here because the node may reference multiple values
1291 // (for example, the chain value of a load node).
1292 Replace0 &= !N0->hasOneUse();
1293 Replace1 &= (N0 != N1) && !N1->hasOneUse();
1295 // Combine Op here so it is preserved past replacements.
1296 CombineTo(Op.getNode(), RV);
1298 // If operands have a use ordering, make sure we deal with
1299 // predecessor first.
1300 if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1302 std::swap(NN0, NN1);
1306 AddToWorklist(NN0.getNode());
1307 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1310 AddToWorklist(NN1.getNode());
1311 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1318 /// Promote the specified integer shift operation if the target indicates it is
1319 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1320 /// i32 since i16 instructions are longer.
1321 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1322 if (!LegalOperations)
1325 EVT VT = Op.getValueType();
1326 if (VT.isVector() || !VT.isInteger())
1329 // If operation type is 'undesirable', e.g. i16 on x86, consider
1331 unsigned Opc = Op.getOpcode();
1332 if (TLI.isTypeDesirableForOp(Opc, VT))
1336 // Consult target whether it is a good idea to promote this operation and
1337 // what's the right type to promote it to.
1338 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1339 assert(PVT != VT && "Don't know what type to promote to!");
1341 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1343 bool Replace = false;
1344 SDValue N0 = Op.getOperand(0);
1345 SDValue N1 = Op.getOperand(1);
1346 if (Opc == ISD::SRA)
1347 N0 = SExtPromoteOperand(N0, PVT);
1348 else if (Opc == ISD::SRL)
1349 N0 = ZExtPromoteOperand(N0, PVT);
1351 N0 = PromoteOperand(N0, PVT, Replace);
1358 DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1361 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1363 // Deal with Op being deleted.
1364 if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1370 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1371 if (!LegalOperations)
1374 EVT VT = Op.getValueType();
1375 if (VT.isVector() || !VT.isInteger())
1378 // If operation type is 'undesirable', e.g. i16 on x86, consider
1380 unsigned Opc = Op.getOpcode();
1381 if (TLI.isTypeDesirableForOp(Opc, VT))
1385 // Consult target whether it is a good idea to promote this operation and
1386 // what's the right type to promote it to.
1387 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1388 assert(PVT != VT && "Don't know what type to promote to!");
1389 // fold (aext (aext x)) -> (aext x)
1390 // fold (aext (zext x)) -> (zext x)
1391 // fold (aext (sext x)) -> (sext x)
1392 LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1393 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1398 bool DAGCombiner::PromoteLoad(SDValue Op) {
1399 if (!LegalOperations)
1402 if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1405 EVT VT = Op.getValueType();
1406 if (VT.isVector() || !VT.isInteger())
1409 // If operation type is 'undesirable', e.g. i16 on x86, consider
1411 unsigned Opc = Op.getOpcode();
1412 if (TLI.isTypeDesirableForOp(Opc, VT))
1416 // Consult target whether it is a good idea to promote this operation and
1417 // what's the right type to promote it to.
1418 if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1419 assert(PVT != VT && "Don't know what type to promote to!");
1422 SDNode *N = Op.getNode();
1423 LoadSDNode *LD = cast<LoadSDNode>(N);
1424 EVT MemVT = LD->getMemoryVT();
1425 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1426 : LD->getExtensionType();
1427 SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1428 LD->getChain(), LD->getBasePtr(),
1429 MemVT, LD->getMemOperand());
1430 SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1432 LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1433 Result.getNode()->dump(&DAG); dbgs() << '\n');
1434 WorklistRemover DeadNodes(*this);
1435 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1436 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1437 deleteAndRecombine(N);
1438 AddToWorklist(Result.getNode());
1444 /// Recursively delete a node which has no uses and any operands for
1445 /// which it is the only use.
1447 /// Note that this both deletes the nodes and removes them from the worklist.
1448 /// It also adds any nodes who have had a user deleted to the worklist as they
1449 /// may now have only one use and subject to other combines.
1450 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1451 if (!N->use_empty())
1454 SmallSetVector<SDNode *, 16> Nodes;
1457 N = Nodes.pop_back_val();
1461 if (N->use_empty()) {
1462 for (const SDValue &ChildN : N->op_values())
1463 Nodes.insert(ChildN.getNode());
1465 removeFromWorklist(N);
1470 } while (!Nodes.empty());
1474 //===----------------------------------------------------------------------===//
1475 // Main DAG Combiner implementation
1476 //===----------------------------------------------------------------------===//
1478 void DAGCombiner::Run(CombineLevel AtLevel) {
1479 // set the instance variables, so that the various visit routines may use it.
1481 LegalDAG = Level >= AfterLegalizeDAG;
1482 LegalOperations = Level >= AfterLegalizeVectorOps;
1483 LegalTypes = Level >= AfterLegalizeTypes;
1485 WorklistInserter AddNodes(*this);
1487 // Add all the dag nodes to the worklist.
1488 for (SDNode &Node : DAG.allnodes())
1489 AddToWorklist(&Node);
1491 // Create a dummy node (which is not added to allnodes), that adds a reference
1492 // to the root node, preventing it from being deleted, and tracking any
1493 // changes of the root.
1494 HandleSDNode Dummy(DAG.getRoot());
1496 // While we have a valid worklist entry node, try to combine it.
1497 while (SDNode *N = getNextWorklistEntry()) {
1498 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1499 // N is deleted from the DAG, since they too may now be dead or may have a
1500 // reduced number of uses, allowing other xforms.
1501 if (recursivelyDeleteUnusedNodes(N))
1504 WorklistRemover DeadNodes(*this);
1506 // If this combine is running after legalizing the DAG, re-legalize any
1507 // nodes pulled off the worklist.
1509 SmallSetVector<SDNode *, 16> UpdatedNodes;
1510 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1512 for (SDNode *LN : UpdatedNodes)
1513 AddToWorklistWithUsers(LN);
1519 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1521 // Add any operands of the new node which have not yet been combined to the
1522 // worklist as well. Because the worklist uniques things already, this
1523 // won't repeatedly process the same operand.
1524 CombinedNodes.insert(N);
1525 for (const SDValue &ChildN : N->op_values())
1526 if (!CombinedNodes.count(ChildN.getNode()))
1527 AddToWorklist(ChildN.getNode());
1529 SDValue RV = combine(N);
1536 // If we get back the same node we passed in, rather than a new node or
1537 // zero, we know that the node must have defined multiple values and
1538 // CombineTo was used. Since CombineTo takes care of the worklist
1539 // mechanics for us, we have no work to do in this case.
1540 if (RV.getNode() == N)
1543 assert(N->getOpcode() != ISD::DELETED_NODE &&
1544 RV.getOpcode() != ISD::DELETED_NODE &&
1545 "Node was deleted but visit returned new node!");
1547 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1549 if (N->getNumValues() == RV.getNode()->getNumValues())
1550 DAG.ReplaceAllUsesWith(N, RV.getNode());
1552 assert(N->getValueType(0) == RV.getValueType() &&
1553 N->getNumValues() == 1 && "Type mismatch");
1554 DAG.ReplaceAllUsesWith(N, &RV);
1557 // Push the new node and any users onto the worklist
1558 AddToWorklist(RV.getNode());
1559 AddUsersToWorklist(RV.getNode());
1561 // Finally, if the node is now dead, remove it from the graph. The node
1562 // may not be dead if the replacement process recursively simplified to
1563 // something else needing this node. This will also take care of adding any
1564 // operands which have lost a user to the worklist.
1565 recursivelyDeleteUnusedNodes(N);
1568 // If the root changed (e.g. it was a dead load, update the root).
1569 DAG.setRoot(Dummy.getValue());
1570 DAG.RemoveDeadNodes();
1573 SDValue DAGCombiner::visit(SDNode *N) {
1574 switch (N->getOpcode()) {
1576 case ISD::TokenFactor: return visitTokenFactor(N);
1577 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1578 case ISD::ADD: return visitADD(N);
1579 case ISD::SUB: return visitSUB(N);
1581 case ISD::UADDSAT: return visitADDSAT(N);
1583 case ISD::USUBSAT: return visitSUBSAT(N);
1584 case ISD::ADDC: return visitADDC(N);
1586 case ISD::UADDO: return visitADDO(N);
1587 case ISD::SUBC: return visitSUBC(N);
1589 case ISD::USUBO: return visitSUBO(N);
1590 case ISD::ADDE: return visitADDE(N);
1591 case ISD::ADDCARRY: return visitADDCARRY(N);
1592 case ISD::SUBE: return visitSUBE(N);
1593 case ISD::SUBCARRY: return visitSUBCARRY(N);
1595 case ISD::SMULFIXSAT:
1597 case ISD::UMULFIXSAT: return visitMULFIX(N);
1598 case ISD::MUL: return visitMUL(N);
1599 case ISD::SDIV: return visitSDIV(N);
1600 case ISD::UDIV: return visitUDIV(N);
1602 case ISD::UREM: return visitREM(N);
1603 case ISD::MULHU: return visitMULHU(N);
1604 case ISD::MULHS: return visitMULHS(N);
1605 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1606 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1608 case ISD::UMULO: return visitMULO(N);
1612 case ISD::UMAX: return visitIMINMAX(N);
1613 case ISD::AND: return visitAND(N);
1614 case ISD::OR: return visitOR(N);
1615 case ISD::XOR: return visitXOR(N);
1616 case ISD::SHL: return visitSHL(N);
1617 case ISD::SRA: return visitSRA(N);
1618 case ISD::SRL: return visitSRL(N);
1620 case ISD::ROTL: return visitRotate(N);
1622 case ISD::FSHR: return visitFunnelShift(N);
1623 case ISD::ABS: return visitABS(N);
1624 case ISD::BSWAP: return visitBSWAP(N);
1625 case ISD::BITREVERSE: return visitBITREVERSE(N);
1626 case ISD::CTLZ: return visitCTLZ(N);
1627 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1628 case ISD::CTTZ: return visitCTTZ(N);
1629 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1630 case ISD::CTPOP: return visitCTPOP(N);
1631 case ISD::SELECT: return visitSELECT(N);
1632 case ISD::VSELECT: return visitVSELECT(N);
1633 case ISD::SELECT_CC: return visitSELECT_CC(N);
1634 case ISD::SETCC: return visitSETCC(N);
1635 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1636 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1637 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1638 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1639 case ISD::AssertSext:
1640 case ISD::AssertZext: return visitAssertExt(N);
1641 case ISD::AssertAlign: return visitAssertAlign(N);
1642 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1643 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1644 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1645 case ISD::TRUNCATE: return visitTRUNCATE(N);
1646 case ISD::BITCAST: return visitBITCAST(N);
1647 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1648 case ISD::FADD: return visitFADD(N);
1649 case ISD::FSUB: return visitFSUB(N);
1650 case ISD::FMUL: return visitFMUL(N);
1651 case ISD::FMA: return visitFMA(N);
1652 case ISD::FDIV: return visitFDIV(N);
1653 case ISD::FREM: return visitFREM(N);
1654 case ISD::FSQRT: return visitFSQRT(N);
1655 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1656 case ISD::FPOW: return visitFPOW(N);
1657 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1658 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1659 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1660 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1661 case ISD::FP_ROUND: return visitFP_ROUND(N);
1662 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1663 case ISD::FNEG: return visitFNEG(N);
1664 case ISD::FABS: return visitFABS(N);
1665 case ISD::FFLOOR: return visitFFLOOR(N);
1666 case ISD::FMINNUM: return visitFMINNUM(N);
1667 case ISD::FMAXNUM: return visitFMAXNUM(N);
1668 case ISD::FMINIMUM: return visitFMINIMUM(N);
1669 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1670 case ISD::FCEIL: return visitFCEIL(N);
1671 case ISD::FTRUNC: return visitFTRUNC(N);
1672 case ISD::BRCOND: return visitBRCOND(N);
1673 case ISD::BR_CC: return visitBR_CC(N);
1674 case ISD::LOAD: return visitLOAD(N);
1675 case ISD::STORE: return visitSTORE(N);
1676 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1677 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1678 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1679 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1680 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1681 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1682 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1683 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1684 case ISD::MGATHER: return visitMGATHER(N);
1685 case ISD::MLOAD: return visitMLOAD(N);
1686 case ISD::MSCATTER: return visitMSCATTER(N);
1687 case ISD::MSTORE: return visitMSTORE(N);
1688 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1689 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1690 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1691 case ISD::FREEZE: return visitFREEZE(N);
1692 case ISD::VECREDUCE_FADD:
1693 case ISD::VECREDUCE_FMUL:
1694 case ISD::VECREDUCE_ADD:
1695 case ISD::VECREDUCE_MUL:
1696 case ISD::VECREDUCE_AND:
1697 case ISD::VECREDUCE_OR:
1698 case ISD::VECREDUCE_XOR:
1699 case ISD::VECREDUCE_SMAX:
1700 case ISD::VECREDUCE_SMIN:
1701 case ISD::VECREDUCE_UMAX:
1702 case ISD::VECREDUCE_UMIN:
1703 case ISD::VECREDUCE_FMAX:
1704 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1709 SDValue DAGCombiner::combine(SDNode *N) {
1711 if (!DisableGenericCombines)
1714 // If nothing happened, try a target-specific DAG combine.
1715 if (!RV.getNode()) {
1716 assert(N->getOpcode() != ISD::DELETED_NODE &&
1717 "Node was deleted but visit returned NULL!");
1719 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1720 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1722 // Expose the DAG combiner to the target combiner impls.
1723 TargetLowering::DAGCombinerInfo
1724 DagCombineInfo(DAG, Level, false, this);
1726 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1730 // If nothing happened still, try promoting the operation.
1731 if (!RV.getNode()) {
1732 switch (N->getOpcode()) {
1740 RV = PromoteIntBinOp(SDValue(N, 0));
1745 RV = PromoteIntShiftOp(SDValue(N, 0));
1747 case ISD::SIGN_EXTEND:
1748 case ISD::ZERO_EXTEND:
1749 case ISD::ANY_EXTEND:
1750 RV = PromoteExtend(SDValue(N, 0));
1753 if (PromoteLoad(SDValue(N, 0)))
1759 // If N is a commutative binary node, try to eliminate it if the commuted
1760 // version is already present in the DAG.
1761 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1762 N->getNumValues() == 1) {
1763 SDValue N0 = N->getOperand(0);
1764 SDValue N1 = N->getOperand(1);
1766 // Constant operands are canonicalized to RHS.
1767 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1768 SDValue Ops[] = {N1, N0};
1769 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1772 return SDValue(CSENode, 0);
1779 /// Given a node, return its input chain if it has one, otherwise return a null
1781 static SDValue getInputChainForNode(SDNode *N) {
1782 if (unsigned NumOps = N->getNumOperands()) {
1783 if (N->getOperand(0).getValueType() == MVT::Other)
1784 return N->getOperand(0);
1785 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1786 return N->getOperand(NumOps-1);
1787 for (unsigned i = 1; i < NumOps-1; ++i)
1788 if (N->getOperand(i).getValueType() == MVT::Other)
1789 return N->getOperand(i);
1794 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1795 // If N has two operands, where one has an input chain equal to the other,
1796 // the 'other' chain is redundant.
1797 if (N->getNumOperands() == 2) {
1798 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1799 return N->getOperand(0);
1800 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1801 return N->getOperand(1);
1804 // Don't simplify token factors if optnone.
1805 if (OptLevel == CodeGenOpt::None)
1808 // If the sole user is a token factor, we should make sure we have a
1809 // chance to merge them together. This prevents TF chains from inhibiting
1811 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1812 AddToWorklist(*(N->use_begin()));
1814 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1815 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1816 SmallPtrSet<SDNode*, 16> SeenOps;
1817 bool Changed = false; // If we should replace this token factor.
1819 // Start out with this token factor.
1822 // Iterate through token factors. The TFs grows when new token factors are
1824 for (unsigned i = 0; i < TFs.size(); ++i) {
1825 // Limit number of nodes to inline, to avoid quadratic compile times.
1826 // We have to add the outstanding Token Factors to Ops, otherwise we might
1827 // drop Ops from the resulting Token Factors.
1828 if (Ops.size() > TokenFactorInlineLimit) {
1829 for (unsigned j = i; j < TFs.size(); j++)
1830 Ops.emplace_back(TFs[j], 0);
1831 // Drop unprocessed Token Factors from TFs, so we do not add them to the
1832 // combiner worklist later.
1837 SDNode *TF = TFs[i];
1838 // Check each of the operands.
1839 for (const SDValue &Op : TF->op_values()) {
1840 switch (Op.getOpcode()) {
1841 case ISD::EntryToken:
1842 // Entry tokens don't need to be added to the list. They are
1847 case ISD::TokenFactor:
1848 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1849 // Queue up for processing.
1850 TFs.push_back(Op.getNode());
1857 // Only add if it isn't already in the list.
1858 if (SeenOps.insert(Op.getNode()).second)
1867 // Re-visit inlined Token Factors, to clean them up in case they have been
1868 // removed. Skip the first Token Factor, as this is the current node.
1869 for (unsigned i = 1, e = TFs.size(); i < e; i++)
1870 AddToWorklist(TFs[i]);
1872 // Remove Nodes that are chained to another node in the list. Do so
1873 // by walking up chains breath-first stopping when we've seen
1874 // another operand. In general we must climb to the EntryNode, but we can exit
1875 // early if we find all remaining work is associated with just one operand as
1876 // no further pruning is possible.
1878 // List of nodes to search through and original Ops from which they originate.
1879 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1880 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1881 SmallPtrSet<SDNode *, 16> SeenChains;
1882 bool DidPruneOps = false;
1884 unsigned NumLeftToConsider = 0;
1885 for (const SDValue &Op : Ops) {
1886 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1887 OpWorkCount.push_back(1);
1890 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1891 // If this is an Op, we can remove the op from the list. Remark any
1892 // search associated with it as from the current OpNumber.
1893 if (SeenOps.count(Op) != 0) {
1896 unsigned OrigOpNumber = 0;
1897 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1899 assert((OrigOpNumber != Ops.size()) &&
1900 "expected to find TokenFactor Operand");
1901 // Re-mark worklist from OrigOpNumber to OpNumber
1902 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1903 if (Worklist[i].second == OrigOpNumber) {
1904 Worklist[i].second = OpNumber;
1907 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1908 OpWorkCount[OrigOpNumber] = 0;
1909 NumLeftToConsider--;
1911 // Add if it's a new chain
1912 if (SeenChains.insert(Op).second) {
1913 OpWorkCount[OpNumber]++;
1914 Worklist.push_back(std::make_pair(Op, OpNumber));
1918 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1919 // We need at least be consider at least 2 Ops to prune.
1920 if (NumLeftToConsider <= 1)
1922 auto CurNode = Worklist[i].first;
1923 auto CurOpNumber = Worklist[i].second;
1924 assert((OpWorkCount[CurOpNumber] > 0) &&
1925 "Node should not appear in worklist");
1926 switch (CurNode->getOpcode()) {
1927 case ISD::EntryToken:
1928 // Hitting EntryToken is the only way for the search to terminate without
1930 // another operand's search. Prevent us from marking this operand
1932 NumLeftToConsider++;
1934 case ISD::TokenFactor:
1935 for (const SDValue &Op : CurNode->op_values())
1936 AddToWorklist(i, Op.getNode(), CurOpNumber);
1938 case ISD::LIFETIME_START:
1939 case ISD::LIFETIME_END:
1940 case ISD::CopyFromReg:
1941 case ISD::CopyToReg:
1942 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1945 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1946 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1949 OpWorkCount[CurOpNumber]--;
1950 if (OpWorkCount[CurOpNumber] == 0)
1951 NumLeftToConsider--;
1954 // If we've changed things around then replace token factor.
1958 // The entry token is the only possible outcome.
1959 Result = DAG.getEntryNode();
1962 SmallVector<SDValue, 8> PrunedOps;
1964 for (const SDValue &Op : Ops) {
1965 if (SeenChains.count(Op.getNode()) == 0)
1966 PrunedOps.push_back(Op);
1968 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1970 Result = DAG.getTokenFactor(SDLoc(N), Ops);
1978 /// MERGE_VALUES can always be eliminated.
1979 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1980 WorklistRemover DeadNodes(*this);
1981 // Replacing results may cause a different MERGE_VALUES to suddenly
1982 // be CSE'd with N, and carry its uses with it. Iterate until no
1983 // uses remain, to ensure that the node can be safely deleted.
1984 // First add the users of this node to the work list so that they
1985 // can be tried again once they have new operands.
1986 AddUsersToWorklist(N);
1988 // Do as a single replacement to avoid rewalking use lists.
1989 SmallVector<SDValue, 8> Ops;
1990 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1991 Ops.push_back(N->getOperand(i));
1992 DAG.ReplaceAllUsesWith(N, Ops.data());
1993 } while (!N->use_empty());
1994 deleteAndRecombine(N);
1995 return SDValue(N, 0); // Return N so it doesn't get rechecked!
1998 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1999 /// ConstantSDNode pointer else nullptr.
2000 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2001 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2002 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2005 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2006 assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2007 "Unexpected binary operator");
2009 // Don't do this unless the old select is going away. We want to eliminate the
2010 // binary operator, not replace a binop with a select.
2011 // TODO: Handle ISD::SELECT_CC.
2012 unsigned SelOpNo = 0;
2013 SDValue Sel = BO->getOperand(0);
2014 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2016 Sel = BO->getOperand(1);
2019 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2022 SDValue CT = Sel.getOperand(1);
2023 if (!isConstantOrConstantVector(CT, true) &&
2024 !isConstantFPBuildVectorOrConstantFP(CT))
2027 SDValue CF = Sel.getOperand(2);
2028 if (!isConstantOrConstantVector(CF, true) &&
2029 !isConstantFPBuildVectorOrConstantFP(CF))
2032 // Bail out if any constants are opaque because we can't constant fold those.
2033 // The exception is "and" and "or" with either 0 or -1 in which case we can
2034 // propagate non constant operands into select. I.e.:
2035 // and (select Cond, 0, -1), X --> select Cond, 0, X
2036 // or X, (select Cond, -1, 0) --> select Cond, -1, X
2037 auto BinOpcode = BO->getOpcode();
2038 bool CanFoldNonConst =
2039 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2040 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2041 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2043 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2044 if (!CanFoldNonConst &&
2045 !isConstantOrConstantVector(CBO, true) &&
2046 !isConstantFPBuildVectorOrConstantFP(CBO))
2049 EVT VT = Sel.getValueType();
2051 // In case of shift value and shift amount may have different VT. For instance
2052 // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
2053 // swapped operands and value types do not match. NB: x86 is fine if operands
2054 // are not swapped with shift amount VT being not bigger than shifted value.
2055 // TODO: that is possible to check for a shift operation, correct VTs and
2056 // still perform optimization on x86 if needed.
2057 if (SelOpNo && VT != CBO.getValueType())
2060 // We have a select-of-constants followed by a binary operator with a
2061 // constant. Eliminate the binop by pulling the constant math into the select.
2062 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2064 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2065 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2066 if (!CanFoldNonConst && !NewCT.isUndef() &&
2067 !isConstantOrConstantVector(NewCT, true) &&
2068 !isConstantFPBuildVectorOrConstantFP(NewCT))
2071 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2072 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2073 if (!CanFoldNonConst && !NewCF.isUndef() &&
2074 !isConstantOrConstantVector(NewCF, true) &&
2075 !isConstantFPBuildVectorOrConstantFP(NewCF))
2078 SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2079 SelectOp->setFlags(BO->getFlags());
2083 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2084 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2085 "Expecting add or sub");
2087 // Match a constant operand and a zext operand for the math instruction:
2090 bool IsAdd = N->getOpcode() == ISD::ADD;
2091 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2092 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2093 auto *CN = dyn_cast<ConstantSDNode>(C);
2094 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2097 // Match the zext operand as a setcc of a boolean.
2098 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2099 Z.getOperand(0).getValueType() != MVT::i1)
2102 // Match the compare as: setcc (X & 1), 0, eq.
2103 SDValue SetCC = Z.getOperand(0);
2104 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2105 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2106 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2107 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2110 // We are adding/subtracting a constant and an inverted low bit. Turn that
2111 // into a subtract/add of the low bit with incremented/decremented constant:
2112 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2113 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2114 EVT VT = C.getValueType();
2116 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2117 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2118 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2119 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2122 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2123 /// a shift and add with a different constant.
2124 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2125 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2126 "Expecting add or sub");
2128 // We need a constant operand for the add/sub, and the other operand is a
2129 // logical shift right: add (srl), C or sub C, (srl).
2130 bool IsAdd = N->getOpcode() == ISD::ADD;
2131 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2132 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2133 if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2134 ShiftOp.getOpcode() != ISD::SRL)
2137 // The shift must be of a 'not' value.
2138 SDValue Not = ShiftOp.getOperand(0);
2139 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2142 // The shift must be moving the sign bit to the least-significant-bit.
2143 EVT VT = ShiftOp.getValueType();
2144 SDValue ShAmt = ShiftOp.getOperand(1);
2145 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2146 if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2149 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2150 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2151 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2153 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2154 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2156 DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2157 {ConstantOp, DAG.getConstant(1, DL, VT)}))
2158 return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2162 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2163 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2164 /// are no common bits set in the operands).
2165 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2166 SDValue N0 = N->getOperand(0);
2167 SDValue N1 = N->getOperand(1);
2168 EVT VT = N0.getValueType();
2172 if (VT.isVector()) {
2173 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2176 // fold (add x, 0) -> x, vector edition
2177 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2179 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2183 // fold (add x, undef) -> undef
2190 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2191 // canonicalize constant to RHS
2192 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2193 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2194 // fold (add c1, c2) -> c1+c2
2195 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2198 // fold (add x, 0) -> x
2199 if (isNullConstant(N1))
2202 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2203 // fold ((A-c1)+c2) -> (A+(c2-c1))
2204 if (N0.getOpcode() == ISD::SUB &&
2205 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2207 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2208 assert(Sub && "Constant folding failed");
2209 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2212 // fold ((c1-A)+c2) -> (c1+c2)-A
2213 if (N0.getOpcode() == ISD::SUB &&
2214 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2216 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2217 assert(Add && "Constant folding failed");
2218 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2221 // add (sext i1 X), 1 -> zext (not i1 X)
2222 // We don't transform this pattern:
2223 // add (zext i1 X), -1 -> sext (not i1 X)
2224 // because most (?) targets generate better code for the zext form.
2225 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2226 isOneOrOneSplat(N1)) {
2227 SDValue X = N0.getOperand(0);
2228 if ((!LegalOperations ||
2229 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2230 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2231 X.getScalarValueSizeInBits() == 1) {
2232 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2233 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2237 // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2238 // equivalent to (add x, c0).
2239 if (N0.getOpcode() == ISD::OR &&
2240 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2241 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2242 if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2243 {N1, N0.getOperand(1)}))
2244 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2248 if (SDValue NewSel = foldBinOpIntoSelect(N))
2252 if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2253 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2256 // fold ((0-A) + B) -> B-A
2257 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2258 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2260 // fold (A + (0-B)) -> A-B
2261 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2262 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2264 // fold (A+(B-A)) -> B
2265 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2266 return N1.getOperand(0);
2268 // fold ((B-A)+A) -> B
2269 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2270 return N0.getOperand(0);
2272 // fold ((A-B)+(C-A)) -> (C-B)
2273 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2274 N0.getOperand(0) == N1.getOperand(1))
2275 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2278 // fold ((A-B)+(B-C)) -> (A-C)
2279 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2280 N0.getOperand(1) == N1.getOperand(0))
2281 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2284 // fold (A+(B-(A+C))) to (B-C)
2285 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2286 N0 == N1.getOperand(1).getOperand(0))
2287 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2288 N1.getOperand(1).getOperand(1));
2290 // fold (A+(B-(C+A))) to (B-C)
2291 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2292 N0 == N1.getOperand(1).getOperand(1))
2293 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2294 N1.getOperand(1).getOperand(0));
2296 // fold (A+((B-A)+or-C)) to (B+or-C)
2297 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2298 N1.getOperand(0).getOpcode() == ISD::SUB &&
2299 N0 == N1.getOperand(0).getOperand(1))
2300 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2303 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2304 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2305 SDValue N00 = N0.getOperand(0);
2306 SDValue N01 = N0.getOperand(1);
2307 SDValue N10 = N1.getOperand(0);
2308 SDValue N11 = N1.getOperand(1);
2310 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2311 return DAG.getNode(ISD::SUB, DL, VT,
2312 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2313 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2316 // fold (add (umax X, C), -C) --> (usubsat X, C)
2317 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2318 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2319 return (!Max && !Op) ||
2320 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2322 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2323 /*AllowUndefs*/ true))
2324 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2328 if (SimplifyDemandedBits(SDValue(N, 0)))
2329 return SDValue(N, 0);
2331 if (isOneOrOneSplat(N1)) {
2332 // fold (add (xor a, -1), 1) -> (sub 0, a)
2333 if (isBitwiseNot(N0))
2334 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2337 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2338 if (N0.getOpcode() == ISD::ADD ||
2339 N0.getOpcode() == ISD::UADDO ||
2340 N0.getOpcode() == ISD::SADDO) {
2343 if (isBitwiseNot(N0.getOperand(0))) {
2344 A = N0.getOperand(1);
2345 Xor = N0.getOperand(0);
2346 } else if (isBitwiseNot(N0.getOperand(1))) {
2347 A = N0.getOperand(0);
2348 Xor = N0.getOperand(1);
2352 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2356 // add (add x, y), 1
2357 // And if the target does not like this form then turn into:
2358 // sub y, (xor x, -1)
2359 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2360 N0.getOpcode() == ISD::ADD) {
2361 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2362 DAG.getAllOnesConstant(DL, VT));
2363 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2367 // (x - y) + -1 -> add (xor y, -1), x
2368 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2369 isAllOnesOrAllOnesSplat(N1)) {
2370 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2371 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2374 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2377 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2383 SDValue DAGCombiner::visitADD(SDNode *N) {
2384 SDValue N0 = N->getOperand(0);
2385 SDValue N1 = N->getOperand(1);
2386 EVT VT = N0.getValueType();
2389 if (SDValue Combined = visitADDLike(N))
2392 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2395 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2398 // fold (a+b) -> (a|b) iff a and b share no bits.
2399 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2400 DAG.haveNoCommonBitsSet(N0, N1))
2401 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2403 // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2404 if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2405 APInt C0 = N0->getConstantOperandAPInt(0);
2406 APInt C1 = N1->getConstantOperandAPInt(0);
2407 return DAG.getVScale(DL, VT, C0 + C1);
2410 // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2411 if ((N0.getOpcode() == ISD::ADD) &&
2412 (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2413 (N1.getOpcode() == ISD::VSCALE)) {
2414 auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2415 auto VS1 = N1->getConstantOperandAPInt(0);
2416 auto VS = DAG.getVScale(DL, VT, VS0 + VS1);
2417 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2423 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2424 unsigned Opcode = N->getOpcode();
2425 SDValue N0 = N->getOperand(0);
2426 SDValue N1 = N->getOperand(1);
2427 EVT VT = N0.getValueType();
2431 if (VT.isVector()) {
2432 // TODO SimplifyVBinOp
2434 // fold (add_sat x, 0) -> x, vector edition
2435 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2437 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2441 // fold (add_sat x, undef) -> -1
2442 if (N0.isUndef() || N1.isUndef())
2443 return DAG.getAllOnesConstant(DL, VT);
2445 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2446 // canonicalize constant to RHS
2447 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2448 return DAG.getNode(Opcode, DL, VT, N1, N0);
2449 // fold (add_sat c1, c2) -> c3
2450 return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2453 // fold (add_sat x, 0) -> x
2454 if (isNullConstant(N1))
2457 // If it cannot overflow, transform into an add.
2458 if (Opcode == ISD::UADDSAT)
2459 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2460 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2465 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2466 bool Masked = false;
2468 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2470 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2471 V = V.getOperand(0);
2475 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2477 V = V.getOperand(0);
2484 // If this is not a carry, return.
2485 if (V.getResNo() != 1)
2488 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2489 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2492 EVT VT = V.getNode()->getValueType(0);
2493 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2496 // If the result is masked, then no matter what kind of bool it is we can
2497 // return. If it isn't, then we need to make sure the bool type is either 0 or
2498 // 1 and not other values.
2500 TLI.getBooleanContents(V.getValueType()) ==
2501 TargetLoweringBase::ZeroOrOneBooleanContent)
2507 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2508 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2509 /// the opcode and bypass the mask operation.
2510 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2511 SelectionDAG &DAG, const SDLoc &DL) {
2512 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2515 EVT VT = N0.getValueType();
2516 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2519 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2520 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2521 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2524 /// Helper for doing combines based on N0 and N1 being added to each other.
2525 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2526 SDNode *LocReference) {
2527 EVT VT = N0.getValueType();
2528 SDLoc DL(LocReference);
2530 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2531 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2532 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2533 return DAG.getNode(ISD::SUB, DL, VT, N0,
2534 DAG.getNode(ISD::SHL, DL, VT,
2535 N1.getOperand(0).getOperand(1),
2538 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2542 // add (add x, 1), y
2543 // And if the target does not like this form then turn into:
2544 // sub y, (xor x, -1)
2545 if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2546 N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2547 SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2548 DAG.getAllOnesConstant(DL, VT));
2549 return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2552 // Hoist one-use subtraction by non-opaque constant:
2553 // (x - C) + y -> (x + y) - C
2554 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2555 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2556 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2557 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2558 return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2560 // Hoist one-use subtraction from non-opaque constant:
2561 // (C - x) + y -> (y - x) + C
2562 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2563 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2564 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2565 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2568 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2569 // rather than 'add 0/-1' (the zext should get folded).
2570 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2571 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2572 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2573 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2574 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2575 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2578 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2579 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2580 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2581 if (TN->getVT() == MVT::i1) {
2582 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2583 DAG.getConstant(1, DL, VT));
2584 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2588 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2589 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2591 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2592 N0, N1.getOperand(0), N1.getOperand(2));
2594 // (add X, Carry) -> (addcarry X, 0, Carry)
2595 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2596 if (SDValue Carry = getAsCarry(TLI, N1))
2597 return DAG.getNode(ISD::ADDCARRY, DL,
2598 DAG.getVTList(VT, Carry.getValueType()), N0,
2599 DAG.getConstant(0, DL, VT), Carry);
2604 SDValue DAGCombiner::visitADDC(SDNode *N) {
2605 SDValue N0 = N->getOperand(0);
2606 SDValue N1 = N->getOperand(1);
2607 EVT VT = N0.getValueType();
2610 // If the flag result is dead, turn this into an ADD.
2611 if (!N->hasAnyUseOfValue(1))
2612 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2613 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2615 // canonicalize constant to RHS.
2616 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2617 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2619 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2621 // fold (addc x, 0) -> x + no carry out
2622 if (isNullConstant(N1))
2623 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2626 // If it cannot overflow, transform into an add.
2627 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2628 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2629 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2634 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2635 SelectionDAG &DAG, const TargetLowering &TLI) {
2636 EVT VT = V.getValueType();
2639 switch (TLI.getBooleanContents(VT)) {
2640 case TargetLowering::ZeroOrOneBooleanContent:
2641 case TargetLowering::UndefinedBooleanContent:
2642 Cst = DAG.getConstant(1, DL, VT);
2644 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2645 Cst = DAG.getAllOnesConstant(DL, VT);
2649 return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2653 * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2654 * then the flip also occurs if computing the inverse is the same cost.
2655 * This function returns an empty SDValue in case it cannot flip the boolean
2656 * without increasing the cost of the computation. If you want to flip a boolean
2657 * no matter what, use flipBoolean.
2659 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2660 const TargetLowering &TLI,
2662 if (Force && isa<ConstantSDNode>(V))
2663 return flipBoolean(V, SDLoc(V), DAG, TLI);
2665 if (V.getOpcode() != ISD::XOR)
2668 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2672 EVT VT = V.getValueType();
2674 bool IsFlip = false;
2675 switch(TLI.getBooleanContents(VT)) {
2676 case TargetLowering::ZeroOrOneBooleanContent:
2677 IsFlip = Const->isOne();
2679 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2680 IsFlip = Const->isAllOnesValue();
2682 case TargetLowering::UndefinedBooleanContent:
2683 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2688 return V.getOperand(0);
2690 return flipBoolean(V, SDLoc(V), DAG, TLI);
2694 SDValue DAGCombiner::visitADDO(SDNode *N) {
2695 SDValue N0 = N->getOperand(0);
2696 SDValue N1 = N->getOperand(1);
2697 EVT VT = N0.getValueType();
2698 bool IsSigned = (ISD::SADDO == N->getOpcode());
2700 EVT CarryVT = N->getValueType(1);
2703 // If the flag result is dead, turn this into an ADD.
2704 if (!N->hasAnyUseOfValue(1))
2705 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2706 DAG.getUNDEF(CarryVT));
2708 // canonicalize constant to RHS.
2709 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2710 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2711 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2713 // fold (addo x, 0) -> x + no carry out
2714 if (isNullOrNullSplat(N1))
2715 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2718 // If it cannot overflow, transform into an add.
2719 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2720 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2721 DAG.getConstant(0, DL, CarryVT));
2723 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2724 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2725 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2726 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2727 return CombineTo(N, Sub,
2728 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2731 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2734 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2741 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2742 EVT VT = N0.getValueType();
2746 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2747 // If Y + 1 cannot overflow.
2748 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2749 SDValue Y = N1.getOperand(0);
2750 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2751 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2752 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2756 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2757 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2758 if (SDValue Carry = getAsCarry(TLI, N1))
2759 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2760 DAG.getConstant(0, SDLoc(N), VT), Carry);
2765 SDValue DAGCombiner::visitADDE(SDNode *N) {
2766 SDValue N0 = N->getOperand(0);
2767 SDValue N1 = N->getOperand(1);
2768 SDValue CarryIn = N->getOperand(2);
2770 // canonicalize constant to RHS
2771 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2772 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2774 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2777 // fold (adde x, y, false) -> (addc x, y)
2778 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2779 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2784 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2785 SDValue N0 = N->getOperand(0);
2786 SDValue N1 = N->getOperand(1);
2787 SDValue CarryIn = N->getOperand(2);
2790 // canonicalize constant to RHS
2791 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2792 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2794 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2796 // fold (addcarry x, y, false) -> (uaddo x, y)
2797 if (isNullConstant(CarryIn)) {
2798 if (!LegalOperations ||
2799 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2800 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2803 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2804 if (isNullConstant(N0) && isNullConstant(N1)) {
2805 EVT VT = N0.getValueType();
2806 EVT CarryVT = CarryIn.getValueType();
2807 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2808 AddToWorklist(CarryExt.getNode());
2809 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2810 DAG.getConstant(1, DL, VT)),
2811 DAG.getConstant(0, DL, CarryVT));
2814 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2817 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2824 * If we are facing some sort of diamond carry propapagtion pattern try to
2825 * break it up to generate something like:
2826 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2828 * The end result is usually an increase in operation required, but because the
2829 * carry is now linearized, other tranforms can kick in and optimize the DAG.
2831 * Patterns typically look something like
2836 * | (addcarry *, 0, Z)
2840 * (addcarry X, *, *)
2842 * But numerous variation exist. Our goal is to identify A, B, X and Z and
2843 * produce a combine with a single path for carry propagation.
2845 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2846 SDValue X, SDValue Carry0, SDValue Carry1,
2848 if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2850 if (Carry1.getOpcode() != ISD::UADDO)
2856 * First look for a suitable Z. It will present itself in the form of
2857 * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2859 if (Carry0.getOpcode() == ISD::ADDCARRY &&
2860 isNullConstant(Carry0.getOperand(1))) {
2861 Z = Carry0.getOperand(2);
2862 } else if (Carry0.getOpcode() == ISD::UADDO &&
2863 isOneConstant(Carry0.getOperand(1))) {
2864 EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2865 Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2867 // We couldn't find a suitable Z.
2872 auto cancelDiamond = [&](SDValue A,SDValue B) {
2874 SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2875 Combiner.AddToWorklist(NewY.getNode());
2876 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2877 DAG.getConstant(0, DL, X.getValueType()),
2886 * (addcarry *, 0, Z)
2888 if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2889 return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2893 * (addcarry A, 0, Z)
2899 if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2900 return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2903 if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2904 return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2910 // If we are facing some sort of diamond carry/borrow in/out pattern try to
2911 // match patterns like:
2913 // (uaddo A, B) CarryIn
2916 // PartialSum PartialCarryOutX /
2918 // | ____|____________/
2920 // (uaddo *, *) \________
2923 // | PartialCarryOutY |
2926 // AddCarrySum | ______/
2928 // CarryOut = (or *, *)
2930 // And generate ADDCARRY (or SUBCARRY) with two result values:
2932 // {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
2934 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
2935 // a single path for carry/borrow out propagation:
2936 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2937 const TargetLowering &TLI, SDValue Carry0,
2938 SDValue Carry1, SDNode *N) {
2939 if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
2941 unsigned Opcode = Carry0.getOpcode();
2942 if (Opcode != Carry1.getOpcode())
2944 if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
2947 // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
2948 // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
2949 // the above ASCII art.)
2950 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
2951 Carry1.getOperand(1) != Carry0.getValue(0))
2952 std::swap(Carry0, Carry1);
2953 if (Carry1.getOperand(0) != Carry0.getValue(0) &&
2954 Carry1.getOperand(1) != Carry0.getValue(0))
2957 // The carry in value must be on the righthand side for subtraction.
2958 unsigned CarryInOperandNum =
2959 Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
2960 if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
2962 SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
2964 unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
2965 if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
2968 // Verify that the carry/borrow in is plausibly a carry/borrow bit.
2969 // TODO: make getAsCarry() aware of how partial carries are merged.
2970 if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
2972 CarryIn = CarryIn.getOperand(0);
2973 if (CarryIn.getValueType() != MVT::i1)
2978 DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
2979 Carry0.getOperand(1), CarryIn);
2981 // Please note that because we have proven that the result of the UADDO/USUBO
2982 // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can
2983 // therefore prove that if the first UADDO/USUBO overflows, the second
2984 // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the
2987 // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
2988 // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
2990 // This is important because it means that OR and XOR can be used to merge
2991 // carry flags; and that AND can return a constant zero.
2993 // TODO: match other operations that can merge flags (ADD, etc)
2994 DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
2995 if (N->getOpcode() == ISD::AND)
2996 return DAG.getConstant(0, DL, MVT::i1);
2997 return Merged.getValue(1);
3000 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3002 // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3003 if (isBitwiseNot(N0))
3004 if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3006 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3007 N0.getOperand(0), NotC);
3008 return CombineTo(N, Sub,
3009 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
3012 // Iff the flag result is dead:
3013 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3014 // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3015 // or the dependency between the instructions.
3016 if ((N0.getOpcode() == ISD::ADD ||
3017 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3018 N0.getValue(1) != CarryIn)) &&
3019 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3020 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3021 N0.getOperand(0), N0.getOperand(1), CarryIn);
3024 * When one of the addcarry argument is itself a carry, we may be facing
3025 * a diamond carry propagation. In which case we try to transform the DAG
3026 * to ensure linear carry propagation if that is possible.
3028 if (auto Y = getAsCarry(TLI, N1)) {
3029 // Because both are carries, Y and Z can be swapped.
3030 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3032 if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3039 // Since it may not be valid to emit a fold to zero for vector initializers
3040 // check if we can before folding.
3041 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3042 SelectionDAG &DAG, bool LegalOperations) {
3044 return DAG.getConstant(0, DL, VT);
3045 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3046 return DAG.getConstant(0, DL, VT);
3050 SDValue DAGCombiner::visitSUB(SDNode *N) {
3051 SDValue N0 = N->getOperand(0);
3052 SDValue N1 = N->getOperand(1);
3053 EVT VT = N0.getValueType();
3057 if (VT.isVector()) {
3058 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3061 // fold (sub x, 0) -> x, vector edition
3062 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3066 // fold (sub x, x) -> 0
3067 // FIXME: Refactor this and xor and other similar operations together.
3069 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3071 // fold (sub c1, c2) -> c3
3072 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3075 if (SDValue NewSel = foldBinOpIntoSelect(N))
3078 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3080 // fold (sub x, c) -> (add x, -c)
3082 return DAG.getNode(ISD::ADD, DL, VT, N0,
3083 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3086 if (isNullOrNullSplat(N0)) {
3087 unsigned BitWidth = VT.getScalarSizeInBits();
3088 // Right-shifting everything out but the sign bit followed by negation is
3089 // the same as flipping arithmetic/logical shift type without the negation:
3090 // -(X >>u 31) -> (X >>s 31)
3091 // -(X >>s 31) -> (X >>u 31)
3092 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3093 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3094 if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3095 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3096 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3097 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3101 // 0 - X --> 0 if the sub is NUW.
3102 if (N->getFlags().hasNoUnsignedWrap())
3105 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3106 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3107 // N1 must be 0 because negating the minimum signed value is undefined.
3108 if (N->getFlags().hasNoSignedWrap())
3111 // 0 - X --> X if X is 0 or the minimum signed value.
3116 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3117 if (isAllOnesOrAllOnesSplat(N0))
3118 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3120 // fold (A - (0-B)) -> A+B
3121 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3122 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3124 // fold A-(A-B) -> B
3125 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3126 return N1.getOperand(1);
3128 // fold (A+B)-A -> B
3129 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3130 return N0.getOperand(1);
3132 // fold (A+B)-B -> A
3133 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3134 return N0.getOperand(0);
3136 // fold (A+C1)-C2 -> A+(C1-C2)
3137 if (N0.getOpcode() == ISD::ADD &&
3138 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3139 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3141 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3142 assert(NewC && "Constant folding failed");
3143 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3146 // fold C2-(A+C1) -> (C2-C1)-A
3147 if (N1.getOpcode() == ISD::ADD) {
3148 SDValue N11 = N1.getOperand(1);
3149 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3150 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3151 SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3152 assert(NewC && "Constant folding failed");
3153 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3157 // fold (A-C1)-C2 -> A-(C1+C2)
3158 if (N0.getOpcode() == ISD::SUB &&
3159 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3160 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3162 DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3163 assert(NewC && "Constant folding failed");
3164 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3167 // fold (c1-A)-c2 -> (c1-c2)-A
3168 if (N0.getOpcode() == ISD::SUB &&
3169 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3170 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3172 DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3173 assert(NewC && "Constant folding failed");
3174 return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3177 // fold ((A+(B+or-C))-B) -> A+or-C
3178 if (N0.getOpcode() == ISD::ADD &&
3179 (N0.getOperand(1).getOpcode() == ISD::SUB ||
3180 N0.getOperand(1).getOpcode() == ISD::ADD) &&
3181 N0.getOperand(1).getOperand(0) == N1)
3182 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3183 N0.getOperand(1).getOperand(1));
3185 // fold ((A+(C+B))-B) -> A+C
3186 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3187 N0.getOperand(1).getOperand(1) == N1)
3188 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3189 N0.getOperand(1).getOperand(0));
3191 // fold ((A-(B-C))-C) -> A-B
3192 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3193 N0.getOperand(1).getOperand(1) == N1)
3194 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3195 N0.getOperand(1).getOperand(0));
3197 // fold (A-(B-C)) -> A+(C-B)
3198 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3199 return DAG.getNode(ISD::ADD, DL, VT, N0,
3200 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3203 // A - (A & B) -> A & (~B)
3204 if (N1.getOpcode() == ISD::AND) {
3205 SDValue A = N1.getOperand(0);
3206 SDValue B = N1.getOperand(1);
3210 (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3212 DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3213 return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3217 // fold (X - (-Y * Z)) -> (X + (Y * Z))
3218 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3219 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3220 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3221 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3222 N1.getOperand(0).getOperand(1),
3224 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3226 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3227 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3228 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3230 N1.getOperand(1).getOperand(1));
3231 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3235 // If either operand of a sub is undef, the result is undef
3241 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3244 if (SDValue V = foldAddSubOfSignBit(N, DAG))
3247 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3250 // (x - y) - 1 -> add (xor y, -1), x
3251 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3252 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3253 DAG.getAllOnesConstant(DL, VT));
3254 return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3258 // sub y, (xor x, -1)
3259 // And if the target does not like this form then turn into:
3260 // add (add x, y), 1
3261 if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3262 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3263 return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3266 // Hoist one-use addition by non-opaque constant:
3267 // (x + C) - y -> (x - y) + C
3268 if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3269 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3270 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3271 return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3273 // y - (x + C) -> (y - x) - C
3274 if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3275 isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3276 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3277 return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3279 // (x - C) - y -> (x - y) - C
3280 // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3281 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3282 isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3283 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3284 return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3286 // (C - x) - y -> C - (x + y)
3287 if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3288 isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3289 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3290 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3293 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3294 // rather than 'sub 0/1' (the sext should get folded).
3295 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3296 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3297 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3298 TLI.getBooleanContents(VT) ==
3299 TargetLowering::ZeroOrNegativeOneBooleanContent) {
3300 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3301 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3304 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3305 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3306 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3307 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3308 SDValue S0 = N1.getOperand(0);
3309 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3310 unsigned OpSizeInBits = VT.getScalarSizeInBits();
3311 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3312 if (C->getAPIntValue() == (OpSizeInBits - 1))
3313 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3318 // If the relocation model supports it, consider symbol offsets.
3319 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3320 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3321 // fold (sub Sym, c) -> Sym-c
3322 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3323 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3325 (uint64_t)N1C->getSExtValue());
3326 // fold (sub Sym+c1, Sym+c2) -> c1-c2
3327 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3328 if (GA->getGlobal() == GB->getGlobal())
3329 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3333 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3334 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3335 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3336 if (TN->getVT() == MVT::i1) {
3337 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3338 DAG.getConstant(1, DL, VT));
3339 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3343 // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3344 if (N1.getOpcode() == ISD::VSCALE) {
3345 APInt IntVal = N1.getConstantOperandAPInt(0);
3346 return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3349 // Prefer an add for more folding potential and possibly better codegen:
3350 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3351 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3352 SDValue ShAmt = N1.getOperand(1);
3353 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3355 ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3356 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3357 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3361 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3362 // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
3363 if (SDValue Carry = getAsCarry(TLI, N0)) {
3365 SDValue Zero = DAG.getConstant(0, DL, VT);
3366 SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3367 return DAG.getNode(ISD::ADDCARRY, DL,
3368 DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3376 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3377 SDValue N0 = N->getOperand(0);
3378 SDValue N1 = N->getOperand(1);
3379 EVT VT = N0.getValueType();
3383 if (VT.isVector()) {
3384 // TODO SimplifyVBinOp
3386 // fold (sub_sat x, 0) -> x, vector edition
3387 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3391 // fold (sub_sat x, undef) -> 0
3392 if (N0.isUndef() || N1.isUndef())
3393 return DAG.getConstant(0, DL, VT);
3395 // fold (sub_sat x, x) -> 0
3397 return DAG.getConstant(0, DL, VT);
3399 // fold (sub_sat c1, c2) -> c3
3400 if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3403 // fold (sub_sat x, 0) -> x
3404 if (isNullConstant(N1))
3410 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3411 SDValue N0 = N->getOperand(0);
3412 SDValue N1 = N->getOperand(1);
3413 EVT VT = N0.getValueType();
3416 // If the flag result is dead, turn this into an SUB.
3417 if (!N->hasAnyUseOfValue(1))
3418 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3419 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3421 // fold (subc x, x) -> 0 + no borrow
3423 return CombineTo(N, DAG.getConstant(0, DL, VT),
3424 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3426 // fold (subc x, 0) -> x + no borrow
3427 if (isNullConstant(N1))
3428 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3430 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3431 if (isAllOnesConstant(N0))
3432 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3433 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3438 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3439 SDValue N0 = N->getOperand(0);
3440 SDValue N1 = N->getOperand(1);
3441 EVT VT = N0.getValueType();
3442 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3444 EVT CarryVT = N->getValueType(1);
3447 // If the flag result is dead, turn this into an SUB.
3448 if (!N->hasAnyUseOfValue(1))
3449 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3450 DAG.getUNDEF(CarryVT));
3452 // fold (subo x, x) -> 0 + no borrow
3454 return CombineTo(N, DAG.getConstant(0, DL, VT),
3455 DAG.getConstant(0, DL, CarryVT));
3457 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3459 // fold (subox, c) -> (addo x, -c)
3460 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3461 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3462 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3465 // fold (subo x, 0) -> x + no borrow
3466 if (isNullOrNullSplat(N1))
3467 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3469 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3470 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3471 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3472 DAG.getConstant(0, DL, CarryVT));
3477 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3478 SDValue N0 = N->getOperand(0);
3479 SDValue N1 = N->getOperand(1);
3480 SDValue CarryIn = N->getOperand(2);
3482 // fold (sube x, y, false) -> (subc x, y)
3483 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3484 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3489 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3490 SDValue N0 = N->getOperand(0);
3491 SDValue N1 = N->getOperand(1);
3492 SDValue CarryIn = N->getOperand(2);
3494 // fold (subcarry x, y, false) -> (usubo x, y)
3495 if (isNullConstant(CarryIn)) {
3496 if (!LegalOperations ||
3497 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3498 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3504 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3506 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3507 SDValue N0 = N->getOperand(0);
3508 SDValue N1 = N->getOperand(1);
3509 SDValue Scale = N->getOperand(2);
3510 EVT VT = N0.getValueType();
3512 // fold (mulfix x, undef, scale) -> 0
3513 if (N0.isUndef() || N1.isUndef())
3514 return DAG.getConstant(0, SDLoc(N), VT);
3516 // Canonicalize constant to RHS (vector doesn't have to splat)
3517 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3518 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3519 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3521 // fold (mulfix x, 0, scale) -> 0
3522 if (isNullConstant(N1))
3523 return DAG.getConstant(0, SDLoc(N), VT);
3528 SDValue DAGCombiner::visitMUL(SDNode *N) {
3529 SDValue N0 = N->getOperand(0);
3530 SDValue N1 = N->getOperand(1);
3531 EVT VT = N0.getValueType();
3533 // fold (mul x, undef) -> 0
3534 if (N0.isUndef() || N1.isUndef())
3535 return DAG.getConstant(0, SDLoc(N), VT);
3537 bool N1IsConst = false;
3538 bool N1IsOpaqueConst = false;
3542 if (VT.isVector()) {
3543 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3546 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3547 assert((!N1IsConst ||
3548 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3549 "Splat APInt should be element width");
3551 N1IsConst = isa<ConstantSDNode>(N1);
3553 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3554 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3558 // fold (mul c1, c2) -> c1*c2
3559 if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3562 // canonicalize constant to RHS (vector doesn't have to splat)
3563 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3564 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3565 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3567 // fold (mul x, 0) -> 0
3568 if (N1IsConst && ConstValue1.isNullValue())
3571 // fold (mul x, 1) -> x
3572 if (N1IsConst && ConstValue1.isOneValue())
3575 if (SDValue NewSel = foldBinOpIntoSelect(N))
3578 // fold (mul x, -1) -> 0-x
3579 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3581 return DAG.getNode(ISD::SUB, DL, VT,
3582 DAG.getConstant(0, DL, VT), N0);
3585 // fold (mul x, (1 << c)) -> x << c
3586 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3587 DAG.isKnownToBeAPowerOfTwo(N1) &&
3588 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3590 SDValue LogBase2 = BuildLogBase2(N1, DL);
3591 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3592 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3593 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3596 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3597 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3598 unsigned Log2Val = (-ConstValue1).logBase2();
3600 // FIXME: If the input is something that is easily negated (e.g. a
3601 // single-use add), we should put the negate there.
3602 return DAG.getNode(ISD::SUB, DL, VT,
3603 DAG.getConstant(0, DL, VT),
3604 DAG.getNode(ISD::SHL, DL, VT, N0,
3605 DAG.getConstant(Log2Val, DL,
3606 getShiftAmountTy(N0.getValueType()))));
3609 // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3610 // mul x, (2^N + 1) --> add (shl x, N), x
3611 // mul x, (2^N - 1) --> sub (shl x, N), x
3612 // Examples: x * 33 --> (x << 5) + x
3613 // x * 15 --> (x << 4) - x
3614 // x * -33 --> -((x << 5) + x)
3615 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3616 if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3617 // TODO: We could handle more general decomposition of any constant by
3618 // having the target set a limit on number of ops and making a
3619 // callback to determine that sequence (similar to sqrt expansion).
3620 unsigned MathOp = ISD::DELETED_NODE;
3621 APInt MulC = ConstValue1.abs();
3622 if ((MulC - 1).isPowerOf2())
3624 else if ((MulC + 1).isPowerOf2())
3627 if (MathOp != ISD::DELETED_NODE) {
3629 MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3630 assert(ShAmt < VT.getScalarSizeInBits() &&
3631 "multiply-by-constant generated out of bounds shift");
3634 DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3635 SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3636 if (ConstValue1.isNegative())
3637 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3642 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3643 if (N0.getOpcode() == ISD::SHL &&
3644 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3645 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3646 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3647 if (isConstantOrConstantVector(C3))
3648 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3651 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3654 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3656 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3657 if (N0.getOpcode() == ISD::SHL &&
3658 isConstantOrConstantVector(N0.getOperand(1)) &&
3659 N0.getNode()->hasOneUse()) {
3661 } else if (N1.getOpcode() == ISD::SHL &&
3662 isConstantOrConstantVector(N1.getOperand(1)) &&
3663 N1.getNode()->hasOneUse()) {
3668 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3669 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3673 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3674 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3675 N0.getOpcode() == ISD::ADD &&
3676 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3677 isMulAddWithConstProfitable(N, N0, N1))
3678 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3679 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3680 N0.getOperand(0), N1),
3681 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3682 N0.getOperand(1), N1));
3684 // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3685 if (N0.getOpcode() == ISD::VSCALE)
3686 if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3687 APInt C0 = N0.getConstantOperandAPInt(0);
3688 APInt C1 = NC1->getAPIntValue();
3689 return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3693 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3699 /// Return true if divmod libcall is available.
3700 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3701 const TargetLowering &TLI) {
3703 EVT NodeType = Node->getValueType(0);
3704 if (!NodeType.isSimple())
3706 switch (NodeType.getSimpleVT().SimpleTy) {
3707 default: return false; // No libcall for vector types.
3708 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3709 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3710 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3711 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3712 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3715 return TLI.getLibcallName(LC) != nullptr;
3718 /// Issue divrem if both quotient and remainder are needed.
3719 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3720 if (Node->use_empty())
3721 return SDValue(); // This is a dead node, leave it alone.
3723 unsigned Opcode = Node->getOpcode();
3724 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3725 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3727 // DivMod lib calls can still work on non-legal types if using lib-calls.
3728 EVT VT = Node->getValueType(0);
3729 if (VT.isVector() || !VT.isInteger())
3732 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3735 // If DIVREM is going to get expanded into a libcall,
3736 // but there is no libcall available, then don't combine.
3737 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3738 !isDivRemLibcallAvailable(Node, isSigned, TLI))
3741 // If div is legal, it's better to do the normal expansion
3742 unsigned OtherOpcode = 0;
3743 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3744 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3745 if (TLI.isOperationLegalOrCustom(Opcode, VT))
3748 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3749 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3753 SDValue Op0 = Node->getOperand(0);
3754 SDValue Op1 = Node->getOperand(1);
3756 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3757 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3759 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3762 // Convert the other matching node(s), too;
3763 // otherwise, the DIVREM may get target-legalized into something
3764 // target-specific that we won't be able to recognize.
3765 unsigned UserOpc = User->getOpcode();
3766 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3767 User->getOperand(0) == Op0 &&
3768 User->getOperand(1) == Op1) {
3770 if (UserOpc == OtherOpcode) {
3771 SDVTList VTs = DAG.getVTList(VT, VT);
3772 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3773 } else if (UserOpc == DivRemOpc) {
3774 combined = SDValue(User, 0);
3776 assert(UserOpc == Opcode);
3780 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3781 CombineTo(User, combined);
3782 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3783 CombineTo(User, combined.getValue(1));
3789 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3790 SDValue N0 = N->getOperand(0);
3791 SDValue N1 = N->getOperand(1);
3792 EVT VT = N->getValueType(0);
3795 unsigned Opc = N->getOpcode();
3796 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3797 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3799 // X / undef -> undef
3800 // X % undef -> undef
3803 // NOTE: This includes vectors where any divisor element is zero/undef.
3804 if (DAG.isUndef(Opc, {N0, N1}))
3805 return DAG.getUNDEF(VT);
3810 return DAG.getConstant(0, DL, VT);
3814 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3815 if (N0C && N0C->isNullValue())
3821 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3825 // If this is a boolean op (single-bit element type), we can't have
3826 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3827 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3829 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3830 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3835 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3836 SDValue N0 = N->getOperand(0);
3837 SDValue N1 = N->getOperand(1);
3838 EVT VT = N->getValueType(0);
3839 EVT CCVT = getSetCCResultType(VT);
3843 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3848 // fold (sdiv c1, c2) -> c1/c2
3849 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3850 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
3853 // fold (sdiv X, -1) -> 0-X
3854 if (N1C && N1C->isAllOnesValue())
3855 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3857 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3858 if (N1C && N1C->getAPIntValue().isMinSignedValue())
3859 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3860 DAG.getConstant(1, DL, VT),
3861 DAG.getConstant(0, DL, VT));
3863 if (SDValue V = simplifyDivRem(N, DAG))
3866 if (SDValue NewSel = foldBinOpIntoSelect(N))
3869 // If we know the sign bits of both operands are zero, strength reduce to a
3870 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3871 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3872 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3874 if (SDValue V = visitSDIVLike(N0, N1, N)) {
3875 // If the corresponding remainder node exists, update its users with
3876 // (Dividend - (Quotient * Divisor).
3877 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3879 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3880 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3881 AddToWorklist(Mul.getNode());
3882 AddToWorklist(Sub.getNode());
3883 CombineTo(RemNode, Sub);
3888 // sdiv, srem -> sdivrem
3889 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3890 // true. Otherwise, we break the simplification logic in visitREM().
3891 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3892 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3893 if (SDValue DivRem = useDivRem(N))
3899 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3901 EVT VT = N->getValueType(0);
3902 EVT CCVT = getSetCCResultType(VT);
3903 unsigned BitWidth = VT.getScalarSizeInBits();
3905 // Helper for determining whether a value is a power-2 constant scalar or a
3906 // vector of such elements.
3907 auto IsPowerOfTwo = [](ConstantSDNode *C) {
3908 if (C->isNullValue() || C->isOpaque())
3910 if (C->getAPIntValue().isPowerOf2())
3912 if ((-C->getAPIntValue()).isPowerOf2())
3917 // fold (sdiv X, pow2) -> simple ops after legalize
3918 // FIXME: We check for the exact bit here because the generic lowering gives
3919 // better results in that case. The target-specific lowering should learn how
3920 // to handle exact sdivs efficiently.
3921 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3922 // Target-specific implementation of sdiv x, pow2.
3923 if (SDValue Res = BuildSDIVPow2(N))
3926 // Create constants that are functions of the shift amount value.
3927 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3928 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3929 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3930 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3931 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3932 if (!isConstantOrConstantVector(Inexact))
3935 // Splat the sign bit into the register
3936 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3937 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3938 AddToWorklist(Sign.getNode());
3940 // Add (N0 < 0) ? abs2 - 1 : 0;
3941 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3942 AddToWorklist(Srl.getNode());
3943 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3944 AddToWorklist(Add.getNode());
3945 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3946 AddToWorklist(Sra.getNode());
3948 // Special case: (sdiv X, 1) -> X
3949 // Special Case: (sdiv X, -1) -> 0-X
3950 SDValue One = DAG.getConstant(1, DL, VT);
3951 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3952 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3953 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3954 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3955 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3957 // If dividing by a positive value, we're done. Otherwise, the result must
3959 SDValue Zero = DAG.getConstant(0, DL, VT);
3960 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3962 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3963 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3964 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3968 // If integer divide is expensive and we satisfy the requirements, emit an
3969 // alternate sequence. Targets may check function attributes for size/speed
3971 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3972 if (isConstantOrConstantVector(N1) &&
3973 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3974 if (SDValue Op = BuildSDIV(N))
3980 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3981 SDValue N0 = N->getOperand(0);
3982 SDValue N1 = N->getOperand(1);
3983 EVT VT = N->getValueType(0);
3984 EVT CCVT = getSetCCResultType(VT);
3988 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3993 // fold (udiv c1, c2) -> c1/c2
3994 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3995 if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
3998 // fold (udiv X, -1) -> select(X == -1, 1, 0)
3999 if (N1C && N1C->getAPIntValue().isAllOnesValue())
4000 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4001 DAG.getConstant(1, DL, VT),
4002 DAG.getConstant(0, DL, VT));
4004 if (SDValue V = simplifyDivRem(N, DAG))
4007 if (SDValue NewSel = foldBinOpIntoSelect(N))
4010 if (SDValue V = visitUDIVLike(N0, N1, N)) {
4011 // If the corresponding remainder node exists, update its users with
4012 // (Dividend - (Quotient * Divisor).
4013 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4015 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4016 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4017 AddToWorklist(Mul.getNode());
4018 AddToWorklist(Sub.getNode());
4019 CombineTo(RemNode, Sub);
4024 // sdiv, srem -> sdivrem
4025 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4026 // true. Otherwise, we break the simplification logic in visitREM().
4027 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4028 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4029 if (SDValue DivRem = useDivRem(N))
4035 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4037 EVT VT = N->getValueType(0);
4039 // fold (udiv x, (1 << c)) -> x >>u c
4040 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4041 DAG.isKnownToBeAPowerOfTwo(N1)) {
4042 SDValue LogBase2 = BuildLogBase2(N1, DL);
4043 AddToWorklist(LogBase2.getNode());
4045 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4046 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4047 AddToWorklist(Trunc.getNode());
4048 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4051 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4052 if (N1.getOpcode() == ISD::SHL) {
4053 SDValue N10 = N1.getOperand(0);
4054 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4055 DAG.isKnownToBeAPowerOfTwo(N10)) {
4056 SDValue LogBase2 = BuildLogBase2(N10, DL);
4057 AddToWorklist(LogBase2.getNode());
4059 EVT ADDVT = N1.getOperand(1).getValueType();
4060 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4061 AddToWorklist(Trunc.getNode());
4062 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4063 AddToWorklist(Add.getNode());
4064 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4068 // fold (udiv x, c) -> alternate
4069 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4070 if (isConstantOrConstantVector(N1) &&
4071 !TLI.isIntDivCheap(N->getValueType(0), Attr))
4072 if (SDValue Op = BuildUDIV(N))
4078 // handles ISD::SREM and ISD::UREM
4079 SDValue DAGCombiner::visitREM(SDNode *N) {
4080 unsigned Opcode = N->getOpcode();
4081 SDValue N0 = N->getOperand(0);
4082 SDValue N1 = N->getOperand(1);
4083 EVT VT = N->getValueType(0);
4084 EVT CCVT = getSetCCResultType(VT);
4086 bool isSigned = (Opcode == ISD::SREM);
4089 // fold (rem c1, c2) -> c1%c2
4090 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4091 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4094 // fold (urem X, -1) -> select(X == -1, 0, x)
4095 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4096 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4097 DAG.getConstant(0, DL, VT), N0);
4099 if (SDValue V = simplifyDivRem(N, DAG))
4102 if (SDValue NewSel = foldBinOpIntoSelect(N))
4106 // If we know the sign bits of both operands are zero, strength reduce to a
4107 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4108 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4109 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4111 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4112 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4113 // fold (urem x, pow2) -> (and x, pow2-1)
4114 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4115 AddToWorklist(Add.getNode());
4116 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4118 if (N1.getOpcode() == ISD::SHL &&
4119 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4120 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4121 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4122 AddToWorklist(Add.getNode());
4123 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4127 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4129 // If X/C can be simplified by the division-by-constant logic, lower
4130 // X%C to the equivalent of X-X/C*C.
4131 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4132 // speculative DIV must not cause a DIVREM conversion. We guard against this
4133 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
4134 // combine will not return a DIVREM. Regardless, checking cheapness here
4135 // makes sense since the simplification results in fatter code.
4136 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4137 SDValue OptimizedDiv =
4138 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4139 if (OptimizedDiv.getNode()) {
4140 // If the equivalent Div node also exists, update its users.
4141 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4142 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4144 CombineTo(DivNode, OptimizedDiv);
4145 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4146 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4147 AddToWorklist(OptimizedDiv.getNode());
4148 AddToWorklist(Mul.getNode());
4153 // sdiv, srem -> sdivrem
4154 if (SDValue DivRem = useDivRem(N))
4155 return DivRem.getValue(1);
4160 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4161 SDValue N0 = N->getOperand(0);
4162 SDValue N1 = N->getOperand(1);
4163 EVT VT = N->getValueType(0);
4166 if (VT.isVector()) {
4167 // fold (mulhs x, 0) -> 0
4168 // do not return N0/N1, because undef node may exist.
4169 if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4170 ISD::isBuildVectorAllZeros(N1.getNode()))
4171 return DAG.getConstant(0, DL, VT);
4174 // fold (mulhs x, 0) -> 0
4175 if (isNullConstant(N1))
4177 // fold (mulhs x, 1) -> (sra x, size(x)-1)
4178 if (isOneConstant(N1))
4179 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4180 DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4181 getShiftAmountTy(N0.getValueType())));
4183 // fold (mulhs x, undef) -> 0
4184 if (N0.isUndef() || N1.isUndef())
4185 return DAG.getConstant(0, DL, VT);
4187 // If the type twice as wide is legal, transform the mulhs to a wider multiply
4189 if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
4190 MVT Simple = VT.getSimpleVT();
4191 unsigned SimpleSize = Simple.getSizeInBits();
4192 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4193 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4194 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4195 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4196 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4197 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4198 DAG.getConstant(SimpleSize, DL,
4199 getShiftAmountTy(N1.getValueType())));
4200 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4207 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4208 SDValue N0 = N->getOperand(0);
4209 SDValue N1 = N->getOperand(1);
4210 EVT VT = N->getValueType(0);
4213 if (VT.isVector()) {
4214 // fold (mulhu x, 0) -> 0
4215 // do not return N0/N1, because undef node may exist.
4216 if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4217 ISD::isBuildVectorAllZeros(N1.getNode()))
4218 return DAG.getConstant(0, DL, VT);
4221 // fold (mulhu x, 0) -> 0
4222 if (isNullConstant(N1))
4224 // fold (mulhu x, 1) -> 0
4225 if (isOneConstant(N1))
4226 return DAG.getConstant(0, DL, N0.getValueType());
4227 // fold (mulhu x, undef) -> 0
4228 if (N0.isUndef() || N1.isUndef())
4229 return DAG.getConstant(0, DL, VT);
4231 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4232 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4233 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4234 unsigned NumEltBits = VT.getScalarSizeInBits();
4235 SDValue LogBase2 = BuildLogBase2(N1, DL);
4236 SDValue SRLAmt = DAG.getNode(
4237 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4238 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4239 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4240 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4243 // If the type twice as wide is legal, transform the mulhu to a wider multiply
4245 if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) {
4246 MVT Simple = VT.getSimpleVT();
4247 unsigned SimpleSize = Simple.getSizeInBits();
4248 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4249 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4250 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4251 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4252 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4253 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4254 DAG.getConstant(SimpleSize, DL,
4255 getShiftAmountTy(N1.getValueType())));
4256 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4263 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4264 /// give the opcodes for the two computations that are being performed. Return
4265 /// true if a simplification was made.
4266 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4268 // If the high half is not needed, just compute the low half.
4269 bool HiExists = N->hasAnyUseOfValue(1);
4270 if (!HiExists && (!LegalOperations ||
4271 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4272 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4273 return CombineTo(N, Res, Res);
4276 // If the low half is not needed, just compute the high half.
4277 bool LoExists = N->hasAnyUseOfValue(0);
4278 if (!LoExists && (!LegalOperations ||
4279 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4280 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4281 return CombineTo(N, Res, Res);
4284 // If both halves are used, return as it is.
4285 if (LoExists && HiExists)
4288 // If the two computed results can be simplified separately, separate them.
4290 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4291 AddToWorklist(Lo.getNode());
4292 SDValue LoOpt = combine(Lo.getNode());
4293 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4294 (!LegalOperations ||
4295 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4296 return CombineTo(N, LoOpt, LoOpt);
4300 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4301 AddToWorklist(Hi.getNode());
4302 SDValue HiOpt = combine(Hi.getNode());
4303 if (HiOpt.getNode() && HiOpt != Hi &&
4304 (!LegalOperations ||
4305 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4306 return CombineTo(N, HiOpt, HiOpt);
4312 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4313 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4316 EVT VT = N->getValueType(0);
4319 // If the type is twice as wide is legal, transform the mulhu to a wider
4320 // multiply plus a shift.
4321 if (VT.isSimple() && !VT.isVector()) {
4322 MVT Simple = VT.getSimpleVT();
4323 unsigned SimpleSize = Simple.getSizeInBits();
4324 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4325 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4326 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4327 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4328 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4329 // Compute the high part as N1.
4330 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4331 DAG.getConstant(SimpleSize, DL,
4332 getShiftAmountTy(Lo.getValueType())));
4333 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4334 // Compute the low part as N0.
4335 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4336 return CombineTo(N, Lo, Hi);
4343 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4344 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4347 EVT VT = N->getValueType(0);
4350 // (umul_lohi N0, 0) -> (0, 0)
4351 if (isNullConstant(N->getOperand(1))) {
4352 SDValue Zero = DAG.getConstant(0, DL, VT);
4353 return CombineTo(N, Zero, Zero);
4356 // (umul_lohi N0, 1) -> (N0, 0)
4357 if (isOneConstant(N->getOperand(1))) {
4358 SDValue Zero = DAG.getConstant(0, DL, VT);
4359 return CombineTo(N, N->getOperand(0), Zero);
4362 // If the type is twice as wide is legal, transform the mulhu to a wider
4363 // multiply plus a shift.
4364 if (VT.isSimple() && !VT.isVector()) {
4365 MVT Simple = VT.getSimpleVT();
4366 unsigned SimpleSize = Simple.getSizeInBits();
4367 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4368 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4369 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4370 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4371 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4372 // Compute the high part as N1.
4373 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4374 DAG.getConstant(SimpleSize, DL,
4375 getShiftAmountTy(Lo.getValueType())));
4376 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4377 // Compute the low part as N0.
4378 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4379 return CombineTo(N, Lo, Hi);
4386 SDValue DAGCombiner::visitMULO(SDNode *N) {
4387 SDValue N0 = N->getOperand(0);
4388 SDValue N1 = N->getOperand(1);
4389 EVT VT = N0.getValueType();
4390 bool IsSigned = (ISD::SMULO == N->getOpcode());
4392 EVT CarryVT = N->getValueType(1);
4395 // canonicalize constant to RHS.
4396 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4397 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4398 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4400 // fold (mulo x, 0) -> 0 + no carry out
4401 if (isNullOrNullSplat(N1))
4402 return CombineTo(N, DAG.getConstant(0, DL, VT),
4403 DAG.getConstant(0, DL, CarryVT));
4405 // (mulo x, 2) -> (addo x, x)
4406 if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4407 if (C2->getAPIntValue() == 2)
4408 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4409 N->getVTList(), N0, N0);
4414 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4415 SDValue N0 = N->getOperand(0);
4416 SDValue N1 = N->getOperand(1);
4417 EVT VT = N0.getValueType();
4418 unsigned Opcode = N->getOpcode();
4422 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4425 // fold operation with constant operands.
4426 if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4429 // canonicalize constant to RHS
4430 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4431 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4432 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4434 // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4435 // Only do this if the current op isn't legal and the flipped is.
4436 if (!TLI.isOperationLegal(Opcode, VT) &&
4437 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4438 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4441 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4442 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4443 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4444 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4445 default: llvm_unreachable("Unknown MINMAX opcode");
4447 if (TLI.isOperationLegal(AltOpcode, VT))
4448 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4454 /// If this is a bitwise logic instruction and both operands have the same
4455 /// opcode, try to sink the other opcode after the logic instruction.
4456 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4457 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4458 EVT VT = N0.getValueType();
4459 unsigned LogicOpcode = N->getOpcode();
4460 unsigned HandOpcode = N0.getOpcode();
4461 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4462 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4463 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4465 // Bail early if none of these transforms apply.
4466 if (N0.getNumOperands() == 0)
4469 // FIXME: We should check number of uses of the operands to not increase
4470 // the instruction count for all transforms.
4472 // Handle size-changing casts.
4473 SDValue X = N0.getOperand(0);
4474 SDValue Y = N1.getOperand(0);
4475 EVT XVT = X.getValueType();
4477 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4478 HandOpcode == ISD::SIGN_EXTEND) {
4479 // If both operands have other uses, this transform would create extra
4480 // instructions without eliminating anything.
4481 if (!N0.hasOneUse() && !N1.hasOneUse())
4483 // We need matching integer source types.
4484 if (XVT != Y.getValueType())
4486 // Don't create an illegal op during or after legalization. Don't ever
4487 // create an unsupported vector op.
4488 if ((VT.isVector() || LegalOperations) &&
4489 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4491 // Avoid infinite looping with PromoteIntBinOp.
4492 // TODO: Should we apply desirable/legal constraints to all opcodes?
4493 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4494 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4496 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4497 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4498 return DAG.getNode(HandOpcode, DL, VT, Logic);
4501 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4502 if (HandOpcode == ISD::TRUNCATE) {
4503 // If both operands have other uses, this transform would create extra
4504 // instructions without eliminating anything.
4505 if (!N0.hasOneUse() && !N1.hasOneUse())
4507 // We need matching source types.
4508 if (XVT != Y.getValueType())
4510 // Don't create an illegal op during or after legalization.
4511 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4513 // Be extra careful sinking truncate. If it's free, there's no benefit in
4514 // widening a binop. Also, don't create a logic op on an illegal type.
4515 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4517 if (!TLI.isTypeLegal(XVT))
4519 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4520 return DAG.getNode(HandOpcode, DL, VT, Logic);
4523 // For binops SHL/SRL/SRA/AND:
4524 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4525 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4526 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4527 N0.getOperand(1) == N1.getOperand(1)) {
4528 // If either operand has other uses, this transform is not an improvement.
4529 if (!N0.hasOneUse() || !N1.hasOneUse())
4531 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4532 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4535 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4536 if (HandOpcode == ISD::BSWAP) {
4537 // If either operand has other uses, this transform is not an improvement.
4538 if (!N0.hasOneUse() || !N1.hasOneUse())
4540 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4541 return DAG.getNode(HandOpcode, DL, VT, Logic);
4544 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4545 // Only perform this optimization up until type legalization, before
4546 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4547 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4548 // we don't want to undo this promotion.
4549 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4551 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4552 Level <= AfterLegalizeTypes) {
4553 // Input types must be integer and the same.
4554 if (XVT.isInteger() && XVT == Y.getValueType() &&
4555 !(VT.isVector() && TLI.isTypeLegal(VT) &&
4556 !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4557 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4558 return DAG.getNode(HandOpcode, DL, VT, Logic);
4562 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4563 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4564 // If both shuffles use the same mask, and both shuffle within a single
4565 // vector, then it is worthwhile to move the swizzle after the operation.
4566 // The type-legalizer generates this pattern when loading illegal
4567 // vector types from memory. In many cases this allows additional shuffle
4569 // There are other cases where moving the shuffle after the xor/and/or
4570 // is profitable even if shuffles don't perform a swizzle.
4571 // If both shuffles use the same mask, and both shuffles have the same first
4572 // or second operand, then it might still be profitable to move the shuffle
4573 // after the xor/and/or operation.
4574 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4575 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4576 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4577 assert(X.getValueType() == Y.getValueType() &&
4578 "Inputs to shuffles are not the same type");
4580 // Check that both shuffles use the same mask. The masks are known to be of
4581 // the same length because the result vector type is the same.
4582 // Check also that shuffles have only one use to avoid introducing extra
4584 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4585 !SVN0->getMask().equals(SVN1->getMask()))
4588 // Don't try to fold this node if it requires introducing a
4589 // build vector of all zeros that might be illegal at this stage.
4590 SDValue ShOp = N0.getOperand(1);
4591 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4592 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4594 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4595 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4596 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4597 N0.getOperand(0), N1.getOperand(0));
4598 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4601 // Don't try to fold this node if it requires introducing a
4602 // build vector of all zeros that might be illegal at this stage.
4603 ShOp = N0.getOperand(0);
4604 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4605 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4607 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4608 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4609 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4611 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4618 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4619 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4621 SDValue LL, LR, RL, RR, N0CC, N1CC;
4622 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4623 !isSetCCEquivalent(N1, RL, RR, N1CC))
4626 assert(N0.getValueType() == N1.getValueType() &&
4627 "Unexpected operand types for bitwise logic op");
4628 assert(LL.getValueType() == LR.getValueType() &&
4629 RL.getValueType() == RR.getValueType() &&
4630 "Unexpected operand types for setcc");
4632 // If we're here post-legalization or the logic op type is not i1, the logic
4633 // op type must match a setcc result type. Also, all folds require new
4634 // operations on the left and right operands, so those types must match.
4635 EVT VT = N0.getValueType();
4636 EVT OpVT = LL.getValueType();
4637 if (LegalOperations || VT.getScalarType() != MVT::i1)
4638 if (VT != getSetCCResultType(OpVT))
4640 if (OpVT != RL.getValueType())
4643 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4644 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4645 bool IsInteger = OpVT.isInteger();
4646 if (LR == RR && CC0 == CC1 && IsInteger) {
4647 bool IsZero = isNullOrNullSplat(LR);
4648 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4651 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4652 // All sign bits clear?
4653 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4655 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4656 // Any sign bits set?
4657 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4659 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4660 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4661 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4662 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4663 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4664 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4665 AddToWorklist(Or.getNode());
4666 return DAG.getSetCC(DL, VT, Or, LR, CC1);
4670 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4671 // All sign bits set?
4672 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4674 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4675 // Any sign bits clear?
4676 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4678 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4679 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4680 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4681 // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4682 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4683 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4684 AddToWorklist(And.getNode());
4685 return DAG.getSetCC(DL, VT, And, LR, CC1);
4689 // TODO: What is the 'or' equivalent of this fold?
4690 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4691 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4692 IsInteger && CC0 == ISD::SETNE &&
4693 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4694 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4695 SDValue One = DAG.getConstant(1, DL, OpVT);
4696 SDValue Two = DAG.getConstant(2, DL, OpVT);
4697 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4698 AddToWorklist(Add.getNode());
4699 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4702 // Try more general transforms if the predicates match and the only user of
4703 // the compares is the 'and' or 'or'.
4704 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4705 N0.hasOneUse() && N1.hasOneUse()) {
4706 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4707 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4708 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4709 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4710 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4711 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4712 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4713 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4716 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4717 // TODO - support non-uniform vector amounts.
4718 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4719 // Match a shared variable operand and 2 non-opaque constant operands.
4720 ConstantSDNode *C0 = isConstOrConstSplat(LR);
4721 ConstantSDNode *C1 = isConstOrConstSplat(RR);
4722 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4723 // Canonicalize larger constant as C0.
4724 if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4727 // The difference of the constants must be a single bit.
4728 const APInt &C0Val = C0->getAPIntValue();
4729 const APInt &C1Val = C1->getAPIntValue();
4730 if ((C0Val - C1Val).isPowerOf2()) {
4731 // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4732 // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4733 SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4734 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4735 SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4736 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4737 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4738 return DAG.getSetCC(DL, VT, And, Zero, CC0);
4744 // Canonicalize equivalent operands to LL == RL.
4745 if (LL == RR && LR == RL) {
4746 CC1 = ISD::getSetCCSwappedOperands(CC1);
4750 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4751 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4752 if (LL == RL && LR == RR) {
4753 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
4754 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
4755 if (NewCC != ISD::SETCC_INVALID &&
4756 (!LegalOperations ||
4757 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4758 TLI.isOperationLegal(ISD::SETCC, OpVT))))
4759 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4765 /// This contains all DAGCombine rules which reduce two values combined by
4766 /// an And operation to a single value. This makes them reusable in the context
4767 /// of visitSELECT(). Rules involving constants are not included as
4768 /// visitSELECT() already handles those cases.
4769 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4770 EVT VT = N1.getValueType();
4773 // fold (and x, undef) -> 0
4774 if (N0.isUndef() || N1.isUndef())
4775 return DAG.getConstant(0, DL, VT);
4777 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4780 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4781 VT.getSizeInBits() <= 64) {
4782 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4783 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4784 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4785 // immediate for an add, but it is legal if its top c2 bits are set,
4786 // transform the ADD so the immediate doesn't need to be materialized
4788 APInt ADDC = ADDI->getAPIntValue();
4789 APInt SRLC = SRLI->getAPIntValue();
4790 if (ADDC.getMinSignedBits() <= 64 &&
4791 SRLC.ult(VT.getSizeInBits()) &&
4792 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4793 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4794 SRLC.getZExtValue());
4795 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4797 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4800 DAG.getNode(ISD::ADD, DL0, VT,
4801 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4802 CombineTo(N0.getNode(), NewAdd);
4803 // Return N so it doesn't get rechecked!
4804 return SDValue(N, 0);
4812 // Reduce bit extract of low half of an integer to the narrower type.
4813 // (and (srl i64:x, K), KMask) ->
4814 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4815 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4816 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4817 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4818 unsigned Size = VT.getSizeInBits();
4819 const APInt &AndMask = CAnd->getAPIntValue();
4820 unsigned ShiftBits = CShift->getZExtValue();
4822 // Bail out, this node will probably disappear anyway.
4826 unsigned MaskBits = AndMask.countTrailingOnes();
4827 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4829 if (AndMask.isMask() &&
4830 // Required bits must not span the two halves of the integer and
4831 // must fit in the half size type.
4832 (ShiftBits + MaskBits <= Size / 2) &&
4833 TLI.isNarrowingProfitable(VT, HalfVT) &&
4834 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4835 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4836 TLI.isTruncateFree(VT, HalfVT) &&
4837 TLI.isZExtFree(HalfVT, VT)) {
4838 // The isNarrowingProfitable is to avoid regressions on PPC and
4839 // AArch64 which match a few 64-bit bit insert / bit extract patterns
4840 // on downstream users of this. Those patterns could probably be
4841 // extended to handle extensions mixed in.
4844 assert(MaskBits <= Size);
4846 // Extracting the highest bit of the low half.
4847 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4848 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4851 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4852 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4853 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4854 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4855 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4864 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4865 EVT LoadResultTy, EVT &ExtVT) {
4866 if (!AndC->getAPIntValue().isMask())
4869 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4871 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4872 EVT LoadedVT = LoadN->getMemoryVT();
4874 if (ExtVT == LoadedVT &&
4875 (!LegalOperations ||
4876 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4877 // ZEXTLOAD will match without needing to change the size of the value being
4882 // Do not change the width of a volatile or atomic loads.
4883 if (!LoadN->isSimple())
4886 // Do not generate loads of non-round integer types since these can
4887 // be expensive (and would be wrong if the type is not byte sized).
4888 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4891 if (LegalOperations &&
4892 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4895 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4901 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4902 ISD::LoadExtType ExtType, EVT &MemVT,
4906 // Only allow byte offsets.
4910 // Do not generate loads of non-round integer types since these can
4911 // be expensive (and would be wrong if the type is not byte sized).
4912 if (!MemVT.isRound())
4915 // Don't change the width of a volatile or atomic loads.
4916 if (!LDST->isSimple())
4919 // Verify that we are actually reducing a load width here.
4920 if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4923 // Ensure that this isn't going to produce an unsupported memory access.
4925 assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
4926 const unsigned ByteShAmt = ShAmt / 8;
4927 const Align LDSTAlign = LDST->getAlign();
4928 const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
4929 if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4930 LDST->getAddressSpace(), NarrowAlign,
4931 LDST->getMemOperand()->getFlags()))
4935 // It's not possible to generate a constant of extended or untyped type.
4936 EVT PtrType = LDST->getBasePtr().getValueType();
4937 if (PtrType == MVT::Untyped || PtrType.isExtended())
4940 if (isa<LoadSDNode>(LDST)) {
4941 LoadSDNode *Load = cast<LoadSDNode>(LDST);
4942 // Don't transform one with multiple uses, this would require adding a new
4944 if (!SDValue(Load, 0).hasOneUse())
4947 if (LegalOperations &&
4948 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4951 // For the transform to be legal, the load must produce only two values
4952 // (the value loaded and the chain). Don't transform a pre-increment
4953 // load, for example, which produces an extra value. Otherwise the
4954 // transformation is not equivalent, and the downstream logic to replace
4955 // uses gets things wrong.
4956 if (Load->getNumValues() > 2)
4959 // If the load that we're shrinking is an extload and we're not just
4960 // discarding the extension we can't simply shrink the load. Bail.
4961 // TODO: It would be possible to merge the extensions in some cases.
4962 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4963 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4966 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4969 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4970 StoreSDNode *Store = cast<StoreSDNode>(LDST);
4971 // Can't write outside the original store
4972 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4975 if (LegalOperations &&
4976 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4982 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4983 SmallVectorImpl<LoadSDNode*> &Loads,
4984 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4985 ConstantSDNode *Mask,
4986 SDNode *&NodeToMask) {
4987 // Recursively search for the operands, looking for loads which can be
4989 for (SDValue Op : N->op_values()) {
4990 if (Op.getValueType().isVector())
4993 // Some constants may need fixing up later if they are too large.
4994 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4995 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4996 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4997 NodesWithConsts.insert(N);
5001 if (!Op.hasOneUse())
5004 switch(Op.getOpcode()) {
5006 auto *Load = cast<LoadSDNode>(Op);
5008 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5009 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5011 // ZEXTLOAD is already small enough.
5012 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5013 ExtVT.bitsGE(Load->getMemoryVT()))
5016 // Use LE to convert equal sized loads to zext.
5017 if (ExtVT.bitsLE(Load->getMemoryVT()))
5018 Loads.push_back(Load);
5024 case ISD::ZERO_EXTEND:
5025 case ISD::AssertZext: {
5026 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5027 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5028 EVT VT = Op.getOpcode() == ISD::AssertZext ?
5029 cast<VTSDNode>(Op.getOperand(1))->getVT() :
5030 Op.getOperand(0).getValueType();
5032 // We can accept extending nodes if the mask is wider or an equal
5033 // width to the original type.
5034 if (ExtVT.bitsGE(VT))
5041 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5047 // Allow one node which will masked along with any loads found.
5051 // Also ensure that the node to be masked only produces one data result.
5052 NodeToMask = Op.getNode();
5053 if (NodeToMask->getNumValues() > 1) {
5054 bool HasValue = false;
5055 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5056 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5057 if (VT != MVT::Glue && VT != MVT::Other) {
5059 NodeToMask = nullptr;
5065 assert(HasValue && "Node to be masked has no data result?");
5071 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5072 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5076 if (!Mask->getAPIntValue().isMask())
5079 // No need to do anything if the and directly uses a load.
5080 if (isa<LoadSDNode>(N->getOperand(0)))
5083 SmallVector<LoadSDNode*, 8> Loads;
5084 SmallPtrSet<SDNode*, 2> NodesWithConsts;
5085 SDNode *FixupNode = nullptr;
5086 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5087 if (Loads.size() == 0)
5090 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5091 SDValue MaskOp = N->getOperand(1);
5093 // If it exists, fixup the single node we allow in the tree that needs
5096 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5097 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5098 FixupNode->getValueType(0),
5099 SDValue(FixupNode, 0), MaskOp);
5100 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5101 if (And.getOpcode() == ISD ::AND)
5102 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5105 // Narrow any constants that need it.
5106 for (auto *LogicN : NodesWithConsts) {
5107 SDValue Op0 = LogicN->getOperand(0);
5108 SDValue Op1 = LogicN->getOperand(1);
5110 if (isa<ConstantSDNode>(Op0))
5111 std::swap(Op0, Op1);
5113 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5116 DAG.UpdateNodeOperands(LogicN, Op0, And);
5119 // Create narrow loads.
5120 for (auto *Load : Loads) {
5121 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5122 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5123 SDValue(Load, 0), MaskOp);
5124 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5125 if (And.getOpcode() == ISD ::AND)
5127 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5128 SDValue NewLoad = ReduceLoadWidth(And.getNode());
5130 "Shouldn't be masking the load if it can't be narrowed");
5131 CombineTo(Load, NewLoad, NewLoad.getValue(1));
5133 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5140 // x & (-1 'logical shift' y)
5142 // (x 'opposite logical shift' y) 'logical shift' y
5143 // if it is better for performance.
5144 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5145 assert(N->getOpcode() == ISD::AND);
5147 SDValue N0 = N->getOperand(0);
5148 SDValue N1 = N->getOperand(1);
5150 // Do we actually prefer shifts over mask?
5151 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5154 // Try to match (-1 '[outer] logical shift' y)
5155 unsigned OuterShift;
5156 unsigned InnerShift; // The opposite direction to the OuterShift.
5157 SDValue Y; // Shift amount.
5158 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5161 OuterShift = M->getOpcode();
5162 if (OuterShift == ISD::SHL)
5163 InnerShift = ISD::SRL;
5164 else if (OuterShift == ISD::SRL)
5165 InnerShift = ISD::SHL;
5168 if (!isAllOnesConstant(M->getOperand(0)))
5170 Y = M->getOperand(1);
5177 else if (matchMask(N0))
5183 EVT VT = N->getValueType(0);
5185 // tmp = x 'opposite logical shift' y
5186 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5187 // ret = tmp 'logical shift' y
5188 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5193 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5194 /// For a target with a bit test, this is expected to become test + set and save
5195 /// at least 1 instruction.
5196 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5197 assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5199 // This is probably not worthwhile without a supported type.
5200 EVT VT = And->getValueType(0);
5201 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5202 if (!TLI.isTypeLegal(VT))
5205 // Look through an optional extension and find a 'not'.
5206 // TODO: Should we favor test+set even without the 'not' op?
5207 SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5208 if (Not.getOpcode() == ISD::ANY_EXTEND)
5209 Not = Not.getOperand(0);
5210 if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5213 // Look though an optional truncation. The source operand may not be the same
5214 // type as the original 'and', but that is ok because we are masking off
5215 // everything but the low bit.
5216 SDValue Srl = Not.getOperand(0);
5217 if (Srl.getOpcode() == ISD::TRUNCATE)
5218 Srl = Srl.getOperand(0);
5220 // Match a shift-right by constant.
5221 if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5222 !isa<ConstantSDNode>(Srl.getOperand(1)))
5225 // We might have looked through casts that make this transform invalid.
5226 // TODO: If the source type is wider than the result type, do the mask and
5227 // compare in the source type.
5228 const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5229 unsigned VTBitWidth = VT.getSizeInBits();
5230 if (ShiftAmt.uge(VTBitWidth))
5233 // Turn this into a bit-test pattern using mask op + setcc:
5234 // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5236 SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5237 EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5238 SDValue Mask = DAG.getConstant(
5239 APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5240 SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5241 SDValue Zero = DAG.getConstant(0, DL, VT);
5242 SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5243 return DAG.getZExtOrTrunc(Setcc, DL, VT);
5246 SDValue DAGCombiner::visitAND(SDNode *N) {
5247 SDValue N0 = N->getOperand(0);
5248 SDValue N1 = N->getOperand(1);
5249 EVT VT = N1.getValueType();
5256 if (VT.isVector()) {
5257 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5260 // fold (and x, 0) -> 0, vector edition
5261 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5262 // do not return N0, because undef node may exist in N0
5263 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5264 SDLoc(N), N0.getValueType());
5265 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5266 // do not return N1, because undef node may exist in N1
5267 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5268 SDLoc(N), N1.getValueType());
5270 // fold (and x, -1) -> x, vector edition
5271 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5273 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5277 // fold (and c1, c2) -> c1&c2
5278 ConstantSDNode *N1C = isConstOrConstSplat(N1);
5279 if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5282 // canonicalize constant to RHS
5283 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5284 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5285 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5287 // fold (and x, -1) -> x
5288 if (isAllOnesConstant(N1))
5291 // if (and x, c) is known to be zero, return 0
5292 unsigned BitWidth = VT.getScalarSizeInBits();
5293 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5294 APInt::getAllOnesValue(BitWidth)))
5295 return DAG.getConstant(0, SDLoc(N), VT);
5297 if (SDValue NewSel = foldBinOpIntoSelect(N))
5301 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5304 // Try to convert a constant mask AND into a shuffle clear mask.
5306 if (SDValue Shuffle = XformToShuffleWithZero(N))
5309 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5312 // fold (and (or x, C), D) -> D if (C & D) == D
5313 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5314 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5316 if (N0.getOpcode() == ISD::OR &&
5317 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5319 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5320 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5321 SDValue N0Op0 = N0.getOperand(0);
5322 APInt Mask = ~N1C->getAPIntValue();
5323 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5324 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5325 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5326 N0.getValueType(), N0Op0);
5328 // Replace uses of the AND with uses of the Zero extend node.
5331 // We actually want to replace all uses of the any_extend with the
5332 // zero_extend, to avoid duplicating things. This will later cause this
5333 // AND to be folded.
5334 CombineTo(N0.getNode(), Zext);
5335 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5339 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5340 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5341 // already be zero by virtue of the width of the base type of the load.
5343 // the 'X' node here can either be nothing or an extract_vector_elt to catch
5345 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5346 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5347 N0.getOperand(0).getOpcode() == ISD::LOAD &&
5348 N0.getOperand(0).getResNo() == 0) ||
5349 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5350 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5351 N0 : N0.getOperand(0) );
5353 // Get the constant (if applicable) the zero'th operand is being ANDed with.
5354 // This can be a pure constant or a vector splat, in which case we treat the
5355 // vector as a scalar and use the splat value.
5356 APInt Constant = APInt::getNullValue(1);
5357 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5358 Constant = C->getAPIntValue();
5359 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5360 APInt SplatValue, SplatUndef;
5361 unsigned SplatBitSize;
5363 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5364 SplatBitSize, HasAnyUndefs);
5366 // Undef bits can contribute to a possible optimisation if set, so
5368 SplatValue |= SplatUndef;
5370 // The splat value may be something like "0x00FFFFFF", which means 0 for
5371 // the first vector value and FF for the rest, repeating. We need a mask
5372 // that will apply equally to all members of the vector, so AND all the
5373 // lanes of the constant together.
5374 unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5376 // If the splat value has been compressed to a bitlength lower
5377 // than the size of the vector lane, we need to re-expand it to
5379 if (EltBitWidth > SplatBitSize)
5380 for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5381 SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5382 SplatValue |= SplatValue.shl(SplatBitSize);
5384 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5385 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
5386 if ((SplatBitSize % EltBitWidth) == 0) {
5387 Constant = APInt::getAllOnesValue(EltBitWidth);
5388 for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5389 Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5394 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5395 // actually legal and isn't going to get expanded, else this is a false
5397 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5398 Load->getValueType(0),
5399 Load->getMemoryVT());
5401 // Resize the constant to the same size as the original memory access before
5402 // extension. If it is still the AllOnesValue then this AND is completely
5404 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5407 switch (Load->getExtensionType()) {
5408 default: B = false; break;
5409 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5411 case ISD::NON_EXTLOAD: B = true; break;
5414 if (B && Constant.isAllOnesValue()) {
5415 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5416 // preserve semantics once we get rid of the AND.
5417 SDValue NewLoad(Load, 0);
5419 // Fold the AND away. NewLoad may get replaced immediately.
5420 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5422 if (Load->getExtensionType() == ISD::EXTLOAD) {
5423 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5424 Load->getValueType(0), SDLoc(Load),
5425 Load->getChain(), Load->getBasePtr(),
5426 Load->getOffset(), Load->getMemoryVT(),
5427 Load->getMemOperand());
5428 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5429 if (Load->getNumValues() == 3) {
5430 // PRE/POST_INC loads have 3 values.
5431 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5432 NewLoad.getValue(2) };
5433 CombineTo(Load, To, 3, true);
5435 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5439 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5443 // fold (and (load x), 255) -> (zextload x, i8)
5444 // fold (and (extload x, i16), 255) -> (zextload x, i8)
5445 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5446 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5447 (N0.getOpcode() == ISD::ANY_EXTEND &&
5448 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5449 if (SDValue Res = ReduceLoadWidth(N)) {
5450 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5451 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5453 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5454 return SDValue(N, 0);
5459 // Attempt to propagate the AND back up to the leaves which, if they're
5460 // loads, can be combined to narrow loads and the AND node can be removed.
5461 // Perform after legalization so that extend nodes will already be
5462 // combined into the loads.
5463 if (BackwardsPropagateMask(N))
5464 return SDValue(N, 0);
5467 if (SDValue Combined = visitANDLike(N0, N1, N))
5470 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
5471 if (N0.getOpcode() == N1.getOpcode())
5472 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5475 // Masking the negated extension of a boolean is just the zero-extended
5477 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5478 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5480 // Note: the SimplifyDemandedBits fold below can make an information-losing
5481 // transform, and then we have no way to find this better fold.
5482 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5483 if (isNullOrNullSplat(N0.getOperand(0))) {
5484 SDValue SubRHS = N0.getOperand(1);
5485 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5486 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5488 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5489 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5490 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5494 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5495 // fold (and (sra)) -> (and (srl)) when possible.
5496 if (SimplifyDemandedBits(SDValue(N, 0)))
5497 return SDValue(N, 0);
5499 // fold (zext_inreg (extload x)) -> (zextload x)
5500 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5501 if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5502 (ISD::isEXTLoad(N0.getNode()) ||
5503 (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5504 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5505 EVT MemVT = LN0->getMemoryVT();
5506 // If we zero all the possible extended bits, then we can turn this into
5507 // a zextload if we are running before legalize or the operation is legal.
5508 unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5509 unsigned MemBitSize = MemVT.getScalarSizeInBits();
5510 APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5511 if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5512 ((!LegalOperations && LN0->isSimple()) ||
5513 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5515 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5516 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5518 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5519 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5523 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5524 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5525 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5526 N0.getOperand(1), false))
5530 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5533 if (TLI.hasBitTest(N0, N1))
5534 if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5540 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5541 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5542 bool DemandHighBits) {
5543 if (!LegalOperations)
5546 EVT VT = N->getValueType(0);
5547 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5549 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5552 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5553 bool LookPassAnd0 = false;
5554 bool LookPassAnd1 = false;
5555 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5557 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5559 if (N0.getOpcode() == ISD::AND) {
5560 if (!N0.getNode()->hasOneUse())
5562 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5563 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5564 // This is needed for X86.
5565 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5566 N01C->getZExtValue() != 0xFFFF))
5568 N0 = N0.getOperand(0);
5569 LookPassAnd0 = true;
5572 if (N1.getOpcode() == ISD::AND) {
5573 if (!N1.getNode()->hasOneUse())
5575 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5576 if (!N11C || N11C->getZExtValue() != 0xFF)
5578 N1 = N1.getOperand(0);
5579 LookPassAnd1 = true;
5582 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5584 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5586 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5589 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5590 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5593 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5596 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5597 SDValue N00 = N0->getOperand(0);
5598 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5599 if (!N00.getNode()->hasOneUse())
5601 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5602 if (!N001C || N001C->getZExtValue() != 0xFF)
5604 N00 = N00.getOperand(0);
5605 LookPassAnd0 = true;
5608 SDValue N10 = N1->getOperand(0);
5609 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5610 if (!N10.getNode()->hasOneUse())
5612 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5613 // Also allow 0xFFFF since the bits will be shifted out. This is needed
5615 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5616 N101C->getZExtValue() != 0xFFFF))
5618 N10 = N10.getOperand(0);
5619 LookPassAnd1 = true;
5625 // Make sure everything beyond the low halfword gets set to zero since the SRL
5626 // 16 will clear the top bits.
5627 unsigned OpSizeInBits = VT.getSizeInBits();
5628 if (DemandHighBits && OpSizeInBits > 16) {
5629 // If the left-shift isn't masked out then the only way this is a bswap is
5630 // if all bits beyond the low 8 are 0. In that case the entire pattern
5631 // reduces to a left shift anyway: leave it for other parts of the combiner.
5635 // However, if the right shift isn't masked out then it might be because
5636 // it's not needed. See if we can spot that too.
5637 if (!LookPassAnd1 &&
5638 !DAG.MaskedValueIsZero(
5639 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5643 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5644 if (OpSizeInBits > 16) {
5646 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5647 DAG.getConstant(OpSizeInBits - 16, DL,
5648 getShiftAmountTy(VT)));
5653 /// Return true if the specified node is an element that makes up a 32-bit
5654 /// packed halfword byteswap.
5655 /// ((x & 0x000000ff) << 8) |
5656 /// ((x & 0x0000ff00) >> 8) |
5657 /// ((x & 0x00ff0000) << 8) |
5658 /// ((x & 0xff000000) >> 8)
5659 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5660 if (!N.getNode()->hasOneUse())
5663 unsigned Opc = N.getOpcode();
5664 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5667 SDValue N0 = N.getOperand(0);
5668 unsigned Opc0 = N0.getOpcode();
5669 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5672 ConstantSDNode *N1C = nullptr;
5673 // SHL or SRL: look upstream for AND mask operand
5674 if (Opc == ISD::AND)
5675 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5676 else if (Opc0 == ISD::AND)
5677 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5681 unsigned MaskByteOffset;
5682 switch (N1C->getZExtValue()) {
5685 case 0xFF: MaskByteOffset = 0; break;
5686 case 0xFF00: MaskByteOffset = 1; break;
5688 // In case demanded bits didn't clear the bits that will be shifted out.
5689 // This is needed for X86.
5690 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5695 case 0xFF0000: MaskByteOffset = 2; break;
5696 case 0xFF000000: MaskByteOffset = 3; break;
5699 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5700 if (Opc == ISD::AND) {
5701 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5703 // (x >> 8) & 0xff0000
5704 if (Opc0 != ISD::SRL)
5706 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5707 if (!C || C->getZExtValue() != 8)
5710 // (x << 8) & 0xff00
5711 // (x << 8) & 0xff000000
5712 if (Opc0 != ISD::SHL)
5714 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5715 if (!C || C->getZExtValue() != 8)
5718 } else if (Opc == ISD::SHL) {
5720 // (x & 0xff0000) << 8
5721 if (MaskByteOffset != 0 && MaskByteOffset != 2)
5723 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5724 if (!C || C->getZExtValue() != 8)
5726 } else { // Opc == ISD::SRL
5727 // (x & 0xff00) >> 8
5728 // (x & 0xff000000) >> 8
5729 if (MaskByteOffset != 1 && MaskByteOffset != 3)
5731 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5732 if (!C || C->getZExtValue() != 8)
5736 if (Parts[MaskByteOffset])
5739 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5743 // Match 2 elements of a packed halfword bswap.
5744 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5745 if (N.getOpcode() == ISD::OR)
5746 return isBSwapHWordElement(N.getOperand(0), Parts) &&
5747 isBSwapHWordElement(N.getOperand(1), Parts);
5749 if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
5750 ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5751 if (!C || C->getAPIntValue() != 16)
5753 Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
5760 // Match this pattern:
5761 // (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
5762 // And rewrite this to:
5763 // (rotr (bswap A), 16)
5764 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
5765 SelectionDAG &DAG, SDNode *N, SDValue N0,
5766 SDValue N1, EVT VT, EVT ShiftAmountTy) {
5767 assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
5768 "MatchBSwapHWordOrAndAnd: expecting i32");
5769 if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5771 if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
5773 // TODO: this is too restrictive; lifting this restriction requires more tests
5774 if (!N0->hasOneUse() || !N1->hasOneUse())
5776 ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
5777 ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
5778 if (!Mask0 || !Mask1)
5780 if (Mask0->getAPIntValue() != 0xff00ff00 ||
5781 Mask1->getAPIntValue() != 0x00ff00ff)
5783 SDValue Shift0 = N0.getOperand(0);
5784 SDValue Shift1 = N1.getOperand(0);
5785 if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
5787 ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
5788 ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
5789 if (!ShiftAmt0 || !ShiftAmt1)
5791 if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
5793 if (Shift0.getOperand(0) != Shift1.getOperand(0))
5797 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
5798 SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
5799 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5802 /// Match a 32-bit packed halfword bswap. That is
5803 /// ((x & 0x000000ff) << 8) |
5804 /// ((x & 0x0000ff00) >> 8) |
5805 /// ((x & 0x00ff0000) << 8) |
5806 /// ((x & 0xff000000) >> 8)
5807 /// => (rotl (bswap x), 16)
5808 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5809 if (!LegalOperations)
5812 EVT VT = N->getValueType(0);
5815 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5818 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
5819 getShiftAmountTy(VT)))
5822 // Try again with commuted operands.
5823 if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
5824 getShiftAmountTy(VT)))
5829 // (or (bswaphpair), (bswaphpair))
5830 // (or (or (bswaphpair), (and)), (and))
5831 // (or (or (and), (bswaphpair)), (and))
5832 SDNode *Parts[4] = {};
5834 if (isBSwapHWordPair(N0, Parts)) {
5835 // (or (or (and), (and)), (or (and), (and)))
5836 if (!isBSwapHWordPair(N1, Parts))
5838 } else if (N0.getOpcode() == ISD::OR) {
5839 // (or (or (or (and), (and)), (and)), (and))
5840 if (!isBSwapHWordElement(N1, Parts))
5842 SDValue N00 = N0.getOperand(0);
5843 SDValue N01 = N0.getOperand(1);
5844 if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
5845 !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
5850 // Make sure the parts are all coming from the same node.
5851 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5855 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5856 SDValue(Parts[0], 0));
5858 // Result of the bswap should be rotated by 16. If it's not legal, then
5859 // do (x << 16) | (x >> 16).
5860 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5861 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5862 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5863 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5864 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5865 return DAG.getNode(ISD::OR, DL, VT,
5866 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5867 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5870 /// This contains all DAGCombine rules which reduce two values combined by
5871 /// an Or operation to a single value \see visitANDLike().
5872 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5873 EVT VT = N1.getValueType();
5876 // fold (or x, undef) -> -1
5877 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5878 return DAG.getAllOnesConstant(DL, VT);
5880 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5883 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
5884 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5885 // Don't increase # computations.
5886 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5887 // We can only do this xform if we know that bits from X that are set in C2
5888 // but not in C1 are already zero. Likewise for Y.
5889 if (const ConstantSDNode *N0O1C =
5890 getAsNonOpaqueConstant(N0.getOperand(1))) {
5891 if (const ConstantSDNode *N1O1C =
5892 getAsNonOpaqueConstant(N1.getOperand(1))) {
5893 // We can only do this xform if we know that bits from X that are set in
5894 // C2 but not in C1 are already zero. Likewise for Y.
5895 const APInt &LHSMask = N0O1C->getAPIntValue();
5896 const APInt &RHSMask = N1O1C->getAPIntValue();
5898 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5899 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5900 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5901 N0.getOperand(0), N1.getOperand(0));
5902 return DAG.getNode(ISD::AND, DL, VT, X,
5903 DAG.getConstant(LHSMask | RHSMask, DL, VT));
5909 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5910 if (N0.getOpcode() == ISD::AND &&
5911 N1.getOpcode() == ISD::AND &&
5912 N0.getOperand(0) == N1.getOperand(0) &&
5913 // Don't increase # computations.
5914 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5915 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5916 N0.getOperand(1), N1.getOperand(1));
5917 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5923 /// OR combines for which the commuted variant will be tried as well.
5924 static SDValue visitORCommutative(
5925 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5926 EVT VT = N0.getValueType();
5927 if (N0.getOpcode() == ISD::AND) {
5928 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5929 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5930 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5932 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5933 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5934 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5940 SDValue DAGCombiner::visitOR(SDNode *N) {
5941 SDValue N0 = N->getOperand(0);
5942 SDValue N1 = N->getOperand(1);
5943 EVT VT = N1.getValueType();
5950 if (VT.isVector()) {
5951 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5954 // fold (or x, 0) -> x, vector edition
5955 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5957 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5960 // fold (or x, -1) -> -1, vector edition
5961 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5962 // do not return N0, because undef node may exist in N0
5963 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5964 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5965 // do not return N1, because undef node may exist in N1
5966 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5968 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5969 // Do this only if the resulting shuffle is legal.
5970 if (isa<ShuffleVectorSDNode>(N0) &&
5971 isa<ShuffleVectorSDNode>(N1) &&
5972 // Avoid folding a node with illegal type.
5973 TLI.isTypeLegal(VT)) {
5974 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5975 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5976 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5977 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5978 // Ensure both shuffles have a zero input.
5979 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5980 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5981 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5982 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5983 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5984 bool CanFold = true;
5985 int NumElts = VT.getVectorNumElements();
5986 SmallVector<int, 4> Mask(NumElts);
5988 for (int i = 0; i != NumElts; ++i) {
5989 int M0 = SV0->getMaskElt(i);
5990 int M1 = SV1->getMaskElt(i);
5992 // Determine if either index is pointing to a zero vector.
5993 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5994 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5996 // If one element is zero and the otherside is undef, keep undef.
5997 // This also handles the case that both are undef.
5998 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6003 // Make sure only one of the elements is zero.
6004 if (M0Zero == M1Zero) {
6009 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6011 // We have a zero and non-zero element. If the non-zero came from
6012 // SV0 make the index a LHS index. If it came from SV1, make it
6013 // a RHS index. We need to mod by NumElts because we don't care
6014 // which operand it came from in the original shuffles.
6015 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6019 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6020 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6022 SDValue LegalShuffle =
6023 TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6026 return LegalShuffle;
6032 // fold (or c1, c2) -> c1|c2
6033 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6034 if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6037 // canonicalize constant to RHS
6038 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6039 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6040 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6042 // fold (or x, 0) -> x
6043 if (isNullConstant(N1))
6046 // fold (or x, -1) -> -1
6047 if (isAllOnesConstant(N1))
6050 if (SDValue NewSel = foldBinOpIntoSelect(N))
6053 // fold (or x, c) -> c iff (x & ~c) == 0
6054 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6057 if (SDValue Combined = visitORLike(N0, N1, N))
6060 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6063 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6064 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6066 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6070 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6073 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6074 // iff (c1 & c2) != 0 or c1/c2 are undef.
6075 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6076 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6078 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6079 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6080 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6081 {N1, N0.getOperand(1)})) {
6082 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6083 AddToWorklist(IOR.getNode());
6084 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6088 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6090 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6093 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
6094 if (N0.getOpcode() == N1.getOpcode())
6095 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6098 // See if this is some rotate idiom.
6099 if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6102 if (SDValue Load = MatchLoadCombine(N))
6105 // Simplify the operands using demanded-bits information.
6106 if (SimplifyDemandedBits(SDValue(N, 0)))
6107 return SDValue(N, 0);
6109 // If OR can be rewritten into ADD, try combines based on ADD.
6110 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6111 DAG.haveNoCommonBitsSet(N0, N1))
6112 if (SDValue Combined = visitADDLike(N))
6118 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6119 if (Op.getOpcode() == ISD::AND &&
6120 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6121 Mask = Op.getOperand(1);
6122 return Op.getOperand(0);
6127 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6128 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6130 Op = stripConstantMask(DAG, Op, Mask);
6131 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6138 /// Helper function for visitOR to extract the needed side of a rotate idiom
6139 /// from a shl/srl/mul/udiv. This is meant to handle cases where
6140 /// InstCombine merged some outside op with one of the shifts from
6141 /// the rotate pattern.
6142 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6143 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6146 /// (or (add v v) (shrl v bitwidth-1)):
6147 /// expands (add v v) -> (shl v 1)
6149 /// (or (mul v c0) (shrl (mul v c1) c2)):
6150 /// expands (mul v c0) -> (shl (mul v c1) c3)
6152 /// (or (udiv v c0) (shl (udiv v c1) c2)):
6153 /// expands (udiv v c0) -> (shrl (udiv v c1) c3)
6155 /// (or (shl v c0) (shrl (shl v c1) c2)):
6156 /// expands (shl v c0) -> (shl (shl v c1) c3)
6158 /// (or (shrl v c0) (shl (shrl v c1) c2)):
6159 /// expands (shrl v c0) -> (shrl (shrl v c1) c3)
6161 /// Such that in all cases, c3+c2==bitwidth(op v c1).
6162 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6163 SDValue ExtractFrom, SDValue &Mask,
6165 assert(OppShift && ExtractFrom && "Empty SDValue");
6167 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6168 "Existing shift must be valid as a rotate half");
6170 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6172 // Value and Type of the shift.
6173 SDValue OppShiftLHS = OppShift.getOperand(0);
6174 EVT ShiftedVT = OppShiftLHS.getValueType();
6176 // Amount of the existing shift.
6177 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6179 // (add v v) -> (shl v 1)
6180 // TODO: Should this be a general DAG canonicalization?
6181 if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6182 ExtractFrom.getOpcode() == ISD::ADD &&
6183 ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6184 ExtractFrom.getOperand(0) == OppShiftLHS &&
6185 OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6186 return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6187 DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6190 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6192 // Find opcode of the needed shift to be extracted from (op0 v c0).
6193 unsigned Opcode = ISD::DELETED_NODE;
6194 bool IsMulOrDiv = false;
6195 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6196 // opcode or its arithmetic (mul or udiv) variant.
6197 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6198 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6199 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6201 Opcode = NeededShift;
6204 // op0 must be either the needed shift opcode or the mul/udiv equivalent
6205 // that the needed shift can be extracted from.
6206 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6207 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6210 // op0 must be the same opcode on both sides, have the same LHS argument,
6211 // and produce the same value type.
6212 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6213 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6214 ShiftedVT != ExtractFrom.getValueType())
6217 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6218 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6219 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6220 ConstantSDNode *ExtractFromCst =
6221 isConstOrConstSplat(ExtractFrom.getOperand(1));
6222 // TODO: We should be able to handle non-uniform constant vectors for these values
6223 // Check that we have constant values.
6224 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6225 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6226 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6229 // Compute the shift amount we need to extract to complete the rotate.
6230 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6231 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6233 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6234 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6235 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6236 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6237 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6239 // Now try extract the needed shift from the ExtractFrom op and see if the
6240 // result matches up with the existing shift's LHS op.
6242 // Op to extract from is a mul or udiv by a constant.
6244 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6245 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6246 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6247 NeededShiftAmt.getZExtValue());
6250 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6251 if (Rem != 0 || ResultAmt != OppLHSAmt)
6254 // Op to extract from is a shift by a constant.
6256 // c2 - (bitwidth(op0 v c0) - c1) == c0
6257 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6258 ExtractFromAmt.getBitWidth()))
6262 // Return the expanded shift op that should allow a rotate to be formed.
6263 EVT ShiftVT = OppShift.getOperand(1).getValueType();
6264 EVT ResVT = ExtractFrom.getValueType();
6265 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6266 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6269 // Return true if we can prove that, whenever Neg and Pos are both in the
6270 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
6271 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6273 // (or (shift1 X, Neg), (shift2 X, Pos))
6275 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6276 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
6277 // to consider shift amounts with defined behavior.
6278 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6279 SelectionDAG &DAG) {
6280 // If EltSize is a power of 2 then:
6282 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6283 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6285 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6286 // for the stronger condition:
6288 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
6290 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6291 // we can just replace Neg with Neg' for the rest of the function.
6293 // In other cases we check for the even stronger condition:
6295 // Neg == EltSize - Pos [B]
6297 // for all Neg and Pos. Note that the (or ...) then invokes undefined
6298 // behavior if Pos == 0 (and consequently Neg == EltSize).
6300 // We could actually use [A] whenever EltSize is a power of 2, but the
6301 // only extra cases that it would match are those uninteresting ones
6302 // where Neg and Pos are never in range at the same time. E.g. for
6303 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6304 // as well as (sub 32, Pos), but:
6306 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6308 // always invokes undefined behavior for 32-bit X.
6310 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6311 unsigned MaskLoBits = 0;
6312 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6313 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6314 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6315 unsigned Bits = Log2_64(EltSize);
6316 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6317 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6318 Neg = Neg.getOperand(0);
6324 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6325 if (Neg.getOpcode() != ISD::SUB)
6327 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6330 SDValue NegOp1 = Neg.getOperand(1);
6332 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6333 // Pos'. The truncation is redundant for the purpose of the equality.
6334 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6335 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6336 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6337 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6338 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6340 Pos = Pos.getOperand(0);
6344 // The condition we need is now:
6346 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6348 // If NegOp1 == Pos then we need:
6350 // EltSize & Mask == NegC & Mask
6352 // (because "x & Mask" is a truncation and distributes through subtraction).
6354 // We also need to account for a potential truncation of NegOp1 if the amount
6355 // has already been legalized to a shift amount type.
6357 if ((Pos == NegOp1) ||
6358 (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6359 Width = NegC->getAPIntValue();
6361 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6362 // Then the condition we want to prove becomes:
6364 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6366 // which, again because "x & Mask" is a truncation, becomes:
6368 // NegC & Mask == (EltSize - PosC) & Mask
6369 // EltSize & Mask == (NegC + PosC) & Mask
6370 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6371 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6372 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6378 // Now we just need to check that EltSize & Mask == Width & Mask.
6380 // EltSize & Mask is 0 since Mask is EltSize - 1.
6381 return Width.getLoBits(MaskLoBits) == 0;
6382 return Width == EltSize;
6385 // A subroutine of MatchRotate used once we have found an OR of two opposite
6386 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
6387 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6388 // former being preferred if supported. InnerPos and InnerNeg are Pos and
6389 // Neg with outer conversions stripped away.
6390 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6391 SDValue Neg, SDValue InnerPos,
6392 SDValue InnerNeg, unsigned PosOpcode,
6393 unsigned NegOpcode, const SDLoc &DL) {
6394 // fold (or (shl x, (*ext y)),
6395 // (srl x, (*ext (sub 32, y)))) ->
6396 // (rotl x, y) or (rotr x, (sub 32, y))
6398 // fold (or (shl x, (*ext (sub 32, y))),
6399 // (srl x, (*ext y))) ->
6400 // (rotr x, y) or (rotl x, (sub 32, y))
6401 EVT VT = Shifted.getValueType();
6402 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6403 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6404 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6405 HasPos ? Pos : Neg);
6411 // A subroutine of MatchRotate used once we have found an OR of two opposite
6412 // shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces
6413 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6414 // former being preferred if supported. InnerPos and InnerNeg are Pos and
6415 // Neg with outer conversions stripped away.
6416 // TODO: Merge with MatchRotatePosNeg.
6417 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6418 SDValue Neg, SDValue InnerPos,
6419 SDValue InnerNeg, unsigned PosOpcode,
6420 unsigned NegOpcode, const SDLoc &DL) {
6421 EVT VT = N0.getValueType();
6422 unsigned EltBits = VT.getScalarSizeInBits();
6424 // fold (or (shl x0, (*ext y)),
6425 // (srl x1, (*ext (sub 32, y)))) ->
6426 // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6428 // fold (or (shl x0, (*ext (sub 32, y))),
6429 // (srl x1, (*ext y))) ->
6430 // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6431 if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
6432 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6433 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6434 HasPos ? Pos : Neg);
6437 // Matching the shift+xor cases, we can't easily use the xor'd shift amount
6438 // so for now just use the PosOpcode case if its legal.
6439 // TODO: When can we use the NegOpcode case?
6440 if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6441 auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6442 if (Op.getOpcode() != BinOpc)
6444 ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6445 return Cst && (Cst->getAPIntValue() == Imm);
6448 // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6449 // -> (fshl x0, x1, y)
6450 if (IsBinOpImm(N1, ISD::SRL, 1) &&
6451 IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6452 InnerPos == InnerNeg.getOperand(0) &&
6453 TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6454 return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6457 // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6458 // -> (fshr x0, x1, y)
6459 if (IsBinOpImm(N0, ISD::SHL, 1) &&
6460 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6461 InnerNeg == InnerPos.getOperand(0) &&
6462 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6463 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6466 // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6467 // -> (fshr x0, x1, y)
6468 // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6469 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6470 IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6471 InnerNeg == InnerPos.getOperand(0) &&
6472 TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6473 return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6480 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
6481 // idioms for rotate, and if the target supports rotation instructions, generate
6482 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6483 // with different shifted sources.
6484 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6485 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
6486 EVT VT = LHS.getValueType();
6487 if (!TLI.isTypeLegal(VT))
6490 // The target must have at least one rotate/funnel flavor.
6491 bool HasROTL = hasOperation(ISD::ROTL, VT);
6492 bool HasROTR = hasOperation(ISD::ROTR, VT);
6493 bool HasFSHL = hasOperation(ISD::FSHL, VT);
6494 bool HasFSHR = hasOperation(ISD::FSHR, VT);
6495 if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6498 // Check for truncated rotate.
6499 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6500 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6501 assert(LHS.getValueType() == RHS.getValueType());
6502 if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6503 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6507 // Match "(X shl/srl V1) & V2" where V2 may not be present.
6508 SDValue LHSShift; // The shift.
6509 SDValue LHSMask; // AND value if any.
6510 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6512 SDValue RHSShift; // The shift.
6513 SDValue RHSMask; // AND value if any.
6514 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6516 // If neither side matched a rotate half, bail
6517 if (!LHSShift && !RHSShift)
6520 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6521 // side of the rotate, so try to handle that here. In all cases we need to
6522 // pass the matched shift from the opposite side to compute the opcode and
6523 // needed shift amount to extract. We still want to do this if both sides
6524 // matched a rotate half because one half may be a potential overshift that
6525 // can be broken down (ie if InstCombine merged two shl or srl ops into a
6528 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6530 if (SDValue NewRHSShift =
6531 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6532 RHSShift = NewRHSShift;
6533 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6535 if (SDValue NewLHSShift =
6536 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6537 LHSShift = NewLHSShift;
6539 // If a side is still missing, nothing else we can do.
6540 if (!RHSShift || !LHSShift)
6543 // At this point we've matched or extracted a shift op on each side.
6545 if (LHSShift.getOpcode() == RHSShift.getOpcode())
6546 return SDValue(); // Shifts must disagree.
6548 bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6549 if (!IsRotate && !(HasFSHL || HasFSHR))
6550 return SDValue(); // Requires funnel shift support.
6552 // Canonicalize shl to left side in a shl/srl pair.
6553 if (RHSShift.getOpcode() == ISD::SHL) {
6554 std::swap(LHS, RHS);
6555 std::swap(LHSShift, RHSShift);
6556 std::swap(LHSMask, RHSMask);
6559 unsigned EltSizeInBits = VT.getScalarSizeInBits();
6560 SDValue LHSShiftArg = LHSShift.getOperand(0);
6561 SDValue LHSShiftAmt = LHSShift.getOperand(1);
6562 SDValue RHSShiftArg = RHSShift.getOperand(0);
6563 SDValue RHSShiftAmt = RHSShift.getOperand(1);
6565 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6566 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6567 // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
6568 // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
6569 // iff C1+C2 == EltSizeInBits
6570 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6571 ConstantSDNode *RHS) {
6572 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6574 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6576 if (IsRotate && (HasROTL || HasROTR))
6577 Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
6578 HasROTL ? LHSShiftAmt : RHSShiftAmt);
6580 Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
6581 RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
6583 // If there is an AND of either shifted operand, apply it to the result.
6584 if (LHSMask.getNode() || RHSMask.getNode()) {
6585 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6586 SDValue Mask = AllOnes;
6588 if (LHSMask.getNode()) {
6589 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6590 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6591 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6593 if (RHSMask.getNode()) {
6594 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6595 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6596 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6599 Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
6605 // If there is a mask here, and we have a variable shift, we can't be sure
6606 // that we're masking out the right stuff.
6607 if (LHSMask.getNode() || RHSMask.getNode())
6610 // If the shift amount is sign/zext/any-extended just peel it off.
6611 SDValue LExtOp0 = LHSShiftAmt;
6612 SDValue RExtOp0 = RHSShiftAmt;
6613 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6614 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6615 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6616 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6617 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6618 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6619 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6620 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6621 LExtOp0 = LHSShiftAmt.getOperand(0);
6622 RExtOp0 = RHSShiftAmt.getOperand(0);
6625 if (IsRotate && (HasROTL || HasROTR)) {
6627 MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
6628 RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6633 MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
6634 LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6640 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6641 LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
6646 MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6647 RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
6656 /// Represents known origin of an individual byte in load combine pattern. The
6657 /// value of the byte is either constant zero or comes from memory.
6658 struct ByteProvider {
6659 // For constant zero providers Load is set to nullptr. For memory providers
6660 // Load represents the node which loads the byte from memory.
6661 // ByteOffset is the offset of the byte in the value produced by the load.
6662 LoadSDNode *Load = nullptr;
6663 unsigned ByteOffset = 0;
6665 ByteProvider() = default;
6667 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6668 return ByteProvider(Load, ByteOffset);
6671 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6673 bool isConstantZero() const { return !Load; }
6674 bool isMemory() const { return Load; }
6676 bool operator==(const ByteProvider &Other) const {
6677 return Other.Load == Load && Other.ByteOffset == ByteOffset;
6681 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6682 : Load(Load), ByteOffset(ByteOffset) {}
6685 } // end anonymous namespace
6687 /// Recursively traverses the expression calculating the origin of the requested
6688 /// byte of the given value. Returns None if the provider can't be calculated.
6690 /// For all the values except the root of the expression verifies that the value
6691 /// has exactly one use and if it's not true return None. This way if the origin
6692 /// of the byte is returned it's guaranteed that the values which contribute to
6693 /// the byte are not used outside of this expression.
6695 /// Because the parts of the expression are not allowed to have more than one
6696 /// use this function iterates over trees, not DAGs. So it never visits the same
6697 /// node more than once.
6698 static const Optional<ByteProvider>
6699 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6700 bool Root = false) {
6701 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6705 if (!Root && !Op.hasOneUse())
6708 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6709 unsigned BitWidth = Op.getValueSizeInBits();
6710 if (BitWidth % 8 != 0)
6712 unsigned ByteWidth = BitWidth / 8;
6713 assert(Index < ByteWidth && "invalid index requested");
6716 switch (Op.getOpcode()) {
6718 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6721 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6725 if (LHS->isConstantZero())
6727 if (RHS->isConstantZero())
6732 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6736 uint64_t BitShift = ShiftOp->getZExtValue();
6737 if (BitShift % 8 != 0)
6739 uint64_t ByteShift = BitShift / 8;
6741 return Index < ByteShift
6742 ? ByteProvider::getConstantZero()
6743 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6746 case ISD::ANY_EXTEND:
6747 case ISD::SIGN_EXTEND:
6748 case ISD::ZERO_EXTEND: {
6749 SDValue NarrowOp = Op->getOperand(0);
6750 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6751 if (NarrowBitWidth % 8 != 0)
6753 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6755 if (Index >= NarrowByteWidth)
6756 return Op.getOpcode() == ISD::ZERO_EXTEND
6757 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6759 return calculateByteProvider(NarrowOp, Index, Depth + 1);
6762 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6765 auto L = cast<LoadSDNode>(Op.getNode());
6766 if (!L->isSimple() || L->isIndexed())
6769 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6770 if (NarrowBitWidth % 8 != 0)
6772 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6774 if (Index >= NarrowByteWidth)
6775 return L->getExtensionType() == ISD::ZEXTLOAD
6776 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6778 return ByteProvider::getMemory(L, Index);
6785 static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
6789 static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
6793 // Check if the bytes offsets we are looking at match with either big or
6794 // little endian value loaded. Return true for big endian, false for little
6795 // endian, and None if match failed.
6796 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
6797 int64_t FirstOffset) {
6798 // The endian can be decided only when it is 2 bytes at least.
6799 unsigned Width = ByteOffsets.size();
6803 bool BigEndian = true, LittleEndian = true;
6804 for (unsigned i = 0; i < Width; i++) {
6805 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6806 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6807 BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6808 if (!BigEndian && !LittleEndian)
6812 assert((BigEndian != LittleEndian) && "It should be either big endian or"
6817 static SDValue stripTruncAndExt(SDValue Value) {
6818 switch (Value.getOpcode()) {
6820 case ISD::ZERO_EXTEND:
6821 case ISD::SIGN_EXTEND:
6822 case ISD::ANY_EXTEND:
6823 return stripTruncAndExt(Value.getOperand(0));
6828 /// Match a pattern where a wide type scalar value is stored by several narrow
6829 /// stores. Fold it into a single store or a BSWAP and a store if the targets
6832 /// Assuming little endian target:
6835 /// p[0] = (val >> 0) & 0xFF;
6836 /// p[1] = (val >> 8) & 0xFF;
6837 /// p[2] = (val >> 16) & 0xFF;
6838 /// p[3] = (val >> 24) & 0xFF;
6840 /// *((i32)p) = val;
6844 /// p[0] = (val >> 24) & 0xFF;
6845 /// p[1] = (val >> 16) & 0xFF;
6846 /// p[2] = (val >> 8) & 0xFF;
6847 /// p[3] = (val >> 0) & 0xFF;
6849 /// *((i32)p) = BSWAP(val);
6850 SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6851 // Collect all the stores in the chain.
6853 SmallVector<StoreSDNode *, 8> Stores;
6854 for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6855 // TODO: Allow unordered atomics when wider type is legal (see D66309)
6856 if (Store->getMemoryVT() != MVT::i8 ||
6857 !Store->isSimple() || Store->isIndexed())
6859 Stores.push_back(Store);
6860 Chain = Store->getChain();
6862 // Handle the simple type only.
6863 unsigned Width = Stores.size();
6864 EVT VT = EVT::getIntegerVT(
6865 *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6866 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6869 if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6872 // Check if all the bytes of the combined value we are looking at are stored
6873 // to the same base address. Collect bytes offsets from Base address into
6875 SDValue CombinedValue;
6876 SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX);
6877 int64_t FirstOffset = INT64_MAX;
6878 StoreSDNode *FirstStore = nullptr;
6879 Optional<BaseIndexOffset> Base;
6880 for (auto Store : Stores) {
6881 // All the stores store different byte of the CombinedValue. A truncate is
6882 // required to get that byte value.
6883 SDValue Trunc = Store->getValue();
6884 if (Trunc.getOpcode() != ISD::TRUNCATE)
6886 // A shift operation is required to get the right byte offset, except the
6889 SDValue Value = Trunc.getOperand(0);
6890 if (Value.getOpcode() == ISD::SRL ||
6891 Value.getOpcode() == ISD::SRA) {
6892 auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1));
6893 // Trying to match the following pattern. The shift offset must be
6894 // a constant and a multiple of 8. It is the byte offset in "y".
6896 // x = srl y, offset
6899 if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6902 Offset = ShiftOffset->getSExtValue()/8;
6903 Value = Value.getOperand(0);
6906 // Stores must share the same combined value with different offsets.
6908 CombinedValue = Value;
6909 else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6912 // The trunc and all the extend operation should be stripped to get the
6913 // real value we are stored.
6914 else if (CombinedValue.getValueType() != VT) {
6915 if (Value.getValueType() == VT ||
6916 Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6917 CombinedValue = Value;
6918 // Give up if the combined value type is smaller than the store size.
6919 if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6923 // Stores must share the same base address
6924 BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6925 int64_t ByteOffsetFromBase = 0;
6928 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6931 // Remember the first byte store
6932 if (ByteOffsetFromBase < FirstOffset) {
6934 FirstOffset = ByteOffsetFromBase;
6936 // Map the offset in the store and the offset in the combined value, and
6937 // early return if it has been set before.
6938 if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6940 ByteOffsets[Offset] = ByteOffsetFromBase;
6943 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6944 assert(FirstStore && "First store must be set");
6946 // Check if the bytes of the combined value we are looking at match with
6947 // either big or little endian value store.
6948 Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6949 if (!IsBigEndian.hasValue())
6952 // The node we are looking at matches with the pattern, check if we can
6953 // replace it with a single bswap if needed and store.
6955 // If the store needs byte swap check if the target supports it
6956 bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6958 // Before legalize we can introduce illegal bswaps which will be later
6959 // converted to an explicit bswap sequence. This way we end up with a single
6960 // store and byte shuffling instead of several stores and byte shuffling.
6961 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6964 // Check that a store of the wide type is both allowed and fast on the target
6967 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6968 *FirstStore->getMemOperand(), &Fast);
6969 if (!Allowed || !Fast)
6972 if (VT != CombinedValue.getValueType()) {
6973 assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6974 "Get unexpected store value to combine");
6975 CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6980 CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6983 DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
6984 FirstStore->getPointerInfo(), FirstStore->getAlignment());
6986 // Rely on other DAG combine rules to remove the other individual stores.
6987 DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6991 /// Match a pattern where a wide type scalar value is loaded by several narrow
6992 /// loads and combined by shifts and ors. Fold it into a single load or a load
6993 /// and a BSWAP if the targets supports it.
6995 /// Assuming little endian target:
6997 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6999 /// i32 val = *((i32)a)
7002 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7004 /// i32 val = BSWAP(*((i32)a))
7006 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7007 /// interact well with the worklist mechanism. When a part of the pattern is
7008 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7009 /// but the root node of the pattern which triggers the load combine is not
7010 /// necessarily a direct user of the changed node. For example, once the address
7011 /// of t28 load is reassociated load combine won't be triggered:
7012 /// t25: i32 = add t4, Constant:i32<2>
7013 /// t26: i64 = sign_extend t25
7014 /// t27: i64 = add t2, t26
7015 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7016 /// t29: i32 = zero_extend t28
7017 /// t32: i32 = shl t29, Constant:i8<8>
7018 /// t33: i32 = or t23, t32
7019 /// As a possible fix visitLoad can check if the load can be a part of a load
7020 /// combine pattern and add corresponding OR roots to the worklist.
7021 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7022 assert(N->getOpcode() == ISD::OR &&
7023 "Can only match load combining against OR nodes");
7025 // Handles simple types only
7026 EVT VT = N->getValueType(0);
7027 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7029 unsigned ByteWidth = VT.getSizeInBits() / 8;
7031 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7032 auto MemoryByteOffset = [&] (ByteProvider P) {
7033 assert(P.isMemory() && "Must be a memory byte provider");
7034 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7035 assert(LoadBitWidth % 8 == 0 &&
7036 "can only analyze providers for individual bytes not bit");
7037 unsigned LoadByteWidth = LoadBitWidth / 8;
7038 return IsBigEndianTarget
7039 ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
7040 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
7043 Optional<BaseIndexOffset> Base;
7046 SmallPtrSet<LoadSDNode *, 8> Loads;
7047 Optional<ByteProvider> FirstByteProvider;
7048 int64_t FirstOffset = INT64_MAX;
7050 // Check if all the bytes of the OR we are looking at are loaded from the same
7051 // base address. Collect bytes offsets from Base address in ByteOffsets.
7052 SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7053 unsigned ZeroExtendedBytes = 0;
7054 for (int i = ByteWidth - 1; i >= 0; --i) {
7055 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7059 if (P->isConstantZero()) {
7060 // It's OK for the N most significant bytes to be 0, we can just
7061 // zero-extend the load.
7062 if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7066 assert(P->isMemory() && "provenance should either be memory or zero");
7068 LoadSDNode *L = P->Load;
7069 assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7071 "Must be enforced by calculateByteProvider");
7072 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7074 // All loads must share the same chain
7075 SDValue LChain = L->getChain();
7078 else if (Chain != LChain)
7081 // Loads must share the same base address
7082 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7083 int64_t ByteOffsetFromBase = 0;
7086 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7089 // Calculate the offset of the current byte from the base address
7090 ByteOffsetFromBase += MemoryByteOffset(*P);
7091 ByteOffsets[i] = ByteOffsetFromBase;
7093 // Remember the first byte load
7094 if (ByteOffsetFromBase < FirstOffset) {
7095 FirstByteProvider = P;
7096 FirstOffset = ByteOffsetFromBase;
7101 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7102 "memory, so there must be at least one load which produces the value");
7103 assert(Base && "Base address of the accessed memory location must be set");
7104 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7106 bool NeedsZext = ZeroExtendedBytes > 0;
7109 EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7111 if (!MemVT.isSimple())
7114 // Before legalize we can introduce too wide illegal loads which will be later
7115 // split into legal sized loads. This enables us to combine i64 load by i8
7116 // patterns to a couple of i32 loads on 32 bit targets.
7117 if (LegalOperations &&
7118 !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7122 // Check if the bytes of the OR we are looking at match with either big or
7123 // little endian value load
7124 Optional<bool> IsBigEndian = isBigEndian(
7125 makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7126 if (!IsBigEndian.hasValue())
7129 assert(FirstByteProvider && "must be set");
7131 // Ensure that the first byte is loaded from zero offset of the first load.
7132 // So the combined value can be loaded from the first load address.
7133 if (MemoryByteOffset(*FirstByteProvider) != 0)
7135 LoadSDNode *FirstLoad = FirstByteProvider->Load;
7137 // The node we are looking at matches with the pattern, check if we can
7138 // replace it with a single (possibly zero-extended) load and bswap + shift if
7141 // If the load needs byte swap check if the target supports it
7142 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7144 // Before legalize we can introduce illegal bswaps which will be later
7145 // converted to an explicit bswap sequence. This way we end up with a single
7146 // load and byte shuffling instead of several loads and byte shuffling.
7147 // We do not introduce illegal bswaps when zero-extending as this tends to
7148 // introduce too many arithmetic instructions.
7149 if (NeedsBswap && (LegalOperations || NeedsZext) &&
7150 !TLI.isOperationLegal(ISD::BSWAP, VT))
7153 // If we need to bswap and zero extend, we have to insert a shift. Check that
7155 if (NeedsBswap && NeedsZext && LegalOperations &&
7156 !TLI.isOperationLegal(ISD::SHL, VT))
7159 // Check that a load of the wide type is both allowed and fast on the target
7162 TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7163 *FirstLoad->getMemOperand(), &Fast);
7164 if (!Allowed || !Fast)
7167 SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7168 SDLoc(N), VT, Chain, FirstLoad->getBasePtr(),
7169 FirstLoad->getPointerInfo(), MemVT,
7170 FirstLoad->getAlignment());
7172 // Transfer chain users from old loads to the new load.
7173 for (LoadSDNode *L : Loads)
7174 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7179 SDValue ShiftedLoad =
7181 ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7182 DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7183 SDLoc(N), LegalOperations))
7185 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7188 // If the target has andn, bsl, or a similar bit-select instruction,
7189 // we want to unfold masked merge, with canonical pattern of:
7191 // ((x ^ y) & m) ^ y
7194 // (x & m) | (y & ~m)
7195 // If y is a constant, and the 'andn' does not work with immediates,
7196 // we unfold into a different pattern:
7197 // ~(~x & m) & (m | y)
7198 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7199 // the very least that breaks andnpd / andnps patterns, and because those
7200 // patterns are simplified in IR and shouldn't be created in the DAG
7201 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7202 assert(N->getOpcode() == ISD::XOR);
7204 // Don't touch 'not' (i.e. where y = -1).
7205 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7208 EVT VT = N->getValueType(0);
7210 // There are 3 commutable operators in the pattern,
7211 // so we have to deal with 8 possible variants of the basic pattern.
7213 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7214 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7216 SDValue Xor = And.getOperand(XorIdx);
7217 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7219 SDValue Xor0 = Xor.getOperand(0);
7220 SDValue Xor1 = Xor.getOperand(1);
7221 // Don't touch 'not' (i.e. where y = -1).
7222 if (isAllOnesOrAllOnesSplat(Xor1))
7225 std::swap(Xor0, Xor1);
7230 M = And.getOperand(XorIdx ? 0 : 1);
7234 SDValue N0 = N->getOperand(0);
7235 SDValue N1 = N->getOperand(1);
7236 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7237 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7240 // Don't do anything if the mask is constant. This should not be reachable.
7241 // InstCombine should have already unfolded this pattern, and DAGCombiner
7242 // probably shouldn't produce it, too.
7243 if (isa<ConstantSDNode>(M.getNode()))
7246 // We can transform if the target has AndNot
7247 if (!TLI.hasAndNot(M))
7252 // If Y is a constant, check that 'andn' works with immediates.
7253 if (!TLI.hasAndNot(Y)) {
7254 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7255 // If not, we need to do a bit more work to make sure andn is still used.
7256 SDValue NotX = DAG.getNOT(DL, X, VT);
7257 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7258 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7259 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7260 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7263 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7264 SDValue NotM = DAG.getNOT(DL, M, VT);
7265 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7267 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7270 SDValue DAGCombiner::visitXOR(SDNode *N) {
7271 SDValue N0 = N->getOperand(0);
7272 SDValue N1 = N->getOperand(1);
7273 EVT VT = N0.getValueType();
7276 if (VT.isVector()) {
7277 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7280 // fold (xor x, 0) -> x, vector edition
7281 if (ISD::isBuildVectorAllZeros(N0.getNode()))
7283 if (ISD::isBuildVectorAllZeros(N1.getNode()))
7287 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7289 if (N0.isUndef() && N1.isUndef())
7290 return DAG.getConstant(0, DL, VT);
7292 // fold (xor x, undef) -> undef
7298 // fold (xor c1, c2) -> c1^c2
7299 if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7302 // canonicalize constant to RHS
7303 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7304 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7305 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7307 // fold (xor x, 0) -> x
7308 if (isNullConstant(N1))
7311 if (SDValue NewSel = foldBinOpIntoSelect(N))
7315 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7318 // fold !(x cc y) -> (x !cc y)
7319 unsigned N0Opcode = N0.getOpcode();
7320 SDValue LHS, RHS, CC;
7321 if (TLI.isConstTrueVal(N1.getNode()) &&
7322 isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7323 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7324 LHS.getValueType());
7325 if (!LegalOperations ||
7326 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7329 llvm_unreachable("Unhandled SetCC Equivalent!");
7331 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7332 case ISD::SELECT_CC:
7333 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7334 N0.getOperand(3), NotCC);
7335 case ISD::STRICT_FSETCC:
7336 case ISD::STRICT_FSETCCS: {
7337 if (N0.hasOneUse()) {
7338 // FIXME Can we handle multiple uses? Could we token factor the chain
7339 // results from the new/old setcc?
7340 SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7342 N0Opcode == ISD::STRICT_FSETCCS);
7343 CombineTo(N, SetCC);
7344 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7345 recursivelyDeleteUnusedNodes(N0.getNode());
7346 return SDValue(N, 0); // Return N so it doesn't get rechecked!
7354 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7355 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7356 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7357 SDValue V = N0.getOperand(0);
7359 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7360 DAG.getConstant(1, DL0, V.getValueType()));
7361 AddToWorklist(V.getNode());
7362 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7365 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7366 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7367 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7368 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7369 if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7370 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7371 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7372 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7373 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7374 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7377 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7378 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7379 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7380 SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7381 if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7382 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7383 N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7384 N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7385 AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7386 return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7390 // fold (not (neg x)) -> (add X, -1)
7391 // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7392 // Y is a constant or the subtract has a single use.
7393 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7394 isNullConstant(N0.getOperand(0))) {
7395 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7396 DAG.getAllOnesConstant(DL, VT));
7399 // fold (not (add X, -1)) -> (neg X)
7400 if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7401 isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7402 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7406 // fold (xor (and x, y), y) -> (and (not x), y)
7407 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7408 SDValue X = N0.getOperand(0);
7409 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7410 AddToWorklist(NotX.getNode());
7411 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7414 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7415 ConstantSDNode *XorC = isConstOrConstSplat(N1);
7416 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7417 unsigned BitWidth = VT.getScalarSizeInBits();
7418 if (XorC && ShiftC) {
7419 // Don't crash on an oversized shift. We can not guarantee that a bogus
7420 // shift has been simplified to undef.
7421 uint64_t ShiftAmt = ShiftC->getLimitedValue();
7422 if (ShiftAmt < BitWidth) {
7423 APInt Ones = APInt::getAllOnesValue(BitWidth);
7424 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7425 if (XorC->getAPIntValue() == Ones) {
7426 // If the xor constant is a shifted -1, do a 'not' before the shift:
7427 // xor (X << ShiftC), XorC --> (not X) << ShiftC
7428 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7429 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7430 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7436 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7437 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7438 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7439 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7440 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7441 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7442 SDValue S0 = S.getOperand(0);
7443 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
7444 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7445 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7446 if (C->getAPIntValue() == (OpSizeInBits - 1))
7447 return DAG.getNode(ISD::ABS, DL, VT, S0);
7452 // fold (xor x, x) -> 0
7454 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7456 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7457 // Here is a concrete example of this equivalence:
7459 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
7460 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7464 // i16 ~1 == 0b1111111111111110
7465 // i16 rol(~1, 14) == 0b1011111111111111
7467 // Some additional tips to help conceptualize this transform:
7468 // - Try to see the operation as placing a single zero in a value of all ones.
7469 // - There exists no value for x which would allow the result to contain zero.
7470 // - Values of x larger than the bitwidth are undefined and do not require a
7471 // consistent result.
7472 // - Pushing the zero left requires shifting one bits in from the right.
7473 // A rotate left of ~1 is a nice way of achieving the desired result.
7474 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7475 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7476 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7480 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
7481 if (N0Opcode == N1.getOpcode())
7482 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7485 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
7486 if (SDValue MM = unfoldMaskedMerge(N))
7489 // Simplify the expression using non-local knowledge.
7490 if (SimplifyDemandedBits(SDValue(N, 0)))
7491 return SDValue(N, 0);
7493 if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7499 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7500 /// shift-by-constant operand with identical opcode, we may be able to convert
7501 /// that into 2 independent shifts followed by the logic op. This is a
7502 /// throughput improvement.
7503 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7504 // Match a one-use bitwise logic op.
7505 SDValue LogicOp = Shift->getOperand(0);
7506 if (!LogicOp.hasOneUse())
7509 unsigned LogicOpcode = LogicOp.getOpcode();
7510 if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7511 LogicOpcode != ISD::XOR)
7514 // Find a matching one-use shift by constant.
7515 unsigned ShiftOpcode = Shift->getOpcode();
7516 SDValue C1 = Shift->getOperand(1);
7517 ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7518 assert(C1Node && "Expected a shift with constant operand");
7519 const APInt &C1Val = C1Node->getAPIntValue();
7520 auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7521 const APInt *&ShiftAmtVal) {
7522 if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7525 ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7529 // Capture the shifted operand and shift amount value.
7530 ShiftOp = V.getOperand(0);
7531 ShiftAmtVal = &ShiftCNode->getAPIntValue();
7533 // Shift amount types do not have to match their operand type, so check that
7534 // the constants are the same width.
7535 if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7538 // The fold is not valid if the sum of the shift values exceeds bitwidth.
7539 if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7545 // Logic ops are commutative, so check each operand for a match.
7548 if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7549 Y = LogicOp.getOperand(1);
7550 else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7551 Y = LogicOp.getOperand(0);
7555 // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7557 EVT VT = Shift->getValueType(0);
7558 EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7559 SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7560 SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7561 SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7562 return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7565 /// Handle transforms common to the three shifts, when the shift amount is a
7567 /// We are looking for: (shift being one of shl/sra/srl)
7568 /// shift (binop X, C0), C1
7569 /// And want to transform into:
7570 /// binop (shift X, C1), (shift C0, C1)
7571 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7572 assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7574 // Do not turn a 'not' into a regular xor.
7575 if (isBitwiseNot(N->getOperand(0)))
7578 // The inner binop must be one-use, since we want to replace it.
7579 SDValue LHS = N->getOperand(0);
7580 if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7583 // TODO: This is limited to early combining because it may reveal regressions
7584 // otherwise. But since we just checked a target hook to see if this is
7585 // desirable, that should have filtered out cases where this interferes
7586 // with some other pattern matching.
7588 if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7591 // We want to pull some binops through shifts, so that we have (and (shift))
7592 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
7593 // thing happens with address calculations, so it's important to canonicalize
7595 switch (LHS.getOpcode()) {
7603 if (N->getOpcode() != ISD::SHL)
7604 return SDValue(); // only shl(add) not sr[al](add).
7608 // We require the RHS of the binop to be a constant and not opaque as well.
7609 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7613 // FIXME: disable this unless the input to the binop is a shift by a constant
7614 // or is copy/select. Enable this in other cases when figure out it's exactly
7616 SDValue BinOpLHSVal = LHS.getOperand(0);
7617 bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7618 BinOpLHSVal.getOpcode() == ISD::SRA ||
7619 BinOpLHSVal.getOpcode() == ISD::SRL) &&
7620 isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7621 bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7622 BinOpLHSVal.getOpcode() == ISD::SELECT;
7624 if (!IsShiftByConstant && !IsCopyOrSelect)
7627 if (IsCopyOrSelect && N->hasOneUse())
7630 // Fold the constants, shifting the binop RHS by the shift amount.
7632 EVT VT = N->getValueType(0);
7633 SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
7635 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7637 SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
7639 return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
7642 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7643 assert(N->getOpcode() == ISD::TRUNCATE);
7644 assert(N->getOperand(0).getOpcode() == ISD::AND);
7646 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7647 EVT TruncVT = N->getValueType(0);
7648 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7649 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7650 SDValue N01 = N->getOperand(0).getOperand(1);
7651 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7653 SDValue N00 = N->getOperand(0).getOperand(0);
7654 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7655 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7656 AddToWorklist(Trunc00.getNode());
7657 AddToWorklist(Trunc01.getNode());
7658 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7665 SDValue DAGCombiner::visitRotate(SDNode *N) {
7667 SDValue N0 = N->getOperand(0);
7668 SDValue N1 = N->getOperand(1);
7669 EVT VT = N->getValueType(0);
7670 unsigned Bitsize = VT.getScalarSizeInBits();
7672 // fold (rot x, 0) -> x
7673 if (isNullOrNullSplat(N1))
7676 // fold (rot x, c) -> x iff (c % BitSize) == 0
7677 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7678 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7679 if (DAG.MaskedValueIsZero(N1, ModuloMask))
7683 // fold (rot x, c) -> (rot x, c % BitSize)
7684 bool OutOfRange = false;
7685 auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
7686 OutOfRange |= C->getAPIntValue().uge(Bitsize);
7689 if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
7690 EVT AmtVT = N1.getValueType();
7691 SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
7693 DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
7694 return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
7697 // rot i16 X, 8 --> bswap X
7698 auto *RotAmtC = isConstOrConstSplat(N1);
7699 if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
7700 VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
7701 return DAG.getNode(ISD::BSWAP, dl, VT, N0);
7703 // Simplify the operands using demanded-bits information.
7704 if (SimplifyDemandedBits(SDValue(N, 0)))
7705 return SDValue(N, 0);
7707 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7708 if (N1.getOpcode() == ISD::TRUNCATE &&
7709 N1.getOperand(0).getOpcode() == ISD::AND) {
7710 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7711 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7714 unsigned NextOp = N0.getOpcode();
7715 // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
7716 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7717 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7718 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7719 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7720 EVT ShiftVT = C1->getValueType(0);
7721 bool SameSide = (N->getOpcode() == NextOp);
7722 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7723 if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
7724 CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
7725 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7726 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
7727 ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
7728 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
7736 SDValue DAGCombiner::visitSHL(SDNode *N) {
7737 SDValue N0 = N->getOperand(0);
7738 SDValue N1 = N->getOperand(1);
7739 if (SDValue V = DAG.simplifyShift(N0, N1))
7742 EVT VT = N0.getValueType();
7743 EVT ShiftVT = N1.getValueType();
7744 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7747 if (VT.isVector()) {
7748 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7751 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7752 // If setcc produces all-one true value then:
7753 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7754 if (N1CV && N1CV->isConstant()) {
7755 if (N0.getOpcode() == ISD::AND) {
7756 SDValue N00 = N0->getOperand(0);
7757 SDValue N01 = N0->getOperand(1);
7758 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
7760 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7761 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
7762 TargetLowering::ZeroOrNegativeOneBooleanContent) {
7764 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
7765 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7771 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7773 // fold (shl c1, c2) -> c1<<c2
7774 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
7777 if (SDValue NewSel = foldBinOpIntoSelect(N))
7780 // if (shl x, c) is known to be zero, return 0
7781 if (DAG.MaskedValueIsZero(SDValue(N, 0),
7782 APInt::getAllOnesValue(OpSizeInBits)))
7783 return DAG.getConstant(0, SDLoc(N), VT);
7785 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7786 if (N1.getOpcode() == ISD::TRUNCATE &&
7787 N1.getOperand(0).getOpcode() == ISD::AND) {
7788 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7789 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7792 if (SimplifyDemandedBits(SDValue(N, 0)))
7793 return SDValue(N, 0);
7795 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
7796 if (N0.getOpcode() == ISD::SHL) {
7797 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7798 ConstantSDNode *RHS) {
7799 APInt c1 = LHS->getAPIntValue();
7800 APInt c2 = RHS->getAPIntValue();
7801 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7802 return (c1 + c2).uge(OpSizeInBits);
7804 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7805 return DAG.getConstant(0, SDLoc(N), VT);
7807 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7808 ConstantSDNode *RHS) {
7809 APInt c1 = LHS->getAPIntValue();
7810 APInt c2 = RHS->getAPIntValue();
7811 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7812 return (c1 + c2).ult(OpSizeInBits);
7814 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7816 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7817 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7821 // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7822 // For this to be valid, the second form must not preserve any of the bits
7823 // that are shifted out by the inner shift in the first form. This means
7824 // the outer shift size must be >= the number of bits added by the ext.
7825 // As a corollary, we don't care what kind of ext it is.
7826 if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7827 N0.getOpcode() == ISD::ANY_EXTEND ||
7828 N0.getOpcode() == ISD::SIGN_EXTEND) &&
7829 N0.getOperand(0).getOpcode() == ISD::SHL) {
7830 SDValue N0Op0 = N0.getOperand(0);
7831 SDValue InnerShiftAmt = N0Op0.getOperand(1);
7832 EVT InnerVT = N0Op0.getValueType();
7833 uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
7835 auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7836 ConstantSDNode *RHS) {
7837 APInt c1 = LHS->getAPIntValue();
7838 APInt c2 = RHS->getAPIntValue();
7839 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7840 return c2.uge(OpSizeInBits - InnerBitwidth) &&
7841 (c1 + c2).uge(OpSizeInBits);
7843 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7844 /*AllowUndefs*/ false,
7845 /*AllowTypeMismatch*/ true))
7846 return DAG.getConstant(0, SDLoc(N), VT);
7848 auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7849 ConstantSDNode *RHS) {
7850 APInt c1 = LHS->getAPIntValue();
7851 APInt c2 = RHS->getAPIntValue();
7852 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7853 return c2.uge(OpSizeInBits - InnerBitwidth) &&
7854 (c1 + c2).ult(OpSizeInBits);
7856 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7857 /*AllowUndefs*/ false,
7858 /*AllowTypeMismatch*/ true)) {
7860 SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
7861 SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7862 Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7863 return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7867 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7868 // Only fold this if the inner zext has no other uses to avoid increasing
7869 // the total number of instructions.
7870 if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7871 N0.getOperand(0).getOpcode() == ISD::SRL) {
7872 SDValue N0Op0 = N0.getOperand(0);
7873 SDValue InnerShiftAmt = N0Op0.getOperand(1);
7875 auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7876 APInt c1 = LHS->getAPIntValue();
7877 APInt c2 = RHS->getAPIntValue();
7878 zeroExtendToMatch(c1, c2);
7879 return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7881 if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7882 /*AllowUndefs*/ false,
7883 /*AllowTypeMismatch*/ true)) {
7885 EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7886 SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7887 NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7888 AddToWorklist(NewSHL.getNode());
7889 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7893 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
7894 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
7895 // TODO - support non-uniform vector shift amounts.
7896 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7897 N0->getFlags().hasExact()) {
7898 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7899 uint64_t C1 = N0C1->getZExtValue();
7900 uint64_t C2 = N1C->getZExtValue();
7903 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7904 DAG.getConstant(C2 - C1, DL, ShiftVT));
7905 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7906 DAG.getConstant(C1 - C2, DL, ShiftVT));
7910 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
7911 // (and (srl x, (sub c1, c2), MASK)
7912 // Only fold this if the inner shift has no other uses -- if it does, folding
7913 // this will increase the total number of instructions.
7914 // TODO - drop hasOneUse requirement if c1 == c2?
7915 // TODO - support non-uniform vector shift amounts.
7916 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7917 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
7918 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7919 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7920 uint64_t c1 = N0C1->getZExtValue();
7921 uint64_t c2 = N1C->getZExtValue();
7922 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7927 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7928 DAG.getConstant(c2 - c1, DL, ShiftVT));
7930 Mask.lshrInPlace(c1 - c2);
7932 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7933 DAG.getConstant(c1 - c2, DL, ShiftVT));
7936 return DAG.getNode(ISD::AND, DL, VT, Shift,
7937 DAG.getConstant(Mask, DL, VT));
7942 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
7943 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7944 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7946 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7947 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7948 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7951 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7952 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7953 // Variant of version done on multiply, except mul by a power of 2 is turned
7955 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7956 N0.getNode()->hasOneUse() &&
7957 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7958 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7959 TLI.isDesirableToCommuteWithShift(N, Level)) {
7960 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7961 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7962 AddToWorklist(Shl0.getNode());
7963 AddToWorklist(Shl1.getNode());
7964 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7967 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7968 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7969 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7970 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7971 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7972 if (isConstantOrConstantVector(Shl))
7973 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7976 if (N1C && !N1C->isOpaque())
7977 if (SDValue NewSHL = visitShiftByConstant(N))
7980 // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
7981 if (N0.getOpcode() == ISD::VSCALE)
7982 if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
7984 APInt C0 = N0.getConstantOperandAPInt(0);
7985 APInt C1 = NC1->getAPIntValue();
7986 return DAG.getVScale(DL, VT, C0 << C1);
7992 // Transform a right shift of a multiply into a multiply-high.
7994 // (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
7995 // (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
7996 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
7997 const TargetLowering &TLI) {
7998 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
7999 "SRL or SRA node is required here!");
8001 // Check the shift amount. Proceed with the transformation if the shift
8002 // amount is constant.
8003 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8009 // The operation feeding into the shift must be a multiply.
8010 SDValue ShiftOperand = N->getOperand(0);
8011 if (ShiftOperand.getOpcode() != ISD::MUL)
8014 // Both operands must be equivalent extend nodes.
8015 SDValue LeftOp = ShiftOperand.getOperand(0);
8016 SDValue RightOp = ShiftOperand.getOperand(1);
8017 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8018 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8020 if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8023 EVT WideVT1 = LeftOp.getValueType();
8024 EVT WideVT2 = RightOp.getValueType();
8026 // Proceed with the transformation if the wide types match.
8027 assert((WideVT1 == WideVT2) &&
8028 "Cannot have a multiply node with two different operand types.");
8030 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8031 // Check that the two extend nodes are the same type.
8032 if (NarrowVT != RightOp.getOperand(0).getValueType())
8035 // Only transform into mulh if mulh for the narrow type is cheaper than
8036 // a multiply followed by a shift. This should also check if mulh is
8037 // legal for NarrowVT on the target.
8038 if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
8041 // Proceed with the transformation if the wide type is twice as large
8042 // as the narrow type.
8043 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8044 if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8047 // Check the shift amount with the narrow type size.
8048 // Proceed with the transformation if the shift amount is the width
8049 // of the narrow type.
8050 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8051 if (ShiftAmt != NarrowVTSize)
8054 // If the operation feeding into the MUL is a sign extend (sext),
8055 // we use mulhs. Othewise, zero extends (zext) use mulhu.
8056 unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8058 SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8059 RightOp.getOperand(0));
8060 return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8061 : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8064 SDValue DAGCombiner::visitSRA(SDNode *N) {
8065 SDValue N0 = N->getOperand(0);
8066 SDValue N1 = N->getOperand(1);
8067 if (SDValue V = DAG.simplifyShift(N0, N1))
8070 EVT VT = N0.getValueType();
8071 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8073 // Arithmetic shifting an all-sign-bit value is a no-op.
8074 // fold (sra 0, x) -> 0
8075 // fold (sra -1, x) -> -1
8076 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8081 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8084 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8086 // fold (sra c1, c2) -> (sra c1, c2)
8087 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8090 if (SDValue NewSel = foldBinOpIntoSelect(N))
8093 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
8095 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8096 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8097 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8099 ExtVT = EVT::getVectorVT(*DAG.getContext(),
8100 ExtVT, VT.getVectorNumElements());
8101 if (!LegalOperations ||
8102 TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8103 TargetLowering::Legal)
8104 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8105 N0.getOperand(0), DAG.getValueType(ExtVT));
8108 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8109 // clamp (add c1, c2) to max shift.
8110 if (N0.getOpcode() == ISD::SRA) {
8112 EVT ShiftVT = N1.getValueType();
8113 EVT ShiftSVT = ShiftVT.getScalarType();
8114 SmallVector<SDValue, 16> ShiftValues;
8116 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8117 APInt c1 = LHS->getAPIntValue();
8118 APInt c2 = RHS->getAPIntValue();
8119 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8120 APInt Sum = c1 + c2;
8122 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8123 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8126 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8129 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8131 ShiftValue = ShiftValues[0];
8132 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8136 // fold (sra (shl X, m), (sub result_size, n))
8137 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
8138 // result_size - n != m.
8139 // If truncate is free for the target sext(shl) is likely to result in better
8141 if (N0.getOpcode() == ISD::SHL && N1C) {
8142 // Get the two constanst of the shifts, CN0 = m, CN = n.
8143 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8145 LLVMContext &Ctx = *DAG.getContext();
8146 // Determine what the truncate's result bitsize and type would be.
8147 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8150 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8152 // Determine the residual right-shift amount.
8153 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8155 // If the shift is not a no-op (in which case this should be just a sign
8156 // extend already), the truncated to type is legal, sign_extend is legal
8157 // on that type, and the truncate to that type is both legal and free,
8158 // perform the transform.
8159 if ((ShiftAmt > 0) &&
8160 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8161 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8162 TLI.isTruncateFree(VT, TruncVT)) {
8164 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8165 getShiftAmountTy(N0.getOperand(0).getValueType()));
8166 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8167 N0.getOperand(0), Amt);
8168 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8170 return DAG.getNode(ISD::SIGN_EXTEND, DL,
8171 N->getValueType(0), Trunc);
8176 // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8177 // sra (add (shl X, N1C), AddC), N1C -->
8178 // sext (add (trunc X to (width - N1C)), AddC')
8179 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8180 N0.getOperand(0).getOpcode() == ISD::SHL &&
8181 N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8182 if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8183 SDValue Shl = N0.getOperand(0);
8184 // Determine what the truncate's type would be and ask the target if that
8185 // is a free operation.
8186 LLVMContext &Ctx = *DAG.getContext();
8187 unsigned ShiftAmt = N1C->getZExtValue();
8188 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8190 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8192 // TODO: The simple type check probably belongs in the default hook
8193 // implementation and/or target-specific overrides (because
8194 // non-simple types likely require masking when legalized), but that
8195 // restriction may conflict with other transforms.
8196 if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8197 TLI.isTruncateFree(VT, TruncVT)) {
8199 SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8200 SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8201 trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8202 SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8203 return DAG.getSExtOrTrunc(Add, DL, VT);
8208 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8209 if (N1.getOpcode() == ISD::TRUNCATE &&
8210 N1.getOperand(0).getOpcode() == ISD::AND) {
8211 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8212 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8215 // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8216 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8217 // if c1 is equal to the number of bits the trunc removes
8218 // TODO - support non-uniform vector shift amounts.
8219 if (N0.getOpcode() == ISD::TRUNCATE &&
8220 (N0.getOperand(0).getOpcode() == ISD::SRL ||
8221 N0.getOperand(0).getOpcode() == ISD::SRA) &&
8222 N0.getOperand(0).hasOneUse() &&
8223 N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8224 SDValue N0Op0 = N0.getOperand(0);
8225 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8226 EVT LargeVT = N0Op0.getValueType();
8227 unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8228 if (LargeShift->getAPIntValue() == TruncBits) {
8230 SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8231 getShiftAmountTy(LargeVT));
8233 DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8234 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8239 // Simplify, based on bits shifted out of the LHS.
8240 if (SimplifyDemandedBits(SDValue(N, 0)))
8241 return SDValue(N, 0);
8243 // If the sign bit is known to be zero, switch this to a SRL.
8244 if (DAG.SignBitIsZero(N0))
8245 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8247 if (N1C && !N1C->isOpaque())
8248 if (SDValue NewSRA = visitShiftByConstant(N))
8251 // Try to transform this shift into a multiply-high if
8252 // it matches the appropriate pattern detected in combineShiftToMULH.
8253 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8259 SDValue DAGCombiner::visitSRL(SDNode *N) {
8260 SDValue N0 = N->getOperand(0);
8261 SDValue N1 = N->getOperand(1);
8262 if (SDValue V = DAG.simplifyShift(N0, N1))
8265 EVT VT = N0.getValueType();
8266 unsigned OpSizeInBits = VT.getScalarSizeInBits();
8270 if (SDValue FoldedVOp = SimplifyVBinOp(N))
8273 ConstantSDNode *N1C = isConstOrConstSplat(N1);
8275 // fold (srl c1, c2) -> c1 >>u c2
8276 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8279 if (SDValue NewSel = foldBinOpIntoSelect(N))
8282 // if (srl x, c) is known to be zero, return 0
8283 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8284 APInt::getAllOnesValue(OpSizeInBits)))
8285 return DAG.getConstant(0, SDLoc(N), VT);
8287 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8288 if (N0.getOpcode() == ISD::SRL) {
8289 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8290 ConstantSDNode *RHS) {
8291 APInt c1 = LHS->getAPIntValue();
8292 APInt c2 = RHS->getAPIntValue();
8293 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8294 return (c1 + c2).uge(OpSizeInBits);
8296 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8297 return DAG.getConstant(0, SDLoc(N), VT);
8299 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8300 ConstantSDNode *RHS) {
8301 APInt c1 = LHS->getAPIntValue();
8302 APInt c2 = RHS->getAPIntValue();
8303 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8304 return (c1 + c2).ult(OpSizeInBits);
8306 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8308 EVT ShiftVT = N1.getValueType();
8309 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8310 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8314 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8315 N0.getOperand(0).getOpcode() == ISD::SRL) {
8316 SDValue InnerShift = N0.getOperand(0);
8317 // TODO - support non-uniform vector shift amounts.
8318 if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8319 uint64_t c1 = N001C->getZExtValue();
8320 uint64_t c2 = N1C->getZExtValue();
8321 EVT InnerShiftVT = InnerShift.getValueType();
8322 EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8323 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8324 // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8325 // This is only valid if the OpSizeInBits + c1 = size of inner shift.
8326 if (c1 + OpSizeInBits == InnerShiftSize) {
8328 if (c1 + c2 >= InnerShiftSize)
8329 return DAG.getConstant(0, DL, VT);
8330 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8331 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8332 InnerShift.getOperand(0), NewShiftAmt);
8333 return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8335 // In the more general case, we can clear the high bits after the shift:
8336 // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8337 if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8338 c1 + c2 < InnerShiftSize) {
8340 SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8341 SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8342 InnerShift.getOperand(0), NewShiftAmt);
8343 SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8346 SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8347 return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8352 // fold (srl (shl x, c), c) -> (and x, cst2)
8353 // TODO - (srl (shl x, c1), c2).
8354 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8355 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8358 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8359 AddToWorklist(Mask.getNode());
8360 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8363 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8364 // TODO - support non-uniform vector shift amounts.
8365 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8366 // Shifting in all undef bits?
8367 EVT SmallVT = N0.getOperand(0).getValueType();
8368 unsigned BitSize = SmallVT.getScalarSizeInBits();
8369 if (N1C->getAPIntValue().uge(BitSize))
8370 return DAG.getUNDEF(VT);
8372 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8373 uint64_t ShiftAmt = N1C->getZExtValue();
8375 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8377 DAG.getConstant(ShiftAmt, DL0,
8378 getShiftAmountTy(SmallVT)));
8379 AddToWorklist(SmallShift.getNode());
8380 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8382 return DAG.getNode(ISD::AND, DL, VT,
8383 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8384 DAG.getConstant(Mask, DL, VT));
8388 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
8389 // bit, which is unmodified by sra.
8390 if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8391 if (N0.getOpcode() == ISD::SRA)
8392 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8395 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
8396 if (N1C && N0.getOpcode() == ISD::CTLZ &&
8397 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8398 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8400 // If any of the input bits are KnownOne, then the input couldn't be all
8401 // zeros, thus the result of the srl will always be zero.
8402 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8404 // If all of the bits input the to ctlz node are known to be zero, then
8405 // the result of the ctlz is "32" and the result of the shift is one.
8406 APInt UnknownBits = ~Known.Zero;
8407 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8409 // Otherwise, check to see if there is exactly one bit input to the ctlz.
8410 if (UnknownBits.isPowerOf2()) {
8411 // Okay, we know that only that the single bit specified by UnknownBits
8412 // could be set on input to the CTLZ node. If this bit is set, the SRL
8413 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8414 // to an SRL/XOR pair, which is likely to simplify more.
8415 unsigned ShAmt = UnknownBits.countTrailingZeros();
8416 SDValue Op = N0.getOperand(0);
8420 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8421 DAG.getConstant(ShAmt, DL,
8422 getShiftAmountTy(Op.getValueType())));
8423 AddToWorklist(Op.getNode());
8427 return DAG.getNode(ISD::XOR, DL, VT,
8428 Op, DAG.getConstant(1, DL, VT));
8432 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8433 if (N1.getOpcode() == ISD::TRUNCATE &&
8434 N1.getOperand(0).getOpcode() == ISD::AND) {
8435 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8436 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8439 // fold operands of srl based on knowledge that the low bits are not
8441 if (SimplifyDemandedBits(SDValue(N, 0)))
8442 return SDValue(N, 0);
8444 if (N1C && !N1C->isOpaque())
8445 if (SDValue NewSRL = visitShiftByConstant(N))
8448 // Attempt to convert a srl of a load into a narrower zero-extending load.
8449 if (SDValue NarrowLoad = ReduceLoadWidth(N))
8452 // Here is a common situation. We want to optimize:
8455 // %b = and i32 %a, 2
8456 // %c = srl i32 %b, 1
8457 // brcond i32 %c ...
8463 // %c = setcc eq %b, 0
8466 // However when after the source operand of SRL is optimized into AND, the SRL
8467 // itself may not be optimized further. Look for it and add the BRCOND into
8469 if (N->hasOneUse()) {
8470 SDNode *Use = *N->use_begin();
8471 if (Use->getOpcode() == ISD::BRCOND)
8473 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8474 // Also look pass the truncate.
8475 Use = *Use->use_begin();
8476 if (Use->getOpcode() == ISD::BRCOND)
8481 // Try to transform this shift into a multiply-high if
8482 // it matches the appropriate pattern detected in combineShiftToMULH.
8483 if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8489 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8490 EVT VT = N->getValueType(0);
8491 SDValue N0 = N->getOperand(0);
8492 SDValue N1 = N->getOperand(1);
8493 SDValue N2 = N->getOperand(2);
8494 bool IsFSHL = N->getOpcode() == ISD::FSHL;
8495 unsigned BitWidth = VT.getScalarSizeInBits();
8497 // fold (fshl N0, N1, 0) -> N0
8498 // fold (fshr N0, N1, 0) -> N1
8499 if (isPowerOf2_32(BitWidth))
8500 if (DAG.MaskedValueIsZero(
8501 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8502 return IsFSHL ? N0 : N1;
8504 auto IsUndefOrZero = [](SDValue V) {
8505 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8508 // TODO - support non-uniform vector shift amounts.
8509 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8510 EVT ShAmtTy = N2.getValueType();
8512 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
8513 if (Cst->getAPIntValue().uge(BitWidth)) {
8514 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8515 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8516 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
8519 unsigned ShAmt = Cst->getZExtValue();
8521 return IsFSHL ? N0 : N1;
8523 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8524 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8525 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8526 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
8527 if (IsUndefOrZero(N0))
8528 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8529 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8530 SDLoc(N), ShAmtTy));
8531 if (IsUndefOrZero(N1))
8532 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8533 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8534 SDLoc(N), ShAmtTy));
8536 // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8537 // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8538 // TODO - bigendian support once we have test coverage.
8539 // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine?
8540 // TODO - permit LHS EXTLOAD if extensions are shifted out.
8541 if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
8542 !DAG.getDataLayout().isBigEndian()) {
8543 auto *LHS = dyn_cast<LoadSDNode>(N0);
8544 auto *RHS = dyn_cast<LoadSDNode>(N1);
8545 if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
8546 LHS->getAddressSpace() == RHS->getAddressSpace() &&
8547 (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
8548 ISD::isNON_EXTLoad(LHS)) {
8549 if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
8552 IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
8553 Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
8555 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8556 RHS->getAddressSpace(), NewAlign,
8557 RHS->getMemOperand()->getFlags(), &Fast) &&
8560 DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL);
8561 AddToWorklist(NewPtr.getNode());
8562 SDValue Load = DAG.getLoad(
8563 VT, DL, RHS->getChain(), NewPtr,
8564 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8565 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
8566 // Replace the old load's chain with the new load's chain.
8567 WorklistRemover DeadNodes(*this);
8568 DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
8576 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
8577 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
8578 // iff We know the shift amount is in range.
8579 // TODO: when is it worth doing SUB(BW, N2) as well?
8580 if (isPowerOf2_32(BitWidth)) {
8581 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
8582 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8583 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
8584 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8585 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
8588 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
8589 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
8590 // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
8591 // is legal as well we might be better off avoiding non-constant (BW - N2).
8592 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8593 if (N0 == N1 && hasOperation(RotOpc, VT))
8594 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8596 // Simplify, based on bits shifted out of N0/N1.
8597 if (SimplifyDemandedBits(SDValue(N, 0)))
8598 return SDValue(N, 0);
8603 SDValue DAGCombiner::visitABS(SDNode *N) {
8604 SDValue N0 = N->getOperand(0);
8605 EVT VT = N->getValueType(0);
8607 // fold (abs c1) -> c2
8608 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8609 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8610 // fold (abs (abs x)) -> (abs x)
8611 if (N0.getOpcode() == ISD::ABS)
8613 // fold (abs x) -> x iff not-negative
8614 if (DAG.SignBitIsZero(N0))
8619 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8620 SDValue N0 = N->getOperand(0);
8621 EVT VT = N->getValueType(0);
8623 // fold (bswap c1) -> c2
8624 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8625 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8626 // fold (bswap (bswap x)) -> x
8627 if (N0.getOpcode() == ISD::BSWAP)
8628 return N0->getOperand(0);
8632 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8633 SDValue N0 = N->getOperand(0);
8634 EVT VT = N->getValueType(0);
8636 // fold (bitreverse c1) -> c2
8637 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8638 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8639 // fold (bitreverse (bitreverse x)) -> x
8640 if (N0.getOpcode() == ISD::BITREVERSE)
8641 return N0.getOperand(0);
8645 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8646 SDValue N0 = N->getOperand(0);
8647 EVT VT = N->getValueType(0);
8649 // fold (ctlz c1) -> c2
8650 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8651 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8653 // If the value is known never to be zero, switch to the undef version.
8654 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8655 if (DAG.isKnownNeverZero(N0))
8656 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8662 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8663 SDValue N0 = N->getOperand(0);
8664 EVT VT = N->getValueType(0);
8666 // fold (ctlz_zero_undef c1) -> c2
8667 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8668 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8672 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8673 SDValue N0 = N->getOperand(0);
8674 EVT VT = N->getValueType(0);
8676 // fold (cttz c1) -> c2
8677 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8678 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8680 // If the value is known never to be zero, switch to the undef version.
8681 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8682 if (DAG.isKnownNeverZero(N0))
8683 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8689 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8690 SDValue N0 = N->getOperand(0);
8691 EVT VT = N->getValueType(0);
8693 // fold (cttz_zero_undef c1) -> c2
8694 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8695 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8699 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8700 SDValue N0 = N->getOperand(0);
8701 EVT VT = N->getValueType(0);
8703 // fold (ctpop c1) -> c2
8704 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8705 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8709 // FIXME: This should be checking for no signed zeros on individual operands, as
8711 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8713 const TargetLowering &TLI) {
8714 const TargetOptions &Options = DAG.getTarget().Options;
8715 EVT VT = LHS.getValueType();
8717 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8718 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8719 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8722 /// Generate Min/Max node
8723 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8724 SDValue RHS, SDValue True, SDValue False,
8725 ISD::CondCode CC, const TargetLowering &TLI,
8726 SelectionDAG &DAG) {
8727 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8730 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8738 // Since it's known never nan to get here already, either fminnum or
8739 // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8740 // expanded in terms of it.
8741 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8742 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8743 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8745 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8746 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8747 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8756 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8757 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8758 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8760 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8761 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8762 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8770 /// If a (v)select has a condition value that is a sign-bit test, try to smear
8771 /// the condition operand sign-bit across the value width and use it as a mask.
8772 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
8773 SDValue Cond = N->getOperand(0);
8774 SDValue C1 = N->getOperand(1);
8775 SDValue C2 = N->getOperand(2);
8776 assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
8777 "Expected select-of-constants");
8779 EVT VT = N->getValueType(0);
8780 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
8781 VT != Cond.getOperand(0).getValueType())
8784 // The inverted-condition + commuted-select variants of these patterns are
8785 // canonicalized to these forms in IR.
8786 SDValue X = Cond.getOperand(0);
8787 SDValue CondC = Cond.getOperand(1);
8788 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
8789 if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
8790 isAllOnesOrAllOnesSplat(C2)) {
8791 // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
8793 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8794 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8795 return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
8797 if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
8798 // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
8800 SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8801 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8802 return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
8807 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8808 SDValue Cond = N->getOperand(0);
8809 SDValue N1 = N->getOperand(1);
8810 SDValue N2 = N->getOperand(2);
8811 EVT VT = N->getValueType(0);
8812 EVT CondVT = Cond.getValueType();
8815 if (!VT.isInteger())
8818 auto *C1 = dyn_cast<ConstantSDNode>(N1);
8819 auto *C2 = dyn_cast<ConstantSDNode>(N2);
8823 // Only do this before legalization to avoid conflicting with target-specific
8824 // transforms in the other direction (create a select from a zext/sext). There
8825 // is also a target-independent combine here in DAGCombiner in the other
8826 // direction for (select Cond, -1, 0) when the condition is not i1.
8827 if (CondVT == MVT::i1 && !LegalOperations) {
8828 if (C1->isNullValue() && C2->isOne()) {
8829 // select Cond, 0, 1 --> zext (!Cond)
8830 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8832 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8835 if (C1->isNullValue() && C2->isAllOnesValue()) {
8836 // select Cond, 0, -1 --> sext (!Cond)
8837 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8839 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8842 if (C1->isOne() && C2->isNullValue()) {
8843 // select Cond, 1, 0 --> zext (Cond)
8845 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8848 if (C1->isAllOnesValue() && C2->isNullValue()) {
8849 // select Cond, -1, 0 --> sext (Cond)
8851 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8855 // Use a target hook because some targets may prefer to transform in the
8857 if (TLI.convertSelectOfConstantsToMath(VT)) {
8858 // For any constants that differ by 1, we can transform the select into an
8860 const APInt &C1Val = C1->getAPIntValue();
8861 const APInt &C2Val = C2->getAPIntValue();
8862 if (C1Val - 1 == C2Val) {
8863 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8865 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8866 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8868 if (C1Val + 1 == C2Val) {
8869 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8871 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8872 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8875 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
8876 if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
8878 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8879 SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
8880 return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
8883 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
8890 // fold (select Cond, 0, 1) -> (xor Cond, 1)
8891 // We can't do this reliably if integer based booleans have different contents
8892 // to floating point based booleans. This is because we can't tell whether we
8893 // have an integer-based boolean or a floating-point-based boolean unless we
8894 // can find the SETCC that produced it and inspect its operands. This is
8895 // fairly easy if C is the SETCC node, but it can potentially be
8896 // undiscoverable (or not reasonably discoverable). For example, it could be
8897 // in another basic block or it could require searching a complicated
8899 if (CondVT.isInteger() &&
8900 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8901 TargetLowering::ZeroOrOneBooleanContent &&
8902 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8903 TargetLowering::ZeroOrOneBooleanContent &&
8904 C1->isNullValue() && C2->isOne()) {
8906 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8907 if (VT.bitsEq(CondVT))
8909 return DAG.getZExtOrTrunc(NotCond, DL, VT);
8915 SDValue DAGCombiner::visitSELECT(SDNode *N) {
8916 SDValue N0 = N->getOperand(0);
8917 SDValue N1 = N->getOperand(1);
8918 SDValue N2 = N->getOperand(2);
8919 EVT VT = N->getValueType(0);
8920 EVT VT0 = N0.getValueType();
8922 SDNodeFlags Flags = N->getFlags();
8924 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8927 // fold (select X, X, Y) -> (or X, Y)
8928 // fold (select X, 1, Y) -> (or C, Y)
8929 if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8930 return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8932 if (SDValue V = foldSelectOfConstants(N))
8935 // fold (select C, 0, X) -> (and (not C), X)
8936 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8937 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8938 AddToWorklist(NOTNode.getNode());
8939 return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8941 // fold (select C, X, 1) -> (or (not C), X)
8942 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8943 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8944 AddToWorklist(NOTNode.getNode());
8945 return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8947 // fold (select X, Y, X) -> (and X, Y)
8948 // fold (select X, Y, 0) -> (and X, Y)
8949 if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8950 return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8952 // If we can fold this based on the true/false value, do so.
8953 if (SimplifySelectOps(N, N1, N2))
8954 return SDValue(N, 0); // Don't revisit N.
8956 if (VT0 == MVT::i1) {
8957 // The code in this block deals with the following 2 equivalences:
8958 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8959 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8960 // The target can specify its preferred form with the
8961 // shouldNormalizeToSelectSequence() callback. However we always transform
8962 // to the right anyway if we find the inner select exists in the DAG anyway
8963 // and we always transform to the left side if we know that we can further
8964 // optimize the combination of the conditions.
8965 bool normalizeToSequence =
8966 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8967 // select (and Cond0, Cond1), X, Y
8968 // -> select Cond0, (select Cond1, X, Y), Y
8969 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8970 SDValue Cond0 = N0->getOperand(0);
8971 SDValue Cond1 = N0->getOperand(1);
8972 SDValue InnerSelect =
8973 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8974 if (normalizeToSequence || !InnerSelect.use_empty())
8975 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8976 InnerSelect, N2, Flags);
8977 // Cleanup on failure.
8978 if (InnerSelect.use_empty())
8979 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8981 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8982 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8983 SDValue Cond0 = N0->getOperand(0);
8984 SDValue Cond1 = N0->getOperand(1);
8985 SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8986 Cond1, N1, N2, Flags);
8987 if (normalizeToSequence || !InnerSelect.use_empty())
8988 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8989 InnerSelect, Flags);
8990 // Cleanup on failure.
8991 if (InnerSelect.use_empty())
8992 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8995 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8996 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8997 SDValue N1_0 = N1->getOperand(0);
8998 SDValue N1_1 = N1->getOperand(1);
8999 SDValue N1_2 = N1->getOperand(2);
9000 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
9001 // Create the actual and node if we can generate good code for it.
9002 if (!normalizeToSequence) {
9003 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9004 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9007 // Otherwise see if we can optimize the "and" to a better pattern.
9008 if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9009 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9014 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9015 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9016 SDValue N2_0 = N2->getOperand(0);
9017 SDValue N2_1 = N2->getOperand(1);
9018 SDValue N2_2 = N2->getOperand(2);
9019 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
9020 // Create the actual or node if we can generate good code for it.
9021 if (!normalizeToSequence) {
9022 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9023 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9026 // Otherwise see if we can optimize to a better pattern.
9027 if (SDValue Combined = visitORLike(N0, N2_0, N))
9028 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9034 // select (not Cond), N1, N2 -> select Cond, N2, N1
9035 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9036 SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9037 SelectOp->setFlags(Flags);
9041 // Fold selects based on a setcc into other things, such as min/max/abs.
9042 if (N0.getOpcode() == ISD::SETCC) {
9043 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9044 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9046 // select (fcmp lt x, y), x, y -> fminnum x, y
9047 // select (fcmp gt x, y), x, y -> fmaxnum x, y
9049 // This is OK if we don't care what happens if either operand is a NaN.
9050 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9051 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9055 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9056 // This is conservatively limited to pre-legal-operations to give targets
9057 // a chance to reverse the transform if they want to do that. Also, it is
9058 // unlikely that the pattern would be formed late, so it's probably not
9059 // worth going through the other checks.
9060 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9061 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9062 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9063 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9064 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9065 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9066 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9067 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9069 // The IR equivalent of this transform would have this form:
9071 // %c = icmp ugt %x, ~C
9072 // %r = select %c, -1, %a
9074 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9075 // %u0 = extractvalue %u, 0
9076 // %u1 = extractvalue %u, 1
9077 // %r = select %u1, -1, %u0
9078 SDVTList VTs = DAG.getVTList(VT, VT0);
9079 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9080 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9084 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9085 (!LegalOperations &&
9086 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9087 // Any flags available in a select/setcc fold will be on the setcc as they
9088 // migrated from fcmp
9089 Flags = N0.getNode()->getFlags();
9090 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9091 N2, N0.getOperand(2));
9092 SelectNode->setFlags(Flags);
9096 return SimplifySelect(DL, N0, N1, N2);
9102 // This function assumes all the vselect's arguments are CONCAT_VECTOR
9103 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
9104 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9106 SDValue Cond = N->getOperand(0);
9107 SDValue LHS = N->getOperand(1);
9108 SDValue RHS = N->getOperand(2);
9109 EVT VT = N->getValueType(0);
9110 int NumElems = VT.getVectorNumElements();
9111 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9112 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9113 Cond.getOpcode() == ISD::BUILD_VECTOR);
9115 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
9116 // binary ones here.
9117 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9120 // We're sure we have an even number of elements due to the
9121 // concat_vectors we have as arguments to vselect.
9122 // Skip BV elements until we find one that's not an UNDEF
9123 // After we find an UNDEF element, keep looping until we get to half the
9124 // length of the BV and see if all the non-undef nodes are the same.
9125 ConstantSDNode *BottomHalf = nullptr;
9126 for (int i = 0; i < NumElems / 2; ++i) {
9127 if (Cond->getOperand(i)->isUndef())
9130 if (BottomHalf == nullptr)
9131 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9132 else if (Cond->getOperand(i).getNode() != BottomHalf)
9136 // Do the same for the second half of the BuildVector
9137 ConstantSDNode *TopHalf = nullptr;
9138 for (int i = NumElems / 2; i < NumElems; ++i) {
9139 if (Cond->getOperand(i)->isUndef())
9142 if (TopHalf == nullptr)
9143 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9144 else if (Cond->getOperand(i).getNode() != TopHalf)
9148 assert(TopHalf && BottomHalf &&
9149 "One half of the selector was all UNDEFs and the other was all the "
9150 "same value. This should have been addressed before this function.");
9152 ISD::CONCAT_VECTORS, DL, VT,
9153 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9154 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9157 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9158 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9159 SDValue Mask = MSC->getMask();
9160 SDValue Chain = MSC->getChain();
9163 // Zap scatters with a zero mask.
9164 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9170 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9171 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9172 SDValue Mask = MST->getMask();
9173 SDValue Chain = MST->getChain();
9176 // Zap masked stores with a zero mask.
9177 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9180 // Try transforming N to an indexed store.
9181 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9182 return SDValue(N, 0);
9187 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9188 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9189 SDValue Mask = MGT->getMask();
9192 // Zap gathers with a zero mask.
9193 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9194 return CombineTo(N, MGT->getPassThru(), MGT->getChain());
9199 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9200 MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9201 SDValue Mask = MLD->getMask();
9204 // Zap masked loads with a zero mask.
9205 if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9206 return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9208 // Try transforming N to an indexed load.
9209 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9210 return SDValue(N, 0);
9215 /// A vector select of 2 constant vectors can be simplified to math/logic to
9216 /// avoid a variable select instruction and possibly avoid constant loads.
9217 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9218 SDValue Cond = N->getOperand(0);
9219 SDValue N1 = N->getOperand(1);
9220 SDValue N2 = N->getOperand(2);
9221 EVT VT = N->getValueType(0);
9222 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9223 !TLI.convertSelectOfConstantsToMath(VT) ||
9224 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9225 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9228 // Check if we can use the condition value to increment/decrement a single
9229 // constant value. This simplifies a select to an add and removes a constant
9230 // load/materialization from the general case.
9231 bool AllAddOne = true;
9232 bool AllSubOne = true;
9233 unsigned Elts = VT.getVectorNumElements();
9234 for (unsigned i = 0; i != Elts; ++i) {
9235 SDValue N1Elt = N1.getOperand(i);
9236 SDValue N2Elt = N2.getOperand(i);
9237 if (N1Elt.isUndef() || N2Elt.isUndef())
9239 if (N1Elt.getValueType() != N2Elt.getValueType())
9242 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9243 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9250 // Further simplifications for the extra-special cases where the constants are
9251 // all 0 or all -1 should be implemented as folds of these patterns.
9253 if (AllAddOne || AllSubOne) {
9254 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9255 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
9256 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9257 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9258 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9261 // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
9263 if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9264 isNullOrNullSplat(N2)) {
9265 SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9266 SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9267 return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9270 if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9273 // The general case for select-of-constants:
9274 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9275 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9276 // leave that to a machine-specific pass.
9280 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9281 SDValue N0 = N->getOperand(0);
9282 SDValue N1 = N->getOperand(1);
9283 SDValue N2 = N->getOperand(2);
9284 EVT VT = N->getValueType(0);
9287 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9290 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9291 if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9292 return DAG.getSelect(DL, VT, F, N2, N1);
9294 // Canonicalize integer abs.
9295 // vselect (setg[te] X, 0), X, -X ->
9296 // vselect (setgt X, -1), X, -X ->
9297 // vselect (setl[te] X, 0), -X, X ->
9298 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
9299 if (N0.getOpcode() == ISD::SETCC) {
9300 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9301 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9303 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9305 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9306 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9307 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9308 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9309 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9310 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9311 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9314 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9315 return DAG.getNode(ISD::ABS, DL, VT, LHS);
9317 SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9318 DAG.getConstant(VT.getScalarSizeInBits() - 1,
9319 DL, getShiftAmountTy(VT)));
9320 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9321 AddToWorklist(Shift.getNode());
9322 AddToWorklist(Add.getNode());
9323 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9326 // vselect x, y (fcmp lt x, y) -> fminnum x, y
9327 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
9329 // This is OK if we don't care about what happens if either operand is a
9332 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9333 if (SDValue FMinMax =
9334 combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9338 // If this select has a condition (setcc) with narrower operands than the
9339 // select, try to widen the compare to match the select width.
9340 // TODO: This should be extended to handle any constant.
9341 // TODO: This could be extended to handle non-loading patterns, but that
9342 // requires thorough testing to avoid regressions.
9343 if (isNullOrNullSplat(RHS)) {
9344 EVT NarrowVT = LHS.getValueType();
9345 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9346 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
9347 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
9348 unsigned WideWidth = WideVT.getScalarSizeInBits();
9349 bool IsSigned = isSignedIntSetCC(CC);
9350 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9351 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
9352 SetCCWidth != 1 && SetCCWidth < WideWidth &&
9353 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
9354 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
9355 // Both compare operands can be widened for free. The LHS can use an
9356 // extended load, and the RHS is a constant:
9357 // vselect (ext (setcc load(X), C)), N1, N2 -->
9358 // vselect (setcc extload(X), C'), N1, N2
9359 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9360 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
9361 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
9362 EVT WideSetCCVT = getSetCCResultType(WideVT);
9363 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
9364 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
9369 if (SimplifySelectOps(N, N1, N2))
9370 return SDValue(N, 0); // Don't revisit N.
9372 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
9373 if (ISD::isBuildVectorAllOnes(N0.getNode()))
9375 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
9376 if (ISD::isBuildVectorAllZeros(N0.getNode()))
9379 // The ConvertSelectToConcatVector function is assuming both the above
9380 // checks for (vselect (build_vector all{ones,zeros) ...) have been made
9382 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9383 N2.getOpcode() == ISD::CONCAT_VECTORS &&
9384 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
9385 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
9389 if (SDValue V = foldVSelectOfConstants(N))
9395 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
9396 SDValue N0 = N->getOperand(0);
9397 SDValue N1 = N->getOperand(1);
9398 SDValue N2 = N->getOperand(2);
9399 SDValue N3 = N->getOperand(3);
9400 SDValue N4 = N->getOperand(4);
9401 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
9403 // fold select_cc lhs, rhs, x, x, cc -> x
9407 // Determine if the condition we're dealing with is constant
9408 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
9409 CC, SDLoc(N), false)) {
9410 AddToWorklist(SCC.getNode());
9412 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
9413 if (!SCCC->isNullValue())
9414 return N2; // cond always true -> true val
9416 return N3; // cond always false -> false val
9417 } else if (SCC->isUndef()) {
9418 // When the condition is UNDEF, just return the first operand. This is
9419 // coherent the DAG creation, no setcc node is created in this case
9421 } else if (SCC.getOpcode() == ISD::SETCC) {
9422 // Fold to a simpler select_cc
9423 SDValue SelectOp = DAG.getNode(
9424 ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
9425 SCC.getOperand(1), N2, N3, SCC.getOperand(2));
9426 SelectOp->setFlags(SCC->getFlags());
9431 // If we can fold this based on the true/false value, do so.
9432 if (SimplifySelectOps(N, N2, N3))
9433 return SDValue(N, 0); // Don't revisit N.
9435 // fold select_cc into other things, such as min/max/abs
9436 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
9439 SDValue DAGCombiner::visitSETCC(SDNode *N) {
9440 // setcc is very commonly used as an argument to brcond. This pattern
9441 // also lend itself to numerous combines and, as a result, it is desired
9442 // we keep the argument to a brcond as a setcc as much as possible.
9444 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
9446 SDValue Combined = SimplifySetCC(
9447 N->getValueType(0), N->getOperand(0), N->getOperand(1),
9448 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
9453 // If we prefer to have a setcc, and we don't, we'll try our best to
9454 // recreate one using rebuildSetCC.
9455 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
9456 SDValue NewSetCC = rebuildSetCC(Combined);
9458 // We don't have anything interesting to combine to.
9459 if (NewSetCC.getNode() == N)
9469 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9470 SDValue LHS = N->getOperand(0);
9471 SDValue RHS = N->getOperand(1);
9472 SDValue Carry = N->getOperand(2);
9473 SDValue Cond = N->getOperand(3);
9475 // If Carry is false, fold to a regular SETCC.
9476 if (isNullConstant(Carry))
9477 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9482 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9483 /// a build_vector of constants.
9484 /// This function is called by the DAGCombiner when visiting sext/zext/aext
9485 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9486 /// Vector extends are not folded if operations are legal; this is to
9487 /// avoid introducing illegal build_vector dag nodes.
9488 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9489 SelectionDAG &DAG, bool LegalTypes) {
9490 unsigned Opcode = N->getOpcode();
9491 SDValue N0 = N->getOperand(0);
9492 EVT VT = N->getValueType(0);
9495 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9496 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9497 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
9498 && "Expected EXTEND dag node in input!");
9500 // fold (sext c1) -> c1
9501 // fold (zext c1) -> c1
9502 // fold (aext c1) -> c1
9503 if (isa<ConstantSDNode>(N0))
9504 return DAG.getNode(Opcode, DL, VT, N0);
9506 // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9507 // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9508 // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9509 if (N0->getOpcode() == ISD::SELECT) {
9510 SDValue Op1 = N0->getOperand(1);
9511 SDValue Op2 = N0->getOperand(2);
9512 if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9513 (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
9514 // For any_extend, choose sign extension of the constants to allow a
9515 // possible further transform to sign_extend_inreg.i.e.
9517 // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9518 // t2: i64 = any_extend t1
9520 // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9522 // t4: i64 = sign_extend_inreg t3
9523 unsigned FoldOpc = Opcode;
9524 if (FoldOpc == ISD::ANY_EXTEND)
9525 FoldOpc = ISD::SIGN_EXTEND;
9526 return DAG.getSelect(DL, VT, N0->getOperand(0),
9527 DAG.getNode(FoldOpc, DL, VT, Op1),
9528 DAG.getNode(FoldOpc, DL, VT, Op2));
9532 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
9533 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
9534 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
9535 EVT SVT = VT.getScalarType();
9536 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
9537 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
9540 // We can fold this node into a build_vector.
9541 unsigned VTBits = SVT.getSizeInBits();
9542 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
9543 SmallVector<SDValue, 8> Elts;
9544 unsigned NumElts = VT.getVectorNumElements();
9546 // For zero-extensions, UNDEF elements still guarantee to have the upper
9547 // bits set to zero.
9549 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
9551 for (unsigned i = 0; i != NumElts; ++i) {
9552 SDValue Op = N0.getOperand(i);
9554 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
9559 // Get the constant value and if needed trunc it to the size of the type.
9560 // Nodes like build_vector might have constants wider than the scalar type.
9561 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
9562 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
9563 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9565 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9568 return DAG.getBuildVector(VT, DL, Elts);
9571 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
9572 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9573 // transformation. Returns true if extension are possible and the above
9574 // mentioned transformation is profitable.
9575 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9577 SmallVectorImpl<SDNode *> &ExtendNodes,
9578 const TargetLowering &TLI) {
9579 bool HasCopyToRegUses = false;
9580 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9581 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9582 UE = N0.getNode()->use_end();
9587 if (UI.getUse().getResNo() != N0.getResNo())
9589 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9590 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9591 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9592 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9593 // Sign bits will be lost after a zext.
9596 for (unsigned i = 0; i != 2; ++i) {
9597 SDValue UseOp = User->getOperand(i);
9600 if (!isa<ConstantSDNode>(UseOp))
9605 ExtendNodes.push_back(User);
9608 // If truncates aren't free and there are users we can't
9609 // extend, it isn't worthwhile.
9612 // Remember if this value is live-out.
9613 if (User->getOpcode() == ISD::CopyToReg)
9614 HasCopyToRegUses = true;
9617 if (HasCopyToRegUses) {
9618 bool BothLiveOut = false;
9619 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9621 SDUse &Use = UI.getUse();
9622 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9628 // Both unextended and extended values are live out. There had better be
9629 // a good reason for the transformation.
9630 return ExtendNodes.size();
9635 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9636 SDValue OrigLoad, SDValue ExtLoad,
9637 ISD::NodeType ExtType) {
9638 // Extend SetCC uses if necessary.
9640 for (SDNode *SetCC : SetCCs) {
9641 SmallVector<SDValue, 4> Ops;
9643 for (unsigned j = 0; j != 2; ++j) {
9644 SDValue SOp = SetCC->getOperand(j);
9645 if (SOp == OrigLoad)
9646 Ops.push_back(ExtLoad);
9648 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9651 Ops.push_back(SetCC->getOperand(2));
9652 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9656 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
9657 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
9658 SDValue N0 = N->getOperand(0);
9659 EVT DstVT = N->getValueType(0);
9660 EVT SrcVT = N0.getValueType();
9662 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9663 N->getOpcode() == ISD::ZERO_EXTEND) &&
9664 "Unexpected node type (not an extend)!");
9666 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
9667 // For example, on a target with legal v4i32, but illegal v8i32, turn:
9668 // (v8i32 (sext (v8i16 (load x))))
9670 // (v8i32 (concat_vectors (v4i32 (sextload x)),
9671 // (v4i32 (sextload (x + 16)))))
9672 // Where uses of the original load, i.e.:
9674 // are replaced with:
9676 // (v8i32 (concat_vectors (v4i32 (sextload x)),
9677 // (v4i32 (sextload (x + 16)))))))
9679 // This combine is only applicable to illegal, but splittable, vectors.
9680 // All legal types, and illegal non-vector types, are handled elsewhere.
9681 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9683 if (N0->getOpcode() != ISD::LOAD)
9686 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9688 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
9689 !N0.hasOneUse() || !LN0->isSimple() ||
9690 !DstVT.isVector() || !DstVT.isPow2VectorType() ||
9691 !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9694 SmallVector<SDNode *, 4> SetCCs;
9695 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9698 ISD::LoadExtType ExtType =
9699 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9701 // Try to split the vector types to get down to legal types.
9702 EVT SplitSrcVT = SrcVT;
9703 EVT SplitDstVT = DstVT;
9704 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9705 SplitSrcVT.getVectorNumElements() > 1) {
9706 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9707 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9710 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9713 assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
9716 const unsigned NumSplits =
9717 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
9718 const unsigned Stride = SplitSrcVT.getStoreSize();
9719 SmallVector<SDValue, 4> Loads;
9720 SmallVector<SDValue, 4> Chains;
9722 SDValue BasePtr = LN0->getBasePtr();
9723 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
9724 const unsigned Offset = Idx * Stride;
9725 const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9727 SDValue SplitLoad = DAG.getExtLoad(
9728 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9729 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9730 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9732 BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL);
9734 Loads.push_back(SplitLoad.getValue(0));
9735 Chains.push_back(SplitLoad.getValue(1));
9738 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9739 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9742 AddToWorklist(NewChain.getNode());
9744 CombineTo(N, NewValue);
9746 // Replace uses of the original load (before extension)
9747 // with a truncate of the concatenated sextloaded vectors.
9749 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9750 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9751 CombineTo(N0.getNode(), Trunc, NewChain);
9752 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9755 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9756 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9757 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9758 assert(N->getOpcode() == ISD::ZERO_EXTEND);
9759 EVT VT = N->getValueType(0);
9760 EVT OrigVT = N->getOperand(0).getValueType();
9761 if (TLI.isZExtFree(OrigVT, VT))
9765 SDValue N0 = N->getOperand(0);
9766 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9767 N0.getOpcode() == ISD::XOR) ||
9768 N0.getOperand(1).getOpcode() != ISD::Constant ||
9769 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
9773 SDValue N1 = N0->getOperand(0);
9774 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
9775 N1.getOperand(1).getOpcode() != ISD::Constant ||
9776 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
9780 if (!isa<LoadSDNode>(N1.getOperand(0)))
9782 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9783 EVT MemVT = Load->getMemoryVT();
9784 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9785 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
9789 // If the shift op is SHL, the logic op must be AND, otherwise the result
9791 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
9794 if (!N0.hasOneUse() || !N1.hasOneUse())
9797 SmallVector<SDNode*, 4> SetCCs;
9798 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9799 ISD::ZERO_EXTEND, SetCCs, TLI))
9802 // Actually do the transformation.
9803 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9804 Load->getChain(), Load->getBasePtr(),
9805 Load->getMemoryVT(), Load->getMemOperand());
9808 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
9811 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
9813 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9814 DAG.getConstant(Mask, DL0, VT));
9816 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9818 if (SDValue(Load, 0).hasOneUse()) {
9819 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9821 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9822 Load->getValueType(0), ExtLoad);
9823 CombineTo(Load, Trunc, ExtLoad.getValue(1));
9826 // N0 is dead at this point.
9827 recursivelyDeleteUnusedNodes(N0.getNode());
9829 return SDValue(N,0); // Return N so it doesn't get rechecked!
9832 /// If we're narrowing or widening the result of a vector select and the final
9833 /// size is the same size as a setcc (compare) feeding the select, then try to
9834 /// apply the cast operation to the select's operands because matching vector
9835 /// sizes for a select condition and other operands should be more efficient.
9836 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9837 unsigned CastOpcode = Cast->getOpcode();
9838 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9839 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9840 CastOpcode == ISD::FP_ROUND) &&
9841 "Unexpected opcode for vector select narrowing/widening");
9843 // We only do this transform before legal ops because the pattern may be
9844 // obfuscated by target-specific operations after legalization. Do not create
9845 // an illegal select op, however, because that may be difficult to lower.
9846 EVT VT = Cast->getValueType(0);
9847 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9850 SDValue VSel = Cast->getOperand(0);
9851 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9852 VSel.getOperand(0).getOpcode() != ISD::SETCC)
9855 // Does the setcc have the same vector size as the casted select?
9856 SDValue SetCC = VSel.getOperand(0);
9857 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9858 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9861 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9862 SDValue A = VSel.getOperand(1);
9863 SDValue B = VSel.getOperand(2);
9864 SDValue CastA, CastB;
9866 if (CastOpcode == ISD::FP_ROUND) {
9867 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9868 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9869 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9871 CastA = DAG.getNode(CastOpcode, DL, VT, A);
9872 CastB = DAG.getNode(CastOpcode, DL, VT, B);
9874 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9877 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9878 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9879 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9880 const TargetLowering &TLI, EVT VT,
9881 bool LegalOperations, SDNode *N,
9882 SDValue N0, ISD::LoadExtType ExtLoadType) {
9883 SDNode *N0Node = N0.getNode();
9884 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9885 : ISD::isZEXTLoad(N0Node);
9886 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9887 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9890 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9891 EVT MemVT = LN0->getMemoryVT();
9892 if ((LegalOperations || !LN0->isSimple() ||
9894 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
9898 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9899 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9900 Combiner.CombineTo(N, ExtLoad);
9901 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9902 if (LN0->use_empty())
9903 Combiner.recursivelyDeleteUnusedNodes(LN0);
9904 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9907 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9908 // Only generate vector extloads when 1) they're legal, and 2) they are
9909 // deemed desirable by the target.
9910 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
9911 const TargetLowering &TLI, EVT VT,
9912 bool LegalOperations, SDNode *N, SDValue N0,
9913 ISD::LoadExtType ExtLoadType,
9914 ISD::NodeType ExtOpc) {
9915 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
9916 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
9917 ((LegalOperations || VT.isVector() ||
9918 !cast<LoadSDNode>(N0)->isSimple()) &&
9919 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
9922 bool DoXform = true;
9923 SmallVector<SDNode *, 4> SetCCs;
9924 if (!N0.hasOneUse())
9925 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9927 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9931 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9932 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9933 LN0->getBasePtr(), N0.getValueType(),
9934 LN0->getMemOperand());
9935 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9936 // If the load value is used only by N, replace it via CombineTo N.
9937 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9938 Combiner.CombineTo(N, ExtLoad);
9939 if (NoReplaceTrunc) {
9940 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9941 Combiner.recursivelyDeleteUnusedNodes(LN0);
9944 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9945 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9947 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9950 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
9951 const TargetLowering &TLI, EVT VT,
9952 SDNode *N, SDValue N0,
9953 ISD::LoadExtType ExtLoadType,
9954 ISD::NodeType ExtOpc) {
9955 if (!N0.hasOneUse())
9958 MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
9959 if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
9962 if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
9965 if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9969 SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
9970 SDValue NewLoad = DAG.getMaskedLoad(
9971 VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
9972 PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
9973 ExtLoadType, Ld->isExpandingLoad());
9974 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
9978 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9979 bool LegalOperations) {
9980 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9981 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9983 SDValue SetCC = N->getOperand(0);
9984 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9985 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9988 SDValue X = SetCC.getOperand(0);
9989 SDValue Ones = SetCC.getOperand(1);
9990 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9991 EVT VT = N->getValueType(0);
9992 EVT XVT = X.getValueType();
9993 // setge X, C is canonicalized to setgt, so we do not need to match that
9994 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9995 // not require the 'not' op.
9996 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9997 // Invert and smear/shift the sign bit:
9998 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9999 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10001 unsigned ShCt = VT.getSizeInBits() - 1;
10002 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10003 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10004 SDValue NotX = DAG.getNOT(DL, X, VT);
10005 SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10007 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10008 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10014 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
10015 SDValue N0 = N->getOperand(0);
10016 EVT VT = N->getValueType(0);
10019 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10022 // fold (sext (sext x)) -> (sext x)
10023 // fold (sext (aext x)) -> (sext x)
10024 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10025 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
10027 if (N0.getOpcode() == ISD::TRUNCATE) {
10028 // fold (sext (truncate (load x))) -> (sext (smaller load x))
10029 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
10030 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10031 SDNode *oye = N0.getOperand(0).getNode();
10032 if (NarrowLoad.getNode() != N0.getNode()) {
10033 CombineTo(N0.getNode(), NarrowLoad);
10034 // CombineTo deleted the truncate, if needed, but not what's under it.
10035 AddToWorklist(oye);
10037 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10040 // See if the value being truncated is already sign extended. If so, just
10041 // eliminate the trunc/sext pair.
10042 SDValue Op = N0.getOperand(0);
10043 unsigned OpBits = Op.getScalarValueSizeInBits();
10044 unsigned MidBits = N0.getScalarValueSizeInBits();
10045 unsigned DestBits = VT.getScalarSizeInBits();
10046 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
10048 if (OpBits == DestBits) {
10049 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
10050 // bits, it is already ready.
10051 if (NumSignBits > DestBits-MidBits)
10053 } else if (OpBits < DestBits) {
10054 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
10055 // bits, just sext from i32.
10056 if (NumSignBits > OpBits-MidBits)
10057 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
10059 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
10060 // bits, just truncate to i32.
10061 if (NumSignBits > OpBits-MidBits)
10062 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
10065 // fold (sext (truncate x)) -> (sextinreg x).
10066 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
10067 N0.getValueType())) {
10068 if (OpBits < DestBits)
10069 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
10070 else if (OpBits > DestBits)
10071 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
10072 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
10073 DAG.getValueType(N0.getValueType()));
10077 // Try to simplify (sext (load x)).
10078 if (SDValue foldedExt =
10079 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10080 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
10083 if (SDValue foldedExt =
10084 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
10088 // fold (sext (load x)) to multiple smaller sextloads.
10089 // Only on illegal but splittable vectors.
10090 if (SDValue ExtLoad = CombineExtLoad(N))
10093 // Try to simplify (sext (sextload x)).
10094 if (SDValue foldedExt = tryToFoldExtOfExtload(
10095 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
10098 // fold (sext (and/or/xor (load x), cst)) ->
10099 // (and/or/xor (sextload x), (sext cst))
10100 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10101 N0.getOpcode() == ISD::XOR) &&
10102 isa<LoadSDNode>(N0.getOperand(0)) &&
10103 N0.getOperand(1).getOpcode() == ISD::Constant &&
10104 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10105 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10106 EVT MemVT = LN00->getMemoryVT();
10107 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
10108 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
10109 SmallVector<SDNode*, 4> SetCCs;
10110 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10111 ISD::SIGN_EXTEND, SetCCs, TLI);
10113 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
10114 LN00->getChain(), LN00->getBasePtr(),
10115 LN00->getMemoryVT(),
10116 LN00->getMemOperand());
10117 APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
10118 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10119 ExtLoad, DAG.getConstant(Mask, DL, VT));
10120 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
10121 bool NoReplaceTruncAnd = !N0.hasOneUse();
10122 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10124 // If N0 has multiple uses, change other uses as well.
10125 if (NoReplaceTruncAnd) {
10127 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10128 CombineTo(N0.getNode(), TruncAnd);
10130 if (NoReplaceTrunc) {
10131 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10133 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10134 LN00->getValueType(0), ExtLoad);
10135 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10137 return SDValue(N,0); // Return N so it doesn't get rechecked!
10142 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10145 if (N0.getOpcode() == ISD::SETCC) {
10146 SDValue N00 = N0.getOperand(0);
10147 SDValue N01 = N0.getOperand(1);
10148 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10149 EVT N00VT = N0.getOperand(0).getValueType();
10151 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
10152 // Only do this before legalize for now.
10153 if (VT.isVector() && !LegalOperations &&
10154 TLI.getBooleanContents(N00VT) ==
10155 TargetLowering::ZeroOrNegativeOneBooleanContent) {
10156 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10157 // of the same size as the compared operands. Only optimize sext(setcc())
10158 // if this is the case.
10159 EVT SVT = getSetCCResultType(N00VT);
10161 // If we already have the desired type, don't change it.
10162 if (SVT != N0.getValueType()) {
10163 // We know that the # elements of the results is the same as the
10164 // # elements of the compare (and the # elements of the compare result
10165 // for that matter). Check to see that they are the same size. If so,
10166 // we know that the element size of the sext'd result matches the
10167 // element size of the compare operands.
10168 if (VT.getSizeInBits() == SVT.getSizeInBits())
10169 return DAG.getSetCC(DL, VT, N00, N01, CC);
10171 // If the desired elements are smaller or larger than the source
10172 // elements, we can use a matching integer vector type and then
10173 // truncate/sign extend.
10174 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10175 if (SVT == MatchingVecType) {
10176 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10177 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10182 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
10183 // Here, T can be 1 or -1, depending on the type of the setcc and
10184 // getBooleanContents().
10185 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
10187 // To determine the "true" side of the select, we need to know the high bit
10188 // of the value returned by the setcc if it evaluates to true.
10189 // If the type of the setcc is i1, then the true case of the select is just
10190 // sext(i1 1), that is, -1.
10191 // If the type of the setcc is larger (say, i8) then the value of the high
10192 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
10193 // of the appropriate width.
10194 SDValue ExtTrueVal = (SetCCWidth == 1)
10195 ? DAG.getAllOnesConstant(DL, VT)
10196 : DAG.getBoolConstant(true, DL, VT, N00VT);
10197 SDValue Zero = DAG.getConstant(0, DL, VT);
10199 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
10202 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
10203 EVT SetCCVT = getSetCCResultType(N00VT);
10204 // Don't do this transform for i1 because there's a select transform
10205 // that would reverse it.
10206 // TODO: We should not do this transform at all without a target hook
10207 // because a sext is likely cheaper than a select?
10208 if (SetCCVT.getScalarSizeInBits() != 1 &&
10209 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
10210 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
10211 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
10216 // fold (sext x) -> (zext x) if the sign bit is known zero.
10217 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
10218 DAG.SignBitIsZero(N0))
10219 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
10221 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10224 // Eliminate this sign extend by doing a negation in the destination type:
10225 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
10226 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
10227 isNullOrNullSplat(N0.getOperand(0)) &&
10228 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
10229 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
10230 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
10231 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
10233 // Eliminate this sign extend by doing a decrement in the destination type:
10234 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
10235 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
10236 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
10237 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10238 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
10239 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
10240 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10246 // isTruncateOf - If N is a truncate of some other value, return true, record
10247 // the value being truncated in Op and which of Op's bits are zero/one in Known.
10248 // This function computes KnownBits to avoid a duplicated call to
10249 // computeKnownBits in the caller.
10250 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
10251 KnownBits &Known) {
10252 if (N->getOpcode() == ISD::TRUNCATE) {
10253 Op = N->getOperand(0);
10254 Known = DAG.computeKnownBits(Op);
10258 if (N.getOpcode() != ISD::SETCC ||
10259 N.getValueType().getScalarType() != MVT::i1 ||
10260 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
10263 SDValue Op0 = N->getOperand(0);
10264 SDValue Op1 = N->getOperand(1);
10265 assert(Op0.getValueType() == Op1.getValueType());
10267 if (isNullOrNullSplat(Op0))
10269 else if (isNullOrNullSplat(Op1))
10274 Known = DAG.computeKnownBits(Op);
10276 return (Known.Zero | 1).isAllOnesValue();
10279 /// Given an extending node with a pop-count operand, if the target does not
10280 /// support a pop-count in the narrow source type but does support it in the
10281 /// destination type, widen the pop-count to the destination type.
10282 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10283 assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
10284 Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
10286 SDValue CtPop = Extend->getOperand(0);
10287 if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
10290 EVT VT = Extend->getValueType(0);
10291 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10292 if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
10293 !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
10296 // zext (ctpop X) --> ctpop (zext X)
10298 SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
10299 return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
10302 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
10303 SDValue N0 = N->getOperand(0);
10304 EVT VT = N->getValueType(0);
10306 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10309 // fold (zext (zext x)) -> (zext x)
10310 // fold (zext (aext x)) -> (zext x)
10311 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10312 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
10315 // fold (zext (truncate x)) -> (zext x) or
10316 // (zext (truncate x)) -> (truncate x)
10317 // This is valid when the truncated bits of x are already zero.
10320 if (isTruncateOf(DAG, N0, Op, Known)) {
10321 APInt TruncatedBits =
10322 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
10323 APInt(Op.getScalarValueSizeInBits(), 0) :
10324 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
10325 N0.getScalarValueSizeInBits(),
10326 std::min(Op.getScalarValueSizeInBits(),
10327 VT.getScalarSizeInBits()));
10328 if (TruncatedBits.isSubsetOf(Known.Zero))
10329 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10332 // fold (zext (truncate x)) -> (and x, mask)
10333 if (N0.getOpcode() == ISD::TRUNCATE) {
10334 // fold (zext (truncate (load x))) -> (zext (smaller load x))
10335 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
10336 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10337 SDNode *oye = N0.getOperand(0).getNode();
10338 if (NarrowLoad.getNode() != N0.getNode()) {
10339 CombineTo(N0.getNode(), NarrowLoad);
10340 // CombineTo deleted the truncate, if needed, but not what's under it.
10341 AddToWorklist(oye);
10343 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10346 EVT SrcVT = N0.getOperand(0).getValueType();
10347 EVT MinVT = N0.getValueType();
10349 // Try to mask before the extension to avoid having to generate a larger mask,
10350 // possibly over several sub-vectors.
10351 if (SrcVT.bitsLT(VT) && VT.isVector()) {
10352 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
10353 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
10354 SDValue Op = N0.getOperand(0);
10355 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
10356 AddToWorklist(Op.getNode());
10357 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10358 // Transfer the debug info; the new node is equivalent to N0.
10359 DAG.transferDbgValues(N0, ZExtOrTrunc);
10360 return ZExtOrTrunc;
10364 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
10365 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10366 AddToWorklist(Op.getNode());
10367 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
10368 // We may safely transfer the debug info describing the truncate node over
10369 // to the equivalent and operation.
10370 DAG.transferDbgValues(N0, And);
10375 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
10376 // if either of the casts is not free.
10377 if (N0.getOpcode() == ISD::AND &&
10378 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10379 N0.getOperand(1).getOpcode() == ISD::Constant &&
10380 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10381 N0.getValueType()) ||
10382 !TLI.isZExtFree(N0.getValueType(), VT))) {
10383 SDValue X = N0.getOperand(0).getOperand(0);
10384 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
10385 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10387 return DAG.getNode(ISD::AND, DL, VT,
10388 X, DAG.getConstant(Mask, DL, VT));
10391 // Try to simplify (zext (load x)).
10392 if (SDValue foldedExt =
10393 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10394 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
10397 if (SDValue foldedExt =
10398 tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
10402 // fold (zext (load x)) to multiple smaller zextloads.
10403 // Only on illegal but splittable vectors.
10404 if (SDValue ExtLoad = CombineExtLoad(N))
10407 // fold (zext (and/or/xor (load x), cst)) ->
10408 // (and/or/xor (zextload x), (zext cst))
10409 // Unless (and (load x) cst) will match as a zextload already and has
10410 // additional users.
10411 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10412 N0.getOpcode() == ISD::XOR) &&
10413 isa<LoadSDNode>(N0.getOperand(0)) &&
10414 N0.getOperand(1).getOpcode() == ISD::Constant &&
10415 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10416 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10417 EVT MemVT = LN00->getMemoryVT();
10418 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
10419 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
10420 bool DoXform = true;
10421 SmallVector<SDNode*, 4> SetCCs;
10422 if (!N0.hasOneUse()) {
10423 if (N0.getOpcode() == ISD::AND) {
10424 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
10425 EVT LoadResultTy = AndC->getValueType(0);
10427 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
10432 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10433 ISD::ZERO_EXTEND, SetCCs, TLI);
10435 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
10436 LN00->getChain(), LN00->getBasePtr(),
10437 LN00->getMemoryVT(),
10438 LN00->getMemOperand());
10439 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10441 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10442 ExtLoad, DAG.getConstant(Mask, DL, VT));
10443 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10444 bool NoReplaceTruncAnd = !N0.hasOneUse();
10445 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10447 // If N0 has multiple uses, change other uses as well.
10448 if (NoReplaceTruncAnd) {
10450 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10451 CombineTo(N0.getNode(), TruncAnd);
10453 if (NoReplaceTrunc) {
10454 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10456 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10457 LN00->getValueType(0), ExtLoad);
10458 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10460 return SDValue(N,0); // Return N so it doesn't get rechecked!
10465 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10466 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10467 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
10470 // Try to simplify (zext (zextload x)).
10471 if (SDValue foldedExt = tryToFoldExtOfExtload(
10472 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
10475 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10478 if (N0.getOpcode() == ISD::SETCC) {
10479 // Only do this before legalize for now.
10480 if (!LegalOperations && VT.isVector() &&
10481 N0.getValueType().getVectorElementType() == MVT::i1) {
10482 EVT N00VT = N0.getOperand(0).getValueType();
10483 if (getSetCCResultType(N00VT) == N0.getValueType())
10486 // We know that the # elements of the results is the same as the #
10487 // elements of the compare (and the # elements of the compare result for
10488 // that matter). Check to see that they are the same size. If so, we know
10489 // that the element size of the sext'd result matches the element size of
10490 // the compare operands.
10492 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
10493 // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
10494 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
10495 N0.getOperand(1), N0.getOperand(2));
10496 return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
10499 // If the desired elements are smaller or larger than the source
10500 // elements we can use a matching integer vector type and then
10501 // truncate/any extend followed by zext_in_reg.
10502 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10504 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
10505 N0.getOperand(1), N0.getOperand(2));
10506 return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
10507 N0.getValueType());
10510 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10512 if (SDValue SCC = SimplifySelectCC(
10513 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10514 DAG.getConstant(0, DL, VT),
10515 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10519 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
10520 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
10521 isa<ConstantSDNode>(N0.getOperand(1)) &&
10522 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10524 SDValue ShAmt = N0.getOperand(1);
10525 if (N0.getOpcode() == ISD::SHL) {
10526 SDValue InnerZExt = N0.getOperand(0);
10527 // If the original shl may be shifting out bits, do not perform this
10529 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
10530 InnerZExt.getOperand(0).getValueSizeInBits();
10531 if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
10537 // Ensure that the shift amount is wide enough for the shifted value.
10538 if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
10539 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
10541 return DAG.getNode(N0.getOpcode(), DL, VT,
10542 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
10546 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10549 if (SDValue NewCtPop = widenCtPop(N, DAG))
10555 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
10556 SDValue N0 = N->getOperand(0);
10557 EVT VT = N->getValueType(0);
10559 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10562 // fold (aext (aext x)) -> (aext x)
10563 // fold (aext (zext x)) -> (zext x)
10564 // fold (aext (sext x)) -> (sext x)
10565 if (N0.getOpcode() == ISD::ANY_EXTEND ||
10566 N0.getOpcode() == ISD::ZERO_EXTEND ||
10567 N0.getOpcode() == ISD::SIGN_EXTEND)
10568 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10570 // fold (aext (truncate (load x))) -> (aext (smaller load x))
10571 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
10572 if (N0.getOpcode() == ISD::TRUNCATE) {
10573 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10574 SDNode *oye = N0.getOperand(0).getNode();
10575 if (NarrowLoad.getNode() != N0.getNode()) {
10576 CombineTo(N0.getNode(), NarrowLoad);
10577 // CombineTo deleted the truncate, if needed, but not what's under it.
10578 AddToWorklist(oye);
10580 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10584 // fold (aext (truncate x))
10585 if (N0.getOpcode() == ISD::TRUNCATE)
10586 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10588 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
10589 // if the trunc is not free.
10590 if (N0.getOpcode() == ISD::AND &&
10591 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10592 N0.getOperand(1).getOpcode() == ISD::Constant &&
10593 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10594 N0.getValueType())) {
10596 SDValue X = N0.getOperand(0).getOperand(0);
10597 X = DAG.getAnyExtOrTrunc(X, DL, VT);
10598 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10599 return DAG.getNode(ISD::AND, DL, VT,
10600 X, DAG.getConstant(Mask, DL, VT));
10603 // fold (aext (load x)) -> (aext (truncate (extload x)))
10604 // None of the supported targets knows how to perform load and any_ext
10605 // on vectors in one instruction. We only perform this transformation on
10607 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
10608 ISD::isUNINDEXEDLoad(N0.getNode()) &&
10609 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10610 bool DoXform = true;
10611 SmallVector<SDNode*, 4> SetCCs;
10612 if (!N0.hasOneUse())
10613 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
10616 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10617 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10619 LN0->getBasePtr(), N0.getValueType(),
10620 LN0->getMemOperand());
10621 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
10622 // If the load value is used only by N, replace it via CombineTo N.
10623 bool NoReplaceTrunc = N0.hasOneUse();
10624 CombineTo(N, ExtLoad);
10625 if (NoReplaceTrunc) {
10626 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10627 recursivelyDeleteUnusedNodes(LN0);
10629 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10630 N0.getValueType(), ExtLoad);
10631 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10633 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10637 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10638 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10639 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
10640 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
10641 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
10642 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10643 ISD::LoadExtType ExtType = LN0->getExtensionType();
10644 EVT MemVT = LN0->getMemoryVT();
10645 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
10646 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10647 VT, LN0->getChain(), LN0->getBasePtr(),
10648 MemVT, LN0->getMemOperand());
10649 CombineTo(N, ExtLoad);
10650 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10651 recursivelyDeleteUnusedNodes(LN0);
10652 return SDValue(N, 0); // Return N so it doesn't get rechecked!
10656 if (N0.getOpcode() == ISD::SETCC) {
10658 // aext(setcc) -> vsetcc
10659 // aext(setcc) -> truncate(vsetcc)
10660 // aext(setcc) -> aext(vsetcc)
10661 // Only do this before legalize for now.
10662 if (VT.isVector() && !LegalOperations) {
10663 EVT N00VT = N0.getOperand(0).getValueType();
10664 if (getSetCCResultType(N00VT) == N0.getValueType())
10667 // We know that the # elements of the results is the same as the
10668 // # elements of the compare (and the # elements of the compare result
10669 // for that matter). Check to see that they are the same size. If so,
10670 // we know that the element size of the sext'd result matches the
10671 // element size of the compare operands.
10672 if (VT.getSizeInBits() == N00VT.getSizeInBits())
10673 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10675 cast<CondCodeSDNode>(N0.getOperand(2))->get());
10677 // If the desired elements are smaller or larger than the source
10678 // elements we can use a matching integer vector type and then
10679 // truncate/any extend
10680 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10682 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10684 cast<CondCodeSDNode>(N0.getOperand(2))->get());
10685 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10688 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10690 if (SDValue SCC = SimplifySelectCC(
10691 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10692 DAG.getConstant(0, DL, VT),
10693 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10697 if (SDValue NewCtPop = widenCtPop(N, DAG))
10703 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10704 unsigned Opcode = N->getOpcode();
10705 SDValue N0 = N->getOperand(0);
10706 SDValue N1 = N->getOperand(1);
10707 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10709 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10710 if (N0.getOpcode() == Opcode &&
10711 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10714 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10715 N0.getOperand(0).getOpcode() == Opcode) {
10716 // We have an assert, truncate, assert sandwich. Make one stronger assert
10717 // by asserting on the smallest asserted type to the larger source type.
10718 // This eliminates the later assert:
10719 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10720 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10721 SDValue BigA = N0.getOperand(0);
10722 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10723 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10724 "Asserting zero/sign-extended bits to a type larger than the "
10725 "truncated destination does not provide information");
10728 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10729 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10730 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10731 BigA.getOperand(0), MinAssertVTVal);
10732 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10735 // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10736 // than X. Just move the AssertZext in front of the truncate and drop the
10738 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10739 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10740 Opcode == ISD::AssertZext) {
10741 SDValue BigA = N0.getOperand(0);
10742 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10743 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10744 "Asserting zero/sign-extended bits to a type larger than the "
10745 "truncated destination does not provide information");
10747 if (AssertVT.bitsLT(BigA_AssertVT)) {
10749 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10750 BigA.getOperand(0), N1);
10751 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10758 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
10761 Align AL = cast<AssertAlignSDNode>(N)->getAlign();
10762 SDValue N0 = N->getOperand(0);
10764 // Fold (assertalign (assertalign x, AL0), AL1) ->
10765 // (assertalign x, max(AL0, AL1))
10766 if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
10767 return DAG.getAssertAlign(DL, N0.getOperand(0),
10768 std::max(AL, AAN->getAlign()));
10770 // In rare cases, there are trivial arithmetic ops in source operands. Sink
10771 // this assert down to source operands so that those arithmetic ops could be
10772 // exposed to the DAG combining.
10773 switch (N0.getOpcode()) {
10778 unsigned AlignShift = Log2(AL);
10779 SDValue LHS = N0.getOperand(0);
10780 SDValue RHS = N0.getOperand(1);
10781 unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
10782 unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10783 if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
10784 if (LHSAlignShift < AlignShift)
10785 LHS = DAG.getAssertAlign(DL, LHS, AL);
10786 if (RHSAlignShift < AlignShift)
10787 RHS = DAG.getAssertAlign(DL, RHS, AL);
10788 return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
10797 /// If the result of a wider load is shifted to right of N bits and then
10798 /// truncated to a narrower type and where N is a multiple of number of bits of
10799 /// the narrower type, transform it to a narrower load from address + N / num of
10800 /// bits of new type. Also narrow the load if the result is masked with an AND
10801 /// to effectively produce a smaller type. If the result is to be extended, also
10802 /// fold the extension to form a extending load.
10803 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10804 unsigned Opc = N->getOpcode();
10806 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10807 SDValue N0 = N->getOperand(0);
10808 EVT VT = N->getValueType(0);
10811 // This transformation isn't valid for vector loads.
10815 unsigned ShAmt = 0;
10816 bool HasShiftedOffset = false;
10817 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10819 if (Opc == ISD::SIGN_EXTEND_INREG) {
10820 ExtType = ISD::SEXTLOAD;
10821 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10822 } else if (Opc == ISD::SRL) {
10823 // Another special-case: SRL is basically zero-extending a narrower value,
10824 // or it maybe shifting a higher subword, half or byte into the lowest
10826 ExtType = ISD::ZEXTLOAD;
10827 N0 = SDValue(N, 0);
10829 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10830 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10834 uint64_t ShiftAmt = N01->getZExtValue();
10835 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10836 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10837 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10839 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10840 VT.getSizeInBits() - ShiftAmt);
10841 } else if (Opc == ISD::AND) {
10842 // An AND with a constant mask is the same as a truncate + zero-extend.
10843 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10847 const APInt &Mask = AndC->getAPIntValue();
10848 unsigned ActiveBits = 0;
10849 if (Mask.isMask()) {
10850 ActiveBits = Mask.countTrailingOnes();
10851 } else if (Mask.isShiftedMask()) {
10852 ShAmt = Mask.countTrailingZeros();
10853 APInt ShiftedMask = Mask.lshr(ShAmt);
10854 ActiveBits = ShiftedMask.countTrailingOnes();
10855 HasShiftedOffset = true;
10859 ExtType = ISD::ZEXTLOAD;
10860 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10863 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10865 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10866 ShAmt = ConstShift->getZExtValue();
10867 unsigned EVTBits = ExtVT.getSizeInBits();
10868 // Is the shift amount a multiple of size of VT?
10869 if ((ShAmt & (EVTBits-1)) == 0) {
10870 N0 = N0.getOperand(0);
10871 // Is the load width a multiple of size of VT?
10872 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10876 // At this point, we must have a load or else we can't do the transform.
10877 auto *LN0 = dyn_cast<LoadSDNode>(N0);
10878 if (!LN0) return SDValue();
10880 // Because a SRL must be assumed to *need* to zero-extend the high bits
10881 // (as opposed to anyext the high bits), we can't combine the zextload
10882 // lowering of SRL and an sextload.
10883 if (LN0->getExtensionType() == ISD::SEXTLOAD)
10886 // If the shift amount is larger than the input type then we're not
10887 // accessing any of the loaded bytes. If the load was a zextload/extload
10888 // then the result of the shift+trunc is zero/undef (handled elsewhere).
10889 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10892 // If the SRL is only used by a masking AND, we may be able to adjust
10893 // the ExtVT to make the AND redundant.
10894 SDNode *Mask = *(SRL->use_begin());
10895 if (Mask->getOpcode() == ISD::AND &&
10896 isa<ConstantSDNode>(Mask->getOperand(1))) {
10897 const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
10898 if (ShiftMask.isMask()) {
10899 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10900 ShiftMask.countTrailingOnes());
10901 // If the mask is smaller, recompute the type.
10902 if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10903 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10910 // If the load is shifted left (and the result isn't shifted back right),
10911 // we can fold the truncate through the shift.
10912 unsigned ShLeftAmt = 0;
10913 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10914 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10915 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10916 ShLeftAmt = N01->getZExtValue();
10917 N0 = N0.getOperand(0);
10921 // If we haven't found a load, we can't narrow it.
10922 if (!isa<LoadSDNode>(N0))
10925 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10926 // Reducing the width of a volatile load is illegal. For atomics, we may be
10927 // able to reduce the width provided we never widen again. (see D66309)
10928 if (!LN0->isSimple() ||
10929 !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10932 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10933 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10934 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10935 return LVTStoreBits - EVTStoreBits - ShAmt;
10938 // For big endian targets, we need to adjust the offset to the pointer to
10939 // load the correct bytes.
10940 if (DAG.getDataLayout().isBigEndian())
10941 ShAmt = AdjustBigEndianShift(ShAmt);
10943 uint64_t PtrOff = ShAmt / 8;
10944 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10946 // The original load itself didn't wrap, so an offset within it doesn't.
10948 Flags.setNoUnsignedWrap(true);
10950 DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags);
10951 AddToWorklist(NewPtr.getNode());
10954 if (ExtType == ISD::NON_EXTLOAD)
10955 Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
10956 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10957 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10959 Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
10960 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10961 NewAlign, LN0->getMemOperand()->getFlags(),
10964 // Replace the old load's chain with the new load's chain.
10965 WorklistRemover DeadNodes(*this);
10966 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10968 // Shift the result left, if we've swallowed a left shift.
10969 SDValue Result = Load;
10970 if (ShLeftAmt != 0) {
10971 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10972 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10974 // If the shift amount is as large as the result size (but, presumably,
10975 // no larger than the source) then the useful bits of the result are
10976 // zero; we can't simply return the shortened shift, because the result
10977 // of that operation is undefined.
10978 if (ShLeftAmt >= VT.getSizeInBits())
10979 Result = DAG.getConstant(0, DL, VT);
10981 Result = DAG.getNode(ISD::SHL, DL, VT,
10982 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10985 if (HasShiftedOffset) {
10986 // Recalculate the shift amount after it has been altered to calculate
10988 if (DAG.getDataLayout().isBigEndian())
10989 ShAmt = AdjustBigEndianShift(ShAmt);
10991 // We're using a shifted mask, so the load now has an offset. This means
10992 // that data has been loaded into the lower bytes than it would have been
10993 // before, so we need to shl the loaded data into the correct position in the
10995 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10996 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10997 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
11000 // Return the new loaded value.
11004 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11005 SDValue N0 = N->getOperand(0);
11006 SDValue N1 = N->getOperand(1);
11007 EVT VT = N->getValueType(0);
11008 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11009 unsigned VTBits = VT.getScalarSizeInBits();
11010 unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11012 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11014 return DAG.getConstant(0, SDLoc(N), VT);
11016 // fold (sext_in_reg c1) -> c1
11017 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11018 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11020 // If the input is already sign extended, just drop the extension.
11021 if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11024 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
11025 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11026 ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11027 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11030 // fold (sext_in_reg (sext x)) -> (sext x)
11031 // fold (sext_in_reg (aext x)) -> (sext x)
11032 // if x is small enough or if we know that x has more than 1 sign bit and the
11033 // sign_extend_inreg is extending from one of them.
11034 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
11035 SDValue N00 = N0.getOperand(0);
11036 unsigned N00Bits = N00.getScalarValueSizeInBits();
11037 if ((N00Bits <= ExtVTBits ||
11038 (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11039 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11040 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11043 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11044 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
11045 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
11046 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
11047 N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
11048 if (!LegalOperations ||
11049 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
11050 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
11054 // fold (sext_in_reg (zext x)) -> (sext x)
11055 // iff we are extending the source sign bit.
11056 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
11057 SDValue N00 = N0.getOperand(0);
11058 if (N00.getScalarValueSizeInBits() == ExtVTBits &&
11059 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11060 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
11063 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
11064 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
11065 return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
11067 // fold operands of sext_in_reg based on knowledge that the top bits are not
11069 if (SimplifyDemandedBits(SDValue(N, 0)))
11070 return SDValue(N, 0);
11072 // fold (sext_in_reg (load x)) -> (smaller sextload x)
11073 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
11074 if (SDValue NarrowLoad = ReduceLoadWidth(N))
11077 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
11078 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
11079 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
11080 if (N0.getOpcode() == ISD::SRL) {
11081 if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
11082 if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
11083 // We can turn this into an SRA iff the input to the SRL is already sign
11084 // extended enough.
11085 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
11086 if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
11087 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
11092 // fold (sext_inreg (extload x)) -> (sextload x)
11093 // If sextload is not supported by target, we can only do the combine when
11094 // load has one use. Doing otherwise can block folding the extload with other
11095 // extends that the target does support.
11096 if (ISD::isEXTLoad(N0.getNode()) &&
11097 ISD::isUNINDEXEDLoad(N0.getNode()) &&
11098 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11099 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
11101 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11102 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11103 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11105 LN0->getBasePtr(), ExtVT,
11106 LN0->getMemOperand());
11107 CombineTo(N, ExtLoad);
11108 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11109 AddToWorklist(ExtLoad.getNode());
11110 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11112 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
11113 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
11115 ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11116 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
11117 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11118 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11119 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11121 LN0->getBasePtr(), ExtVT,
11122 LN0->getMemOperand());
11123 CombineTo(N, ExtLoad);
11124 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11125 return SDValue(N, 0); // Return N so it doesn't get rechecked!
11128 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
11129 if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
11130 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
11131 N0.getOperand(1), false))
11132 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
11138 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11139 SDValue N0 = N->getOperand(0);
11140 EVT VT = N->getValueType(0);
11142 // sext_vector_inreg(undef) = 0 because the top bit will all be the same.
11144 return DAG.getConstant(0, SDLoc(N), VT);
11146 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11149 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11150 return SDValue(N, 0);
11155 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11156 SDValue N0 = N->getOperand(0);
11157 EVT VT = N->getValueType(0);
11159 // zext_vector_inreg(undef) = 0 because the top bits will be zero.
11161 return DAG.getConstant(0, SDLoc(N), VT);
11163 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11166 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11167 return SDValue(N, 0);
11172 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
11173 SDValue N0 = N->getOperand(0);
11174 EVT VT = N->getValueType(0);
11175 EVT SrcVT = N0.getValueType();
11176 bool isLE = DAG.getDataLayout().isLittleEndian();
11182 // fold (truncate (truncate x)) -> (truncate x)
11183 if (N0.getOpcode() == ISD::TRUNCATE)
11184 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11186 // fold (truncate c1) -> c1
11187 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
11188 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
11189 if (C.getNode() != N)
11193 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
11194 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
11195 N0.getOpcode() == ISD::SIGN_EXTEND ||
11196 N0.getOpcode() == ISD::ANY_EXTEND) {
11197 // if the source is smaller than the dest, we still need an extend.
11198 if (N0.getOperand(0).getValueType().bitsLT(VT))
11199 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11200 // if the source is larger than the dest, than we just need the truncate.
11201 if (N0.getOperand(0).getValueType().bitsGT(VT))
11202 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11203 // if the source and dest are the same type, we can drop both the extend
11204 // and the truncate.
11205 return N0.getOperand(0);
11208 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
11209 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
11212 // Fold extract-and-trunc into a narrow extract. For example:
11213 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
11214 // i32 y = TRUNCATE(i64 x)
11216 // v16i8 b = BITCAST (v2i64 val)
11217 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
11219 // Note: We only run this optimization after type legalization (which often
11220 // creates this pattern) and before operation legalization after which
11221 // we need to be more careful about the vector instructions that we generate.
11222 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11223 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
11224 EVT VecTy = N0.getOperand(0).getValueType();
11225 EVT ExTy = N0.getValueType();
11226 EVT TrTy = N->getValueType(0);
11228 unsigned NumElem = VecTy.getVectorNumElements();
11229 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
11231 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
11232 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
11234 SDValue EltNo = N0->getOperand(1);
11235 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
11236 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
11237 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
11240 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
11241 DAG.getBitcast(NVT, N0.getOperand(0)),
11242 DAG.getVectorIdxConstant(Index, DL));
11246 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
11247 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
11248 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
11249 TLI.isTruncateFree(SrcVT, VT)) {
11251 SDValue Cond = N0.getOperand(0);
11252 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11253 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
11254 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
11258 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
11259 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11260 (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11261 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
11262 SDValue Amt = N0.getOperand(1);
11263 KnownBits Known = DAG.computeKnownBits(Amt);
11264 unsigned Size = VT.getScalarSizeInBits();
11265 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
11267 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
11269 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11270 if (AmtVT != Amt.getValueType()) {
11271 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
11272 AddToWorklist(Amt.getNode());
11274 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
11278 // Attempt to pre-truncate BUILD_VECTOR sources.
11279 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
11280 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
11281 // Avoid creating illegal types if running after type legalizer.
11282 (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
11284 EVT SVT = VT.getScalarType();
11285 SmallVector<SDValue, 8> TruncOps;
11286 for (const SDValue &Op : N0->op_values()) {
11287 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
11288 TruncOps.push_back(TruncOp);
11290 return DAG.getBuildVector(VT, DL, TruncOps);
11293 // Fold a series of buildvector, bitcast, and truncate if possible.
11294 // For example fold
11295 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
11296 // (2xi32 (buildvector x, y)).
11297 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
11298 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
11299 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
11300 N0.getOperand(0).hasOneUse()) {
11301 SDValue BuildVect = N0.getOperand(0);
11302 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
11303 EVT TruncVecEltTy = VT.getVectorElementType();
11305 // Check that the element types match.
11306 if (BuildVectEltTy == TruncVecEltTy) {
11307 // Now we only need to compute the offset of the truncated elements.
11308 unsigned BuildVecNumElts = BuildVect.getNumOperands();
11309 unsigned TruncVecNumElts = VT.getVectorNumElements();
11310 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
11312 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
11313 "Invalid number of elements");
11315 SmallVector<SDValue, 8> Opnds;
11316 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
11317 Opnds.push_back(BuildVect.getOperand(i));
11319 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
11323 // See if we can simplify the input to this truncate through knowledge that
11324 // only the low bits are being used.
11325 // For example "trunc (or (shl x, 8), y)" // -> trunc y
11326 // Currently we only perform this optimization on scalars because vectors
11327 // may have different active low bits.
11328 if (!VT.isVector()) {
11330 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
11331 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
11332 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
11335 // fold (truncate (load x)) -> (smaller load x)
11336 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
11337 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
11338 if (SDValue Reduced = ReduceLoadWidth(N))
11341 // Handle the case where the load remains an extending load even
11342 // after truncation.
11343 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
11344 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11345 if (LN0->isSimple() &&
11346 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
11347 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
11348 VT, LN0->getChain(), LN0->getBasePtr(),
11349 LN0->getMemoryVT(),
11350 LN0->getMemOperand());
11351 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
11357 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
11358 // where ... are all 'undef'.
11359 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
11360 SmallVector<EVT, 8> VTs;
11363 unsigned NumDefs = 0;
11365 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
11366 SDValue X = N0.getOperand(i);
11367 if (!X.isUndef()) {
11372 // Stop if more than one members are non-undef.
11376 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
11377 VT.getVectorElementType(),
11378 X.getValueType().getVectorElementCount()));
11382 return DAG.getUNDEF(VT);
11384 if (NumDefs == 1) {
11385 assert(V.getNode() && "The single defined operand is empty!");
11386 SmallVector<SDValue, 8> Opnds;
11387 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
11389 Opnds.push_back(DAG.getUNDEF(VTs[i]));
11392 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
11393 AddToWorklist(NV.getNode());
11394 Opnds.push_back(NV);
11396 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
11400 // Fold truncate of a bitcast of a vector to an extract of the low vector
11403 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
11404 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
11405 SDValue VecSrc = N0.getOperand(0);
11406 EVT VecSrcVT = VecSrc.getValueType();
11407 if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
11408 (!LegalOperations ||
11409 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
11412 unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
11413 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
11414 DAG.getVectorIdxConstant(Idx, SL));
11418 // Simplify the operands using demanded-bits information.
11419 if (!VT.isVector() &&
11420 SimplifyDemandedBits(SDValue(N, 0)))
11421 return SDValue(N, 0);
11423 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
11424 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
11425 // When the adde's carry is not used.
11426 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
11427 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
11428 // We only do for addcarry before legalize operation
11429 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
11430 TLI.isOperationLegal(N0.getOpcode(), VT))) {
11432 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11433 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11434 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
11435 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
11438 // fold (truncate (extract_subvector(ext x))) ->
11439 // (extract_subvector x)
11440 // TODO: This can be generalized to cover cases where the truncate and extract
11441 // do not fully cancel each other out.
11442 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
11443 SDValue N00 = N0.getOperand(0);
11444 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
11445 N00.getOpcode() == ISD::ZERO_EXTEND ||
11446 N00.getOpcode() == ISD::ANY_EXTEND) {
11447 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
11448 VT.getVectorElementType())
11449 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
11450 N00.getOperand(0), N0.getOperand(1));
11454 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11457 // Narrow a suitable binary operation with a non-opaque constant operand by
11458 // moving it ahead of the truncate. This is limited to pre-legalization
11459 // because targets may prefer a wider type during later combines and invert
11461 switch (N0.getOpcode()) {
11468 if (!LegalOperations && N0.hasOneUse() &&
11469 (isConstantOrConstantVector(N0.getOperand(0), true) ||
11470 isConstantOrConstantVector(N0.getOperand(1), true))) {
11471 // TODO: We already restricted this to pre-legalization, but for vectors
11472 // we are extra cautious to not create an unsupported operation.
11473 // Target-specific changes are likely needed to avoid regressions here.
11474 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
11476 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
11477 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
11478 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
11486 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
11487 SDValue Elt = N->getOperand(i);
11488 if (Elt.getOpcode() != ISD::MERGE_VALUES)
11489 return Elt.getNode();
11490 return Elt.getOperand(Elt.getResNo()).getNode();
11493 /// build_pair (load, load) -> load
11494 /// if load locations are consecutive.
11495 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
11496 assert(N->getOpcode() == ISD::BUILD_PAIR);
11498 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
11499 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
11501 // A BUILD_PAIR is always having the least significant part in elt 0 and the
11502 // most significant part in elt 1. So when combining into one large load, we
11503 // need to consider the endianness.
11504 if (DAG.getDataLayout().isBigEndian())
11505 std::swap(LD1, LD2);
11507 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
11508 LD1->getAddressSpace() != LD2->getAddressSpace())
11510 EVT LD1VT = LD1->getValueType(0);
11511 unsigned LD1Bytes = LD1VT.getStoreSize();
11512 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
11513 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
11514 Align Alignment = LD1->getAlign();
11515 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
11516 VT.getTypeForEVT(*DAG.getContext()));
11518 if (NewAlign <= Alignment &&
11519 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
11520 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
11521 LD1->getPointerInfo(), Alignment);
11527 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
11528 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
11529 // and Lo parts; on big-endian machines it doesn't.
11530 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
11533 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
11534 const TargetLowering &TLI) {
11535 // If this is not a bitcast to an FP type or if the target doesn't have
11536 // IEEE754-compliant FP logic, we're done.
11537 EVT VT = N->getValueType(0);
11538 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
11541 // TODO: Handle cases where the integer constant is a different scalar
11542 // bitwidth to the FP.
11543 SDValue N0 = N->getOperand(0);
11544 EVT SourceVT = N0.getValueType();
11545 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
11550 switch (N0.getOpcode()) {
11552 FPOpcode = ISD::FABS;
11553 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
11556 FPOpcode = ISD::FNEG;
11557 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11560 FPOpcode = ISD::FABS;
11561 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
11567 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
11568 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
11569 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
11571 SDValue LogicOp0 = N0.getOperand(0);
11572 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
11573 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
11574 LogicOp0.getOpcode() == ISD::BITCAST &&
11575 LogicOp0.getOperand(0).getValueType() == VT) {
11576 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
11577 NumFPLogicOpsConv++;
11578 if (N0.getOpcode() == ISD::OR)
11579 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
11586 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
11587 SDValue N0 = N->getOperand(0);
11588 EVT VT = N->getValueType(0);
11591 return DAG.getUNDEF(VT);
11593 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
11594 // Only do this before legalize types, unless both types are integer and the
11595 // scalar type is legal. Only do this before legalize ops, since the target
11596 // maybe depending on the bitcast.
11597 // First check to see if this is all constant.
11598 // TODO: Support FP bitcasts after legalize types.
11599 if (VT.isVector() &&
11601 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
11602 TLI.isTypeLegal(VT.getVectorElementType()))) &&
11603 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
11604 cast<BuildVectorSDNode>(N0)->isConstant())
11605 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
11606 VT.getVectorElementType());
11608 // If the input is a constant, let getNode fold it.
11609 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
11610 // If we can't allow illegal operations, we need to check that this is just
11611 // a fp -> int or int -> conversion and that the resulting operation will
11613 if (!LegalOperations ||
11614 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
11615 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
11616 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
11617 TLI.isOperationLegal(ISD::Constant, VT))) {
11618 SDValue C = DAG.getBitcast(VT, N0);
11619 if (C.getNode() != N)
11624 // (conv (conv x, t1), t2) -> (conv x, t2)
11625 if (N0.getOpcode() == ISD::BITCAST)
11626 return DAG.getBitcast(VT, N0.getOperand(0));
11628 // fold (conv (load x)) -> (load (conv*)x)
11629 // If the resultant load doesn't need a higher alignment than the original!
11630 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11631 // Do not remove the cast if the types differ in endian layout.
11632 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
11633 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
11634 // If the load is volatile, we only want to change the load type if the
11635 // resulting load is legal. Otherwise we might increase the number of
11636 // memory accesses. We don't care if the original type was legal or not
11637 // as we assume software couldn't rely on the number of accesses of an
11639 ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
11640 TLI.isOperationLegal(ISD::LOAD, VT))) {
11641 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11643 if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
11644 *LN0->getMemOperand())) {
11646 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
11647 LN0->getPointerInfo(), LN0->getAlignment(),
11648 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11649 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11654 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
11657 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
11658 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
11661 // fold (bitcast (fneg x)) ->
11662 // flipbit = signbit
11663 // (xor (bitcast x) (build_pair flipbit, flipbit))
11665 // fold (bitcast (fabs x)) ->
11666 // flipbit = (and (extract_element (bitcast x), 0), signbit)
11667 // (xor (bitcast x) (build_pair flipbit, flipbit))
11668 // This often reduces constant pool loads.
11669 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
11670 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
11671 N0.getNode()->hasOneUse() && VT.isInteger() &&
11672 !VT.isVector() && !N0.getValueType().isVector()) {
11673 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
11674 AddToWorklist(NewConv.getNode());
11677 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11678 assert(VT.getSizeInBits() == 128);
11679 SDValue SignBit = DAG.getConstant(
11680 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
11682 if (N0.getOpcode() == ISD::FNEG) {
11684 AddToWorklist(FlipBit.getNode());
11686 assert(N0.getOpcode() == ISD::FABS);
11688 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
11689 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11691 AddToWorklist(Hi.getNode());
11692 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
11693 AddToWorklist(FlipBit.getNode());
11696 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11697 AddToWorklist(FlipBits.getNode());
11698 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
11700 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11701 if (N0.getOpcode() == ISD::FNEG)
11702 return DAG.getNode(ISD::XOR, DL, VT,
11703 NewConv, DAG.getConstant(SignBit, DL, VT));
11704 assert(N0.getOpcode() == ISD::FABS);
11705 return DAG.getNode(ISD::AND, DL, VT,
11706 NewConv, DAG.getConstant(~SignBit, DL, VT));
11709 // fold (bitconvert (fcopysign cst, x)) ->
11710 // (or (and (bitconvert x), sign), (and cst, (not sign)))
11711 // Note that we don't handle (copysign x, cst) because this can always be
11712 // folded to an fneg or fabs.
11715 // fold (bitcast (fcopysign cst, x)) ->
11716 // flipbit = (and (extract_element
11717 // (xor (bitcast cst), (bitcast x)), 0),
11719 // (xor (bitcast cst) (build_pair flipbit, flipbit))
11720 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
11721 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
11722 VT.isInteger() && !VT.isVector()) {
11723 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
11724 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
11725 if (isTypeLegal(IntXVT)) {
11726 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
11727 AddToWorklist(X.getNode());
11729 // If X has a different width than the result/lhs, sext it or truncate it.
11730 unsigned VTWidth = VT.getSizeInBits();
11731 if (OrigXWidth < VTWidth) {
11732 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
11733 AddToWorklist(X.getNode());
11734 } else if (OrigXWidth > VTWidth) {
11735 // To get the sign bit in the right place, we have to shift it right
11736 // before truncating.
11738 X = DAG.getNode(ISD::SRL, DL,
11739 X.getValueType(), X,
11740 DAG.getConstant(OrigXWidth-VTWidth, DL,
11741 X.getValueType()));
11742 AddToWorklist(X.getNode());
11743 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
11744 AddToWorklist(X.getNode());
11747 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11748 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
11749 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11750 AddToWorklist(Cst.getNode());
11751 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
11752 AddToWorklist(X.getNode());
11753 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
11754 AddToWorklist(XorResult.getNode());
11755 SDValue XorResult64 = DAG.getNode(
11756 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
11757 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11758 SDLoc(XorResult)));
11759 AddToWorklist(XorResult64.getNode());
11761 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
11762 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
11763 AddToWorklist(FlipBit.getNode());
11765 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11766 AddToWorklist(FlipBits.getNode());
11767 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
11769 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11770 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
11771 X, DAG.getConstant(SignBit, SDLoc(X), VT));
11772 AddToWorklist(X.getNode());
11774 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11775 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
11776 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
11777 AddToWorklist(Cst.getNode());
11779 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11783 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
11784 if (N0.getOpcode() == ISD::BUILD_PAIR)
11785 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11788 // Remove double bitcasts from shuffles - this is often a legacy of
11789 // XformToShuffleWithZero being used to combine bitmaskings (of
11790 // float vectors bitcast to integer vectors) into shuffles.
11791 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
11792 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11793 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11794 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
11795 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
11796 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11798 // If operands are a bitcast, peek through if it casts the original VT.
11799 // If operands are a constant, just bitcast back to original VT.
11800 auto PeekThroughBitcast = [&](SDValue Op) {
11801 if (Op.getOpcode() == ISD::BITCAST &&
11802 Op.getOperand(0).getValueType() == VT)
11803 return SDValue(Op.getOperand(0));
11804 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11805 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
11806 return DAG.getBitcast(VT, Op);
11810 // FIXME: If either input vector is bitcast, try to convert the shuffle to
11811 // the result type of this bitcast. This would eliminate at least one
11812 // bitcast. See the transform in InstCombine.
11813 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11814 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
11819 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
11820 SmallVector<int, 8> NewMask;
11821 for (int M : SVN->getMask())
11822 for (int i = 0; i != MaskScale; ++i)
11823 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11825 SDValue LegalShuffle =
11826 TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
11828 return LegalShuffle;
11834 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11835 EVT VT = N->getValueType(0);
11836 return CombineConsecutiveLoads(N, VT);
11839 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
11840 SDValue N0 = N->getOperand(0);
11842 // (freeze (freeze x)) -> (freeze x)
11843 if (N0.getOpcode() == ISD::FREEZE)
11846 // If the input is a constant, return it.
11847 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
11853 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11854 /// operands. DstEltVT indicates the destination element value type.
11855 SDValue DAGCombiner::
11856 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11857 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11859 // If this is already the right type, we're done.
11860 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11862 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11863 unsigned DstBitSize = DstEltVT.getSizeInBits();
11865 // If this is a conversion of N elements of one type to N elements of another
11866 // type, convert each element. This handles FP<->INT cases.
11867 if (SrcBitSize == DstBitSize) {
11868 SmallVector<SDValue, 8> Ops;
11869 for (SDValue Op : BV->op_values()) {
11870 // If the vector element type is not legal, the BUILD_VECTOR operands
11871 // are promoted and implicitly truncated. Make that explicit here.
11872 if (Op.getValueType() != SrcEltVT)
11873 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11874 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11875 AddToWorklist(Ops.back().getNode());
11877 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11878 BV->getValueType(0).getVectorNumElements());
11879 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
11882 // Otherwise, we're growing or shrinking the elements. To avoid having to
11883 // handle annoying details of growing/shrinking FP values, we convert them to
11885 if (SrcEltVT.isFloatingPoint()) {
11886 // Convert the input float vector to a int vector where the elements are the
11888 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11889 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11893 // Now we know the input is an integer vector. If the output is a FP type,
11894 // convert to integer first, then to FP of the right size.
11895 if (DstEltVT.isFloatingPoint()) {
11896 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11897 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11899 // Next, convert to FP elements of the same size.
11900 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11905 // Okay, we know the src/dst types are both integers of differing types.
11906 // Handling growing first.
11907 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
11908 if (SrcBitSize < DstBitSize) {
11909 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
11911 SmallVector<SDValue, 8> Ops;
11912 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11913 i += NumInputsPerOutput) {
11914 bool isLE = DAG.getDataLayout().isLittleEndian();
11915 APInt NewBits = APInt(DstBitSize, 0);
11916 bool EltIsUndef = true;
11917 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11918 // Shift the previously computed bits over.
11919 NewBits <<= SrcBitSize;
11920 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
11921 if (Op.isUndef()) continue;
11922 EltIsUndef = false;
11924 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11925 zextOrTrunc(SrcBitSize).zext(DstBitSize);
11929 Ops.push_back(DAG.getUNDEF(DstEltVT));
11931 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
11934 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11935 return DAG.getBuildVector(VT, DL, Ops);
11938 // Finally, this must be the case where we are shrinking elements: each input
11939 // turns into multiple outputs.
11940 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11941 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11942 NumOutputsPerInput*BV->getNumOperands());
11943 SmallVector<SDValue, 8> Ops;
11945 for (const SDValue &Op : BV->op_values()) {
11946 if (Op.isUndef()) {
11947 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11951 APInt OpVal = cast<ConstantSDNode>(Op)->
11952 getAPIntValue().zextOrTrunc(SrcBitSize);
11954 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11955 APInt ThisVal = OpVal.trunc(DstBitSize);
11956 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11957 OpVal.lshrInPlace(DstBitSize);
11960 // For big endian targets, swap the order of the pieces of each element.
11961 if (DAG.getDataLayout().isBigEndian())
11962 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11965 return DAG.getBuildVector(VT, DL, Ops);
11968 static bool isContractable(SDNode *N) {
11969 SDNodeFlags F = N->getFlags();
11970 return F.hasAllowContract() || F.hasAllowReassociation();
11973 /// Try to perform FMA combining on a given FADD node.
11974 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11975 SDValue N0 = N->getOperand(0);
11976 SDValue N1 = N->getOperand(1);
11977 EVT VT = N->getValueType(0);
11980 const TargetOptions &Options = DAG.getTarget().Options;
11982 // Floating-point multiply-add with intermediate rounding.
11983 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
11985 // Floating-point multiply-add without intermediate rounding.
11987 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
11988 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11990 // No valid opcode, do not combine.
11991 if (!HasFMAD && !HasFMA)
11994 SDNodeFlags Flags = N->getFlags();
11995 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11996 bool CanReassociate =
11997 Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
11998 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11999 CanFuse || HasFMAD);
12000 // If the addition is not contractable, do not combine.
12001 if (!AllowFusionGlobally && !isContractable(N))
12004 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12007 // Always prefer FMAD to FMA for precision.
12008 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12009 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12011 // Is the node an FMUL and contractable either due to global flags or
12013 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12014 if (N.getOpcode() != ISD::FMUL)
12016 return AllowFusionGlobally || isContractable(N.getNode());
12018 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12019 // prefer to fold the multiply with fewer uses.
12020 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12021 if (N0.getNode()->use_size() > N1.getNode()->use_size())
12025 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
12026 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
12027 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12028 N0.getOperand(0), N0.getOperand(1), N1, Flags);
12031 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
12032 // Note: Commutes FADD operands.
12033 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
12034 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12035 N1.getOperand(0), N1.getOperand(1), N0, Flags);
12038 // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
12039 // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
12040 // This requires reassociation because it changes the order of operations.
12042 if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
12043 N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
12044 N0.getOperand(2).hasOneUse()) {
12047 } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
12048 N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
12049 N1.getOperand(2).hasOneUse()) {
12054 SDValue A = FMA.getOperand(0);
12055 SDValue B = FMA.getOperand(1);
12056 SDValue C = FMA.getOperand(2).getOperand(0);
12057 SDValue D = FMA.getOperand(2).getOperand(1);
12058 SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags);
12059 return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags);
12062 // Look through FP_EXTEND nodes to do more combining.
12064 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
12065 if (N0.getOpcode() == ISD::FP_EXTEND) {
12066 SDValue N00 = N0.getOperand(0);
12067 if (isContractableFMUL(N00) &&
12068 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12069 N00.getValueType())) {
12070 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12071 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12072 N00.getOperand(0)),
12073 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12074 N00.getOperand(1)), N1, Flags);
12078 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
12079 // Note: Commutes FADD operands.
12080 if (N1.getOpcode() == ISD::FP_EXTEND) {
12081 SDValue N10 = N1.getOperand(0);
12082 if (isContractableFMUL(N10) &&
12083 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12084 N10.getValueType())) {
12085 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12086 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12087 N10.getOperand(0)),
12088 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12089 N10.getOperand(1)), N0, Flags);
12093 // More folding opportunities when target permits.
12095 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
12096 // -> (fma x, y, (fma (fpext u), (fpext v), z))
12097 auto FoldFAddFMAFPExtFMul = [&] (
12098 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
12099 SDNodeFlags Flags) {
12100 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
12101 DAG.getNode(PreferredFusedOpcode, SL, VT,
12102 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12103 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12106 if (N0.getOpcode() == PreferredFusedOpcode) {
12107 SDValue N02 = N0.getOperand(2);
12108 if (N02.getOpcode() == ISD::FP_EXTEND) {
12109 SDValue N020 = N02.getOperand(0);
12110 if (isContractableFMUL(N020) &&
12111 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12112 N020.getValueType())) {
12113 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
12114 N020.getOperand(0), N020.getOperand(1),
12120 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
12121 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
12122 // FIXME: This turns two single-precision and one double-precision
12123 // operation into two double-precision operations, which might not be
12124 // interesting for all targets, especially GPUs.
12125 auto FoldFAddFPExtFMAFMul = [&] (
12126 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
12127 SDNodeFlags Flags) {
12128 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12129 DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
12130 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
12131 DAG.getNode(PreferredFusedOpcode, SL, VT,
12132 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12133 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12136 if (N0.getOpcode() == ISD::FP_EXTEND) {
12137 SDValue N00 = N0.getOperand(0);
12138 if (N00.getOpcode() == PreferredFusedOpcode) {
12139 SDValue N002 = N00.getOperand(2);
12140 if (isContractableFMUL(N002) &&
12141 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12142 N00.getValueType())) {
12143 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
12144 N002.getOperand(0), N002.getOperand(1),
12150 // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
12151 // -> (fma y, z, (fma (fpext u), (fpext v), x))
12152 if (N1.getOpcode() == PreferredFusedOpcode) {
12153 SDValue N12 = N1.getOperand(2);
12154 if (N12.getOpcode() == ISD::FP_EXTEND) {
12155 SDValue N120 = N12.getOperand(0);
12156 if (isContractableFMUL(N120) &&
12157 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12158 N120.getValueType())) {
12159 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
12160 N120.getOperand(0), N120.getOperand(1),
12166 // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
12167 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
12168 // FIXME: This turns two single-precision and one double-precision
12169 // operation into two double-precision operations, which might not be
12170 // interesting for all targets, especially GPUs.
12171 if (N1.getOpcode() == ISD::FP_EXTEND) {
12172 SDValue N10 = N1.getOperand(0);
12173 if (N10.getOpcode() == PreferredFusedOpcode) {
12174 SDValue N102 = N10.getOperand(2);
12175 if (isContractableFMUL(N102) &&
12176 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12177 N10.getValueType())) {
12178 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
12179 N102.getOperand(0), N102.getOperand(1),
12189 /// Try to perform FMA combining on a given FSUB node.
12190 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
12191 SDValue N0 = N->getOperand(0);
12192 SDValue N1 = N->getOperand(1);
12193 EVT VT = N->getValueType(0);
12196 const TargetOptions &Options = DAG.getTarget().Options;
12197 // Floating-point multiply-add with intermediate rounding.
12198 bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12200 // Floating-point multiply-add without intermediate rounding.
12202 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12203 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12205 // No valid opcode, do not combine.
12206 if (!HasFMAD && !HasFMA)
12209 const SDNodeFlags Flags = N->getFlags();
12210 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12211 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12212 CanFuse || HasFMAD);
12214 // If the subtraction is not contractable, do not combine.
12215 if (!AllowFusionGlobally && !isContractable(N))
12218 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12221 // Always prefer FMAD to FMA for precision.
12222 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12223 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12224 bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
12226 // Is the node an FMUL and contractable either due to global flags or
12228 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12229 if (N.getOpcode() != ISD::FMUL)
12231 return AllowFusionGlobally || isContractable(N.getNode());
12234 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12235 auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
12236 if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
12237 return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
12238 XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z),
12244 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12245 // Note: Commutes FSUB operands.
12246 auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
12247 if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
12248 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12249 DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
12250 YZ.getOperand(1), X, Flags);
12255 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
12256 // prefer to fold the multiply with fewer uses.
12257 if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
12258 (N0.getNode()->use_size() > N1.getNode()->use_size())) {
12259 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
12260 if (SDValue V = tryToFoldXSubYZ(N0, N1))
12262 // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
12263 if (SDValue V = tryToFoldXYSubZ(N0, N1))
12266 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12267 if (SDValue V = tryToFoldXYSubZ(N0, N1))
12269 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12270 if (SDValue V = tryToFoldXSubYZ(N0, N1))
12274 // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
12275 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
12276 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
12277 SDValue N00 = N0.getOperand(0).getOperand(0);
12278 SDValue N01 = N0.getOperand(0).getOperand(1);
12279 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12280 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
12281 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
12284 // Look through FP_EXTEND nodes to do more combining.
12286 // fold (fsub (fpext (fmul x, y)), z)
12287 // -> (fma (fpext x), (fpext y), (fneg z))
12288 if (N0.getOpcode() == ISD::FP_EXTEND) {
12289 SDValue N00 = N0.getOperand(0);
12290 if (isContractableFMUL(N00) &&
12291 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12292 N00.getValueType())) {
12293 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12294 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12295 N00.getOperand(0)),
12296 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12297 N00.getOperand(1)),
12298 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
12302 // fold (fsub x, (fpext (fmul y, z)))
12303 // -> (fma (fneg (fpext y)), (fpext z), x)
12304 // Note: Commutes FSUB operands.
12305 if (N1.getOpcode() == ISD::FP_EXTEND) {
12306 SDValue N10 = N1.getOperand(0);
12307 if (isContractableFMUL(N10) &&
12308 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12309 N10.getValueType())) {
12310 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12311 DAG.getNode(ISD::FNEG, SL, VT,
12312 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12313 N10.getOperand(0))),
12314 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12315 N10.getOperand(1)),
12320 // fold (fsub (fpext (fneg (fmul, x, y))), z)
12321 // -> (fneg (fma (fpext x), (fpext y), z))
12322 // Note: This could be removed with appropriate canonicalization of the
12323 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
12324 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
12325 // from implementing the canonicalization in visitFSUB.
12326 if (N0.getOpcode() == ISD::FP_EXTEND) {
12327 SDValue N00 = N0.getOperand(0);
12328 if (N00.getOpcode() == ISD::FNEG) {
12329 SDValue N000 = N00.getOperand(0);
12330 if (isContractableFMUL(N000) &&
12331 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12332 N00.getValueType())) {
12333 return DAG.getNode(ISD::FNEG, SL, VT,
12334 DAG.getNode(PreferredFusedOpcode, SL, VT,
12335 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12336 N000.getOperand(0)),
12337 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12338 N000.getOperand(1)),
12344 // fold (fsub (fneg (fpext (fmul, x, y))), z)
12345 // -> (fneg (fma (fpext x)), (fpext y), z)
12346 // Note: This could be removed with appropriate canonicalization of the
12347 // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
12348 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
12349 // from implementing the canonicalization in visitFSUB.
12350 if (N0.getOpcode() == ISD::FNEG) {
12351 SDValue N00 = N0.getOperand(0);
12352 if (N00.getOpcode() == ISD::FP_EXTEND) {
12353 SDValue N000 = N00.getOperand(0);
12354 if (isContractableFMUL(N000) &&
12355 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12356 N000.getValueType())) {
12357 return DAG.getNode(ISD::FNEG, SL, VT,
12358 DAG.getNode(PreferredFusedOpcode, SL, VT,
12359 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12360 N000.getOperand(0)),
12361 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12362 N000.getOperand(1)),
12368 // More folding opportunities when target permits.
12370 // fold (fsub (fma x, y, (fmul u, v)), z)
12371 // -> (fma x, y (fma u, v, (fneg z)))
12372 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
12373 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
12374 N0.getOperand(2)->hasOneUse()) {
12375 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12376 N0.getOperand(0), N0.getOperand(1),
12377 DAG.getNode(PreferredFusedOpcode, SL, VT,
12378 N0.getOperand(2).getOperand(0),
12379 N0.getOperand(2).getOperand(1),
12380 DAG.getNode(ISD::FNEG, SL, VT,
12381 N1), Flags), Flags);
12384 // fold (fsub x, (fma y, z, (fmul u, v)))
12385 // -> (fma (fneg y), z, (fma (fneg u), v, x))
12386 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
12387 isContractableFMUL(N1.getOperand(2)) &&
12388 N1->hasOneUse() && NoSignedZero) {
12389 SDValue N20 = N1.getOperand(2).getOperand(0);
12390 SDValue N21 = N1.getOperand(2).getOperand(1);
12391 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12392 DAG.getNode(ISD::FNEG, SL, VT,
12395 DAG.getNode(PreferredFusedOpcode, SL, VT,
12396 DAG.getNode(ISD::FNEG, SL, VT, N20),
12397 N21, N0, Flags), Flags);
12401 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
12402 // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
12403 if (N0.getOpcode() == PreferredFusedOpcode &&
12405 SDValue N02 = N0.getOperand(2);
12406 if (N02.getOpcode() == ISD::FP_EXTEND) {
12407 SDValue N020 = N02.getOperand(0);
12408 if (isContractableFMUL(N020) &&
12409 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12410 N020.getValueType())) {
12411 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12412 N0.getOperand(0), N0.getOperand(1),
12413 DAG.getNode(PreferredFusedOpcode, SL, VT,
12414 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12415 N020.getOperand(0)),
12416 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12417 N020.getOperand(1)),
12418 DAG.getNode(ISD::FNEG, SL, VT,
12419 N1), Flags), Flags);
12424 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
12425 // -> (fma (fpext x), (fpext y),
12426 // (fma (fpext u), (fpext v), (fneg z)))
12427 // FIXME: This turns two single-precision and one double-precision
12428 // operation into two double-precision operations, which might not be
12429 // interesting for all targets, especially GPUs.
12430 if (N0.getOpcode() == ISD::FP_EXTEND) {
12431 SDValue N00 = N0.getOperand(0);
12432 if (N00.getOpcode() == PreferredFusedOpcode) {
12433 SDValue N002 = N00.getOperand(2);
12434 if (isContractableFMUL(N002) &&
12435 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12436 N00.getValueType())) {
12437 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12438 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12439 N00.getOperand(0)),
12440 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12441 N00.getOperand(1)),
12442 DAG.getNode(PreferredFusedOpcode, SL, VT,
12443 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12444 N002.getOperand(0)),
12445 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12446 N002.getOperand(1)),
12447 DAG.getNode(ISD::FNEG, SL, VT,
12448 N1), Flags), Flags);
12453 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
12454 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
12455 if (N1.getOpcode() == PreferredFusedOpcode &&
12456 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
12458 SDValue N120 = N1.getOperand(2).getOperand(0);
12459 if (isContractableFMUL(N120) &&
12460 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12461 N120.getValueType())) {
12462 SDValue N1200 = N120.getOperand(0);
12463 SDValue N1201 = N120.getOperand(1);
12464 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12465 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
12467 DAG.getNode(PreferredFusedOpcode, SL, VT,
12468 DAG.getNode(ISD::FNEG, SL, VT,
12469 DAG.getNode(ISD::FP_EXTEND, SL,
12471 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12473 N0, Flags), Flags);
12477 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
12478 // -> (fma (fneg (fpext y)), (fpext z),
12479 // (fma (fneg (fpext u)), (fpext v), x))
12480 // FIXME: This turns two single-precision and one double-precision
12481 // operation into two double-precision operations, which might not be
12482 // interesting for all targets, especially GPUs.
12483 if (N1.getOpcode() == ISD::FP_EXTEND &&
12484 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
12485 SDValue CvtSrc = N1.getOperand(0);
12486 SDValue N100 = CvtSrc.getOperand(0);
12487 SDValue N101 = CvtSrc.getOperand(1);
12488 SDValue N102 = CvtSrc.getOperand(2);
12489 if (isContractableFMUL(N102) &&
12490 TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12491 CvtSrc.getValueType())) {
12492 SDValue N1020 = N102.getOperand(0);
12493 SDValue N1021 = N102.getOperand(1);
12494 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12495 DAG.getNode(ISD::FNEG, SL, VT,
12496 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12498 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
12499 DAG.getNode(PreferredFusedOpcode, SL, VT,
12500 DAG.getNode(ISD::FNEG, SL, VT,
12501 DAG.getNode(ISD::FP_EXTEND, SL,
12503 DAG.getNode(ISD::FP_EXTEND, SL, VT,
12505 N0, Flags), Flags);
12513 /// Try to perform FMA combining on a given FMUL node based on the distributive
12514 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
12515 /// subtraction instead of addition).
12516 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
12517 SDValue N0 = N->getOperand(0);
12518 SDValue N1 = N->getOperand(1);
12519 EVT VT = N->getValueType(0);
12521 const SDNodeFlags Flags = N->getFlags();
12523 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
12525 const TargetOptions &Options = DAG.getTarget().Options;
12527 // The transforms below are incorrect when x == 0 and y == inf, because the
12528 // intermediate multiplication produces a nan.
12529 if (!Options.NoInfsFPMath)
12532 // Floating-point multiply-add without intermediate rounding.
12534 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
12535 TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12536 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12538 // Floating-point multiply-add with intermediate rounding. This can result
12539 // in a less precise result due to the changed rounding order.
12540 bool HasFMAD = Options.UnsafeFPMath &&
12541 (LegalOperations && TLI.isFMADLegal(DAG, N));
12543 // No valid opcode, do not combine.
12544 if (!HasFMAD && !HasFMA)
12547 // Always prefer FMAD to FMA for precision.
12548 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12549 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12551 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
12552 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
12553 auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12554 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
12555 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
12556 if (C->isExactlyValue(+1.0))
12557 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12559 if (C->isExactlyValue(-1.0))
12560 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12561 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12567 if (SDValue FMA = FuseFADD(N0, N1, Flags))
12569 if (SDValue FMA = FuseFADD(N1, N0, Flags))
12572 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
12573 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
12574 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
12575 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
12576 auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
12577 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
12578 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
12579 if (C0->isExactlyValue(+1.0))
12580 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12581 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12583 if (C0->isExactlyValue(-1.0))
12584 return DAG.getNode(PreferredFusedOpcode, SL, VT,
12585 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
12586 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12588 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
12589 if (C1->isExactlyValue(+1.0))
12590 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12591 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
12592 if (C1->isExactlyValue(-1.0))
12593 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
12600 if (SDValue FMA = FuseFSUB(N0, N1, Flags))
12602 if (SDValue FMA = FuseFSUB(N1, N0, Flags))
12608 SDValue DAGCombiner::visitFADD(SDNode *N) {
12609 SDValue N0 = N->getOperand(0);
12610 SDValue N1 = N->getOperand(1);
12611 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12612 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12613 EVT VT = N->getValueType(0);
12615 const TargetOptions &Options = DAG.getTarget().Options;
12616 const SDNodeFlags Flags = N->getFlags();
12618 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12623 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12626 // fold (fadd c1, c2) -> c1 + c2
12627 if (N0CFP && N1CFP)
12628 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
12630 // canonicalize constant to RHS
12631 if (N0CFP && !N1CFP)
12632 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
12634 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
12635 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
12636 if (N1C && N1C->isZero())
12637 if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
12640 if (SDValue NewSel = foldBinOpIntoSelect(N))
12643 // fold (fadd A, (fneg B)) -> (fsub A, B)
12644 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
12645 if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
12646 N1, DAG, LegalOperations, ForCodeSize))
12647 return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags);
12649 // fold (fadd (fneg A), B) -> (fsub B, A)
12650 if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
12651 if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
12652 N0, DAG, LegalOperations, ForCodeSize))
12653 return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags);
12655 auto isFMulNegTwo = [](SDValue FMul) {
12656 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
12658 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
12659 return C && C->isExactlyValue(-2.0);
12662 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
12663 if (isFMulNegTwo(N0)) {
12664 SDValue B = N0.getOperand(0);
12665 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12666 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
12668 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
12669 if (isFMulNegTwo(N1)) {
12670 SDValue B = N1.getOperand(0);
12671 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12672 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
12675 // No FP constant should be created after legalization as Instruction
12676 // Selection pass has a hard time dealing with FP constants.
12677 bool AllowNewConst = (Level < AfterLegalizeDAG);
12679 // If nnan is enabled, fold lots of things.
12680 if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
12681 // If allowed, fold (fadd (fneg x), x) -> 0.0
12682 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
12683 return DAG.getConstantFP(0.0, DL, VT);
12685 // If allowed, fold (fadd x, (fneg x)) -> 0.0
12686 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
12687 return DAG.getConstantFP(0.0, DL, VT);
12690 // If 'unsafe math' or reassoc and nsz, fold lots of things.
12691 // TODO: break out portions of the transformations below for which Unsafe is
12692 // considered and which do not require both nsz and reassoc
12693 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12694 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12696 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
12697 if (N1CFP && N0.getOpcode() == ISD::FADD &&
12698 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12699 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
12700 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
12703 // We can fold chains of FADD's of the same value into multiplications.
12704 // This transform is not safe in general because we are reducing the number
12705 // of rounding steps.
12706 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
12707 if (N0.getOpcode() == ISD::FMUL) {
12708 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12709 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
12711 // (fadd (fmul x, c), x) -> (fmul x, c+1)
12712 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
12713 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12714 DAG.getConstantFP(1.0, DL, VT), Flags);
12715 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
12718 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
12719 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
12720 N1.getOperand(0) == N1.getOperand(1) &&
12721 N0.getOperand(0) == N1.getOperand(0)) {
12722 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12723 DAG.getConstantFP(2.0, DL, VT), Flags);
12724 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
12728 if (N1.getOpcode() == ISD::FMUL) {
12729 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12730 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
12732 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
12733 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
12734 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12735 DAG.getConstantFP(1.0, DL, VT), Flags);
12736 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
12739 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
12740 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
12741 N0.getOperand(0) == N0.getOperand(1) &&
12742 N1.getOperand(0) == N0.getOperand(0)) {
12743 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12744 DAG.getConstantFP(2.0, DL, VT), Flags);
12745 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
12749 if (N0.getOpcode() == ISD::FADD) {
12750 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12751 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
12752 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
12753 (N0.getOperand(0) == N1)) {
12754 return DAG.getNode(ISD::FMUL, DL, VT,
12755 N1, DAG.getConstantFP(3.0, DL, VT), Flags);
12759 if (N1.getOpcode() == ISD::FADD) {
12760 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12761 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
12762 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
12763 N1.getOperand(0) == N0) {
12764 return DAG.getNode(ISD::FMUL, DL, VT,
12765 N0, DAG.getConstantFP(3.0, DL, VT), Flags);
12769 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
12770 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
12771 N0.getOperand(0) == N0.getOperand(1) &&
12772 N1.getOperand(0) == N1.getOperand(1) &&
12773 N0.getOperand(0) == N1.getOperand(0)) {
12774 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
12775 DAG.getConstantFP(4.0, DL, VT), Flags);
12778 } // enable-unsafe-fp-math
12780 // FADD -> FMA combines:
12781 if (SDValue Fused = visitFADDForFMACombine(N)) {
12782 AddToWorklist(Fused.getNode());
12788 SDValue DAGCombiner::visitFSUB(SDNode *N) {
12789 SDValue N0 = N->getOperand(0);
12790 SDValue N1 = N->getOperand(1);
12791 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12792 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12793 EVT VT = N->getValueType(0);
12795 const TargetOptions &Options = DAG.getTarget().Options;
12796 const SDNodeFlags Flags = N->getFlags();
12798 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12803 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12806 // fold (fsub c1, c2) -> c1-c2
12807 if (N0CFP && N1CFP)
12808 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12810 if (SDValue NewSel = foldBinOpIntoSelect(N))
12813 // (fsub A, 0) -> A
12814 if (N1CFP && N1CFP->isZero()) {
12815 if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
12816 Flags.hasNoSignedZeros()) {
12822 // (fsub x, x) -> 0.0
12823 if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12824 return DAG.getConstantFP(0.0f, DL, VT);
12827 // (fsub -0.0, N1) -> -N1
12828 // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12829 // FSUB does not specify the sign bit of a NaN. Also note that for
12830 // the same reason, the inverse transform is not safe, unless fast math
12831 // flags are in play.
12832 if (N0CFP && N0CFP->isZero()) {
12833 if (N0CFP->isNegative() ||
12834 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
12835 if (SDValue NegN1 =
12836 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
12838 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12839 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12843 if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12844 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12845 N1.getOpcode() == ISD::FADD) {
12846 // X - (X + Y) -> -Y
12847 if (N0 == N1->getOperand(0))
12848 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12849 // X - (Y + X) -> -Y
12850 if (N0 == N1->getOperand(1))
12851 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12854 // fold (fsub A, (fneg B)) -> (fadd A, B)
12855 if (SDValue NegN1 =
12856 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
12857 return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags);
12859 // FSUB -> FMA combines:
12860 if (SDValue Fused = visitFSUBForFMACombine(N)) {
12861 AddToWorklist(Fused.getNode());
12868 SDValue DAGCombiner::visitFMUL(SDNode *N) {
12869 SDValue N0 = N->getOperand(0);
12870 SDValue N1 = N->getOperand(1);
12871 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12872 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12873 EVT VT = N->getValueType(0);
12875 const TargetOptions &Options = DAG.getTarget().Options;
12876 const SDNodeFlags Flags = N->getFlags();
12878 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
12882 if (VT.isVector()) {
12883 // This just handles C1 * C2 for vectors. Other vector folds are below.
12884 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12888 // fold (fmul c1, c2) -> c1*c2
12889 if (N0CFP && N1CFP)
12890 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12892 // canonicalize constant to RHS
12893 if (isConstantFPBuildVectorOrConstantFP(N0) &&
12894 !isConstantFPBuildVectorOrConstantFP(N1))
12895 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12897 if (SDValue NewSel = foldBinOpIntoSelect(N))
12900 if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12901 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12902 // fold (fmul A, 0) -> 0
12903 if (N1CFP && N1CFP->isZero())
12907 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12908 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
12909 if (isConstantFPBuildVectorOrConstantFP(N1) &&
12910 N0.getOpcode() == ISD::FMUL) {
12911 SDValue N00 = N0.getOperand(0);
12912 SDValue N01 = N0.getOperand(1);
12913 // Avoid an infinite loop by making sure that N00 is not a constant
12914 // (the inner multiply has not been constant folded yet).
12915 if (isConstantFPBuildVectorOrConstantFP(N01) &&
12916 !isConstantFPBuildVectorOrConstantFP(N00)) {
12917 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12918 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12922 // Match a special-case: we convert X * 2.0 into fadd.
12923 // fmul (fadd X, X), C -> fmul X, 2.0 * C
12924 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12925 N0.getOperand(0) == N0.getOperand(1)) {
12926 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12927 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12928 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12932 // fold (fmul X, 2.0) -> (fadd X, X)
12933 if (N1CFP && N1CFP->isExactlyValue(+2.0))
12934 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12936 // fold (fmul X, -1.0) -> (fneg X)
12937 if (N1CFP && N1CFP->isExactlyValue(-1.0))
12938 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12939 return DAG.getNode(ISD::FNEG, DL, VT, N0);
12941 // -N0 * -N1 --> N0 * N1
12942 TargetLowering::NegatibleCost CostN0 =
12943 TargetLowering::NegatibleCost::Expensive;
12944 TargetLowering::NegatibleCost CostN1 =
12945 TargetLowering::NegatibleCost::Expensive;
12947 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
12949 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
12950 if (NegN0 && NegN1 &&
12951 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
12952 CostN1 == TargetLowering::NegatibleCost::Cheaper))
12953 return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
12955 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12956 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
12957 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12958 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12959 TLI.isOperationLegal(ISD::FABS, VT)) {
12960 SDValue Select = N0, X = N1;
12961 if (Select.getOpcode() != ISD::SELECT)
12962 std::swap(Select, X);
12964 SDValue Cond = Select.getOperand(0);
12965 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12966 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
12968 if (TrueOpnd && FalseOpnd &&
12969 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12970 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12971 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12972 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12981 std::swap(TrueOpnd, FalseOpnd);
12989 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12990 TLI.isOperationLegal(ISD::FNEG, VT))
12991 return DAG.getNode(ISD::FNEG, DL, VT,
12992 DAG.getNode(ISD::FABS, DL, VT, X));
12993 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12994 return DAG.getNode(ISD::FABS, DL, VT, X);
13001 // FMUL -> FMA combines:
13002 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
13003 AddToWorklist(Fused.getNode());
13010 SDValue DAGCombiner::visitFMA(SDNode *N) {
13011 SDValue N0 = N->getOperand(0);
13012 SDValue N1 = N->getOperand(1);
13013 SDValue N2 = N->getOperand(2);
13014 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13015 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13016 EVT VT = N->getValueType(0);
13018 const TargetOptions &Options = DAG.getTarget().Options;
13020 // FMA nodes have flags that propagate to the created nodes.
13021 const SDNodeFlags Flags = N->getFlags();
13022 bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
13024 // Constant fold FMA.
13025 if (isa<ConstantFPSDNode>(N0) &&
13026 isa<ConstantFPSDNode>(N1) &&
13027 isa<ConstantFPSDNode>(N2)) {
13028 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
13031 // (-N0 * -N1) + N2 --> (N0 * N1) + N2
13032 TargetLowering::NegatibleCost CostN0 =
13033 TargetLowering::NegatibleCost::Expensive;
13034 TargetLowering::NegatibleCost CostN1 =
13035 TargetLowering::NegatibleCost::Expensive;
13037 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13039 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13040 if (NegN0 && NegN1 &&
13041 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13042 CostN1 == TargetLowering::NegatibleCost::Cheaper))
13043 return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
13045 if (UnsafeFPMath) {
13046 if (N0CFP && N0CFP->isZero())
13048 if (N1CFP && N1CFP->isZero())
13051 // TODO: The FMA node should have flags that propagate to these nodes.
13052 if (N0CFP && N0CFP->isExactlyValue(1.0))
13053 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
13054 if (N1CFP && N1CFP->isExactlyValue(1.0))
13055 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
13057 // Canonicalize (fma c, x, y) -> (fma x, c, y)
13058 if (isConstantFPBuildVectorOrConstantFP(N0) &&
13059 !isConstantFPBuildVectorOrConstantFP(N1))
13060 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
13062 if (UnsafeFPMath) {
13063 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
13064 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
13065 isConstantFPBuildVectorOrConstantFP(N1) &&
13066 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
13067 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13068 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
13072 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
13073 if (N0.getOpcode() == ISD::FMUL &&
13074 isConstantFPBuildVectorOrConstantFP(N1) &&
13075 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13076 return DAG.getNode(ISD::FMA, DL, VT,
13078 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
13084 // (fma x, 1, y) -> (fadd x, y)
13085 // (fma x, -1, y) -> (fadd (fneg x), y)
13087 if (N1CFP->isExactlyValue(1.0))
13088 // TODO: The FMA node should have flags that propagate to this node.
13089 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
13091 if (N1CFP->isExactlyValue(-1.0) &&
13092 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
13093 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
13094 AddToWorklist(RHSNeg.getNode());
13095 // TODO: The FMA node should have flags that propagate to this node.
13096 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
13099 // fma (fneg x), K, y -> fma x -K, y
13100 if (N0.getOpcode() == ISD::FNEG &&
13101 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13102 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
13104 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13105 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
13109 if (UnsafeFPMath) {
13110 // (fma x, c, x) -> (fmul x, (c+1))
13111 if (N1CFP && N0 == N2) {
13112 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13113 DAG.getNode(ISD::FADD, DL, VT, N1,
13114 DAG.getConstantFP(1.0, DL, VT), Flags),
13118 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
13119 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
13120 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13121 DAG.getNode(ISD::FADD, DL, VT, N1,
13122 DAG.getConstantFP(-1.0, DL, VT), Flags),
13127 // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
13128 // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
13129 if (!TLI.isFNegFree(VT))
13130 if (SDValue Neg = TLI.getCheaperNegatedExpression(
13131 SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
13132 return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags);
13136 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13138 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
13139 // Notice that this is not always beneficial. One reason is different targets
13140 // may have different costs for FDIV and FMUL, so sometimes the cost of two
13141 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
13142 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
13143 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
13144 // TODO: Limit this transform based on optsize/minsize - it always creates at
13145 // least 1 extra instruction. But the perf win may be substantial enough
13146 // that only minsize should restrict this.
13147 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
13148 const SDNodeFlags Flags = N->getFlags();
13149 if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
13152 // Skip if current node is a reciprocal/fneg-reciprocal.
13153 SDValue N0 = N->getOperand(0);
13154 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
13155 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
13158 // Exit early if the target does not want this transform or if there can't
13159 // possibly be enough uses of the divisor to make the transform worthwhile.
13160 SDValue N1 = N->getOperand(1);
13161 unsigned MinUses = TLI.combineRepeatedFPDivisors();
13163 // For splat vectors, scale the number of uses by the splat factor. If we can
13164 // convert the division into a scalar op, that will likely be much faster.
13165 unsigned NumElts = 1;
13166 EVT VT = N->getValueType(0);
13167 if (VT.isVector() && DAG.isSplatValue(N1))
13168 NumElts = VT.getVectorNumElements();
13170 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
13173 // Find all FDIV users of the same divisor.
13174 // Use a set because duplicates may be present in the user list.
13175 SetVector<SDNode *> Users;
13176 for (auto *U : N1->uses()) {
13177 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
13178 // This division is eligible for optimization only if global unsafe math
13179 // is enabled or if this division allows reciprocal formation.
13180 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
13185 // Now that we have the actual number of divisor uses, make sure it meets
13186 // the minimum threshold specified by the target.
13187 if ((Users.size() * NumElts) < MinUses)
13191 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
13192 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
13194 // Dividend / Divisor -> Dividend * Reciprocal
13195 for (auto *U : Users) {
13196 SDValue Dividend = U->getOperand(0);
13197 if (Dividend != FPOne) {
13198 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
13199 Reciprocal, Flags);
13200 CombineTo(U, NewNode);
13201 } else if (U != Reciprocal.getNode()) {
13202 // In the absence of fast-math-flags, this user node is always the
13203 // same node as Reciprocal, but with FMF they may be different nodes.
13204 CombineTo(U, Reciprocal);
13207 return SDValue(N, 0); // N was replaced.
13210 SDValue DAGCombiner::visitFDIV(SDNode *N) {
13211 SDValue N0 = N->getOperand(0);
13212 SDValue N1 = N->getOperand(1);
13213 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13214 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13215 EVT VT = N->getValueType(0);
13217 const TargetOptions &Options = DAG.getTarget().Options;
13218 SDNodeFlags Flags = N->getFlags();
13220 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13225 if (SDValue FoldedVOp = SimplifyVBinOp(N))
13228 // fold (fdiv c1, c2) -> c1/c2
13229 if (N0CFP && N1CFP)
13230 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
13232 if (SDValue NewSel = foldBinOpIntoSelect(N))
13235 if (SDValue V = combineRepeatedFPDivisors(N))
13238 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
13239 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
13241 // Compute the reciprocal 1.0 / c2.
13242 const APFloat &N1APF = N1CFP->getValueAPF();
13243 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
13244 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
13245 // Only do the transform if the reciprocal is a legal fp immediate that
13246 // isn't too nasty (eg NaN, denormal, ...).
13247 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
13248 (!LegalOperations ||
13249 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
13250 // backend)... we should handle this gracefully after Legalize.
13251 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
13252 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13253 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
13254 return DAG.getNode(ISD::FMUL, DL, VT, N0,
13255 DAG.getConstantFP(Recip, DL, VT), Flags);
13258 // If this FDIV is part of a reciprocal square root, it may be folded
13259 // into a target-specific square root estimate instruction.
13260 if (N1.getOpcode() == ISD::FSQRT) {
13261 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
13262 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
13263 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
13264 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13265 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
13267 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
13268 AddToWorklist(RV.getNode());
13269 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
13271 } else if (N1.getOpcode() == ISD::FP_ROUND &&
13272 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13273 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
13275 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
13276 AddToWorklist(RV.getNode());
13277 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
13279 } else if (N1.getOpcode() == ISD::FMUL) {
13280 // Look through an FMUL. Even though this won't remove the FDIV directly,
13281 // it's still worthwhile to get rid of the FSQRT if possible.
13283 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13284 Sqrt = N1.getOperand(0);
13285 Y = N1.getOperand(1);
13286 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
13287 Sqrt = N1.getOperand(1);
13288 Y = N1.getOperand(0);
13290 if (Sqrt.getNode()) {
13291 // If the other multiply operand is known positive, pull it into the
13292 // sqrt. That will eliminate the division if we convert to an estimate:
13293 // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
13294 // TODO: Also fold the case where A == Z (fabs is missing).
13295 if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
13296 N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() &&
13297 Y.getOpcode() == ISD::FABS && Y.hasOneUse()) {
13298 SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0),
13299 Y.getOperand(0), Flags);
13301 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags);
13302 if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
13303 return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags);
13305 // Estimate creation failed. Clean up speculatively created nodes.
13306 recursivelyDeleteUnusedNodes(AAZ.getNode());
13309 // We found a FSQRT, so try to make this fold:
13310 // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
13311 if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
13312 SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags);
13313 AddToWorklist(Div.getNode());
13314 return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags);
13319 // Fold into a reciprocal estimate and multiply instead of a real divide.
13320 if (Options.NoInfsFPMath || Flags.hasNoInfs())
13321 if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
13325 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
13326 TargetLowering::NegatibleCost CostN0 =
13327 TargetLowering::NegatibleCost::Expensive;
13328 TargetLowering::NegatibleCost CostN1 =
13329 TargetLowering::NegatibleCost::Expensive;
13331 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13333 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13334 if (NegN0 && NegN1 &&
13335 (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13336 CostN1 == TargetLowering::NegatibleCost::Cheaper))
13337 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags);
13342 SDValue DAGCombiner::visitFREM(SDNode *N) {
13343 SDValue N0 = N->getOperand(0);
13344 SDValue N1 = N->getOperand(1);
13345 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13346 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13347 EVT VT = N->getValueType(0);
13348 SDNodeFlags Flags = N->getFlags();
13350 if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13353 // fold (frem c1, c2) -> fmod(c1,c2)
13354 if (N0CFP && N1CFP)
13355 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
13357 if (SDValue NewSel = foldBinOpIntoSelect(N))
13363 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
13364 SDNodeFlags Flags = N->getFlags();
13365 const TargetOptions &Options = DAG.getTarget().Options;
13367 // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
13368 // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
13369 if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
13370 (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
13373 SDValue N0 = N->getOperand(0);
13374 if (TLI.isFsqrtCheap(N0, DAG))
13377 // FSQRT nodes have flags that propagate to the created nodes.
13378 return buildSqrtEstimate(N0, Flags);
13381 /// copysign(x, fp_extend(y)) -> copysign(x, y)
13382 /// copysign(x, fp_round(y)) -> copysign(x, y)
13383 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
13384 SDValue N1 = N->getOperand(1);
13385 if ((N1.getOpcode() == ISD::FP_EXTEND ||
13386 N1.getOpcode() == ISD::FP_ROUND)) {
13387 // Do not optimize out type conversion of f128 type yet.
13388 // For some targets like x86_64, configuration is changed to keep one f128
13389 // value in one SSE register, but instruction selection cannot handle
13390 // FCOPYSIGN on SSE registers yet.
13391 EVT N1VT = N1->getValueType(0);
13392 EVT N1Op0VT = N1->getOperand(0).getValueType();
13393 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
13398 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
13399 SDValue N0 = N->getOperand(0);
13400 SDValue N1 = N->getOperand(1);
13401 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
13402 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
13403 EVT VT = N->getValueType(0);
13405 if (N0CFP && N1CFP) // Constant fold
13406 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
13408 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
13409 const APFloat &V = N1C->getValueAPF();
13410 // copysign(x, c1) -> fabs(x) iff ispos(c1)
13411 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
13412 if (!V.isNegative()) {
13413 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
13414 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13416 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13417 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
13418 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
13422 // copysign(fabs(x), y) -> copysign(x, y)
13423 // copysign(fneg(x), y) -> copysign(x, y)
13424 // copysign(copysign(x,z), y) -> copysign(x, y)
13425 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
13426 N0.getOpcode() == ISD::FCOPYSIGN)
13427 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
13429 // copysign(x, abs(y)) -> abs(x)
13430 if (N1.getOpcode() == ISD::FABS)
13431 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13433 // copysign(x, copysign(y,z)) -> copysign(x, z)
13434 if (N1.getOpcode() == ISD::FCOPYSIGN)
13435 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
13437 // copysign(x, fp_extend(y)) -> copysign(x, y)
13438 // copysign(x, fp_round(y)) -> copysign(x, y)
13439 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
13440 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
13445 SDValue DAGCombiner::visitFPOW(SDNode *N) {
13446 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
13450 // Try to convert x ** (1/3) into cube root.
13451 // TODO: Handle the various flavors of long double.
13452 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
13453 // Some range near 1/3 should be fine.
13454 EVT VT = N->getValueType(0);
13455 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
13456 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
13457 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
13458 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
13459 // pow(-val, 1/3) = nan; cbrt(-val) = -num.
13460 // For regular numbers, rounding may cause the results to differ.
13461 // Therefore, we require { nsz ninf nnan afn } for this transform.
13462 // TODO: We could select out the special cases if we don't have nsz/ninf.
13463 SDNodeFlags Flags = N->getFlags();
13464 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
13465 !Flags.hasApproximateFuncs())
13468 // Do not create a cbrt() libcall if the target does not have it, and do not
13469 // turn a pow that has lowering support into a cbrt() libcall.
13470 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
13471 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
13472 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
13475 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
13478 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
13479 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
13480 // TODO: This could be extended (using a target hook) to handle smaller
13481 // power-of-2 fractional exponents.
13482 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
13483 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
13484 if (ExponentIs025 || ExponentIs075) {
13485 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
13486 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
13487 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
13488 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
13489 // For regular numbers, rounding may cause the results to differ.
13490 // Therefore, we require { nsz ninf afn } for this transform.
13491 // TODO: We could select out the special cases if we don't have nsz/ninf.
13492 SDNodeFlags Flags = N->getFlags();
13494 // We only need no signed zeros for the 0.25 case.
13495 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
13496 !Flags.hasApproximateFuncs())
13499 // Don't double the number of libcalls. We are trying to inline fast code.
13500 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
13503 // Assume that libcalls are the smallest code.
13504 // TODO: This restriction should probably be lifted for vectors.
13508 // pow(X, 0.25) --> sqrt(sqrt(X))
13510 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
13511 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
13514 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
13515 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
13521 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
13522 const TargetLowering &TLI) {
13523 // This optimization is guarded by a function attribute because it may produce
13524 // unexpected results. Ie, programs may be relying on the platform-specific
13525 // undefined behavior when the float-to-int conversion overflows.
13526 const Function &F = DAG.getMachineFunction().getFunction();
13527 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
13528 if (StrictOverflow.getValueAsString().equals("false"))
13531 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
13532 // replacing casts with a libcall. We also must be allowed to ignore -0.0
13533 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
13534 // conversions would return +0.0.
13535 // FIXME: We should be able to use node-level FMF here.
13536 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
13537 EVT VT = N->getValueType(0);
13538 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
13539 !DAG.getTarget().Options.NoSignedZerosFPMath)
13542 // fptosi/fptoui round towards zero, so converting from FP to integer and
13543 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
13544 SDValue N0 = N->getOperand(0);
13545 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
13546 N0.getOperand(0).getValueType() == VT)
13547 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13549 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
13550 N0.getOperand(0).getValueType() == VT)
13551 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
13556 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
13557 SDValue N0 = N->getOperand(0);
13558 EVT VT = N->getValueType(0);
13559 EVT OpVT = N0.getValueType();
13561 // [us]itofp(undef) = 0, because the result value is bounded.
13563 return DAG.getConstantFP(0.0, SDLoc(N), VT);
13565 // fold (sint_to_fp c1) -> c1fp
13566 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13567 // ...but only if the target supports immediate floating-point values
13568 (!LegalOperations ||
13569 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13570 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13572 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
13573 // but UINT_TO_FP is legal on this target, try to convert.
13574 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
13575 hasOperation(ISD::UINT_TO_FP, OpVT)) {
13576 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
13577 if (DAG.SignBitIsZero(N0))
13578 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13581 // The next optimizations are desirable only if SELECT_CC can be lowered.
13582 // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
13583 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
13585 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13587 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
13588 DAG.getConstantFP(0.0, DL, VT));
13591 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
13592 // (select (setcc x, y, cc), 1.0, 0.0)
13593 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
13594 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
13595 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13597 return DAG.getSelect(DL, VT, N0.getOperand(0),
13598 DAG.getConstantFP(1.0, DL, VT),
13599 DAG.getConstantFP(0.0, DL, VT));
13602 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13608 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
13609 SDValue N0 = N->getOperand(0);
13610 EVT VT = N->getValueType(0);
13611 EVT OpVT = N0.getValueType();
13613 // [us]itofp(undef) = 0, because the result value is bounded.
13615 return DAG.getConstantFP(0.0, SDLoc(N), VT);
13617 // fold (uint_to_fp c1) -> c1fp
13618 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
13619 // ...but only if the target supports immediate floating-point values
13620 (!LegalOperations ||
13621 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
13622 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
13624 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
13625 // but SINT_TO_FP is legal on this target, try to convert.
13626 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
13627 hasOperation(ISD::SINT_TO_FP, OpVT)) {
13628 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
13629 if (DAG.SignBitIsZero(N0))
13630 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
13633 // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
13634 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
13635 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
13637 return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
13638 DAG.getConstantFP(0.0, DL, VT));
13641 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13647 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
13648 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
13649 SDValue N0 = N->getOperand(0);
13650 EVT VT = N->getValueType(0);
13652 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
13655 SDValue Src = N0.getOperand(0);
13656 EVT SrcVT = Src.getValueType();
13657 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
13658 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
13660 // We can safely assume the conversion won't overflow the output range,
13661 // because (for example) (uint8_t)18293.f is undefined behavior.
13663 // Since we can assume the conversion won't overflow, our decision as to
13664 // whether the input will fit in the float should depend on the minimum
13665 // of the input range and output range.
13667 // This means this is also safe for a signed input and unsigned output, since
13668 // a negative input would lead to undefined behavior.
13669 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
13670 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
13671 unsigned ActualSize = std::min(InputSize, OutputSize);
13672 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
13674 // We can only fold away the float conversion if the input range can be
13675 // represented exactly in the float range.
13676 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
13677 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
13678 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
13679 : ISD::ZERO_EXTEND;
13680 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
13682 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
13683 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
13684 return DAG.getBitcast(VT, Src);
13689 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
13690 SDValue N0 = N->getOperand(0);
13691 EVT VT = N->getValueType(0);
13693 // fold (fp_to_sint undef) -> undef
13695 return DAG.getUNDEF(VT);
13697 // fold (fp_to_sint c1fp) -> c1
13698 if (isConstantFPBuildVectorOrConstantFP(N0))
13699 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13701 return FoldIntToFPToInt(N, DAG);
13704 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
13705 SDValue N0 = N->getOperand(0);
13706 EVT VT = N->getValueType(0);
13708 // fold (fp_to_uint undef) -> undef
13710 return DAG.getUNDEF(VT);
13712 // fold (fp_to_uint c1fp) -> c1
13713 if (isConstantFPBuildVectorOrConstantFP(N0))
13714 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13716 return FoldIntToFPToInt(N, DAG);
13719 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
13720 SDValue N0 = N->getOperand(0);
13721 SDValue N1 = N->getOperand(1);
13722 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13723 EVT VT = N->getValueType(0);
13725 // fold (fp_round c1fp) -> c1fp
13727 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
13729 // fold (fp_round (fp_extend x)) -> x
13730 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
13731 return N0.getOperand(0);
13733 // fold (fp_round (fp_round x)) -> (fp_round x)
13734 if (N0.getOpcode() == ISD::FP_ROUND) {
13735 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
13736 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
13738 // Skip this folding if it results in an fp_round from f80 to f16.
13740 // f80 to f16 always generates an expensive (and as yet, unimplemented)
13741 // libcall to __truncxfhf2 instead of selecting native f16 conversion
13742 // instructions from f32 or f64. Moreover, the first (value-preserving)
13743 // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
13745 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
13748 // If the first fp_round isn't a value preserving truncation, it might
13749 // introduce a tie in the second fp_round, that wouldn't occur in the
13750 // single-step fp_round we want to fold to.
13751 // In other words, double rounding isn't the same as rounding.
13752 // Also, this is a value preserving truncation iff both fp_round's are.
13753 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
13755 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
13756 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
13760 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
13761 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
13762 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
13763 N0.getOperand(0), N1);
13764 AddToWorklist(Tmp.getNode());
13765 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
13766 Tmp, N0.getOperand(1));
13769 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13775 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
13776 SDValue N0 = N->getOperand(0);
13777 EVT VT = N->getValueType(0);
13779 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
13780 if (N->hasOneUse() &&
13781 N->use_begin()->getOpcode() == ISD::FP_ROUND)
13784 // fold (fp_extend c1fp) -> c1fp
13785 if (isConstantFPBuildVectorOrConstantFP(N0))
13786 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
13788 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
13789 if (N0.getOpcode() == ISD::FP16_TO_FP &&
13790 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
13791 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
13793 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
13795 if (N0.getOpcode() == ISD::FP_ROUND
13796 && N0.getConstantOperandVal(1) == 1) {
13797 SDValue In = N0.getOperand(0);
13798 if (In.getValueType() == VT) return In;
13799 if (VT.bitsLT(In.getValueType()))
13800 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
13801 In, N0.getOperand(1));
13802 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
13805 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
13806 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13807 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13808 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13809 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13811 LN0->getBasePtr(), N0.getValueType(),
13812 LN0->getMemOperand());
13813 CombineTo(N, ExtLoad);
13814 CombineTo(N0.getNode(),
13815 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
13816 N0.getValueType(), ExtLoad,
13817 DAG.getIntPtrConstant(1, SDLoc(N0))),
13818 ExtLoad.getValue(1));
13819 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13822 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13828 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13829 SDValue N0 = N->getOperand(0);
13830 EVT VT = N->getValueType(0);
13832 // fold (fceil c1) -> fceil(c1)
13833 if (isConstantFPBuildVectorOrConstantFP(N0))
13834 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13839 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13840 SDValue N0 = N->getOperand(0);
13841 EVT VT = N->getValueType(0);
13843 // fold (ftrunc c1) -> ftrunc(c1)
13844 if (isConstantFPBuildVectorOrConstantFP(N0))
13845 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13847 // fold ftrunc (known rounded int x) -> x
13848 // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13849 // likely to be generated to extract integer from a rounded floating value.
13850 switch (N0.getOpcode()) {
13854 case ISD::FNEARBYINT:
13863 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13864 SDValue N0 = N->getOperand(0);
13865 EVT VT = N->getValueType(0);
13867 // fold (ffloor c1) -> ffloor(c1)
13868 if (isConstantFPBuildVectorOrConstantFP(N0))
13869 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13874 // FIXME: FNEG and FABS have a lot in common; refactor.
13875 SDValue DAGCombiner::visitFNEG(SDNode *N) {
13876 SDValue N0 = N->getOperand(0);
13877 EVT VT = N->getValueType(0);
13879 // Constant fold FNEG.
13880 if (isConstantFPBuildVectorOrConstantFP(N0))
13881 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13883 if (SDValue NegN0 =
13884 TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
13887 // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
13888 // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
13889 // know it was called from a context with a nsz flag if the input fsub does
13891 if (N0.getOpcode() == ISD::FSUB &&
13892 (DAG.getTarget().Options.NoSignedZerosFPMath ||
13893 N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
13894 return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
13895 N0.getOperand(0), N->getFlags());
13898 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13899 // constant pool values.
13900 if (!TLI.isFNegFree(VT) &&
13901 N0.getOpcode() == ISD::BITCAST &&
13902 N0.getNode()->hasOneUse()) {
13903 SDValue Int = N0.getOperand(0);
13904 EVT IntVT = Int.getValueType();
13905 if (IntVT.isInteger() && !IntVT.isVector()) {
13907 if (N0.getValueType().isVector()) {
13908 // For a vector, get a mask such as 0x80... per scalar element
13910 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
13911 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13913 // For a scalar, just generate 0x80...
13914 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13917 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13918 DAG.getConstant(SignMask, DL0, IntVT));
13919 AddToWorklist(Int.getNode());
13920 return DAG.getBitcast(VT, Int);
13924 // (fneg (fmul c, x)) -> (fmul -c, x)
13925 if (N0.getOpcode() == ISD::FMUL &&
13926 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
13927 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13929 APFloat CVal = CFP1->getValueAPF();
13931 if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
13932 TLI.isOperationLegal(ISD::ConstantFP, VT)))
13933 return DAG.getNode(
13934 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13935 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
13943 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13944 APFloat (*Op)(const APFloat &, const APFloat &)) {
13945 SDValue N0 = N->getOperand(0);
13946 SDValue N1 = N->getOperand(1);
13947 EVT VT = N->getValueType(0);
13948 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13949 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13951 if (N0CFP && N1CFP) {
13952 const APFloat &C0 = N0CFP->getValueAPF();
13953 const APFloat &C1 = N1CFP->getValueAPF();
13954 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13957 // Canonicalize to constant on RHS.
13958 if (isConstantFPBuildVectorOrConstantFP(N0) &&
13959 !isConstantFPBuildVectorOrConstantFP(N1))
13960 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13965 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
13966 return visitFMinMax(DAG, N, minnum);
13969 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
13970 return visitFMinMax(DAG, N, maxnum);
13973 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
13974 return visitFMinMax(DAG, N, minimum);
13977 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
13978 return visitFMinMax(DAG, N, maximum);
13981 SDValue DAGCombiner::visitFABS(SDNode *N) {
13982 SDValue N0 = N->getOperand(0);
13983 EVT VT = N->getValueType(0);
13985 // fold (fabs c1) -> fabs(c1)
13986 if (isConstantFPBuildVectorOrConstantFP(N0))
13987 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13989 // fold (fabs (fabs x)) -> (fabs x)
13990 if (N0.getOpcode() == ISD::FABS)
13991 return N->getOperand(0);
13993 // fold (fabs (fneg x)) -> (fabs x)
13994 // fold (fabs (fcopysign x, y)) -> (fabs x)
13995 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13996 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13998 // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13999 if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
14000 SDValue Int = N0.getOperand(0);
14001 EVT IntVT = Int.getValueType();
14002 if (IntVT.isInteger() && !IntVT.isVector()) {
14004 if (N0.getValueType().isVector()) {
14005 // For a vector, get a mask such as 0x7f... per scalar element
14007 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
14008 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
14010 // For a scalar, just generate 0x7f...
14011 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
14014 Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
14015 DAG.getConstant(SignMask, DL, IntVT));
14016 AddToWorklist(Int.getNode());
14017 return DAG.getBitcast(N->getValueType(0), Int);
14024 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
14025 SDValue Chain = N->getOperand(0);
14026 SDValue N1 = N->getOperand(1);
14027 SDValue N2 = N->getOperand(2);
14029 // If N is a constant we could fold this into a fallthrough or unconditional
14030 // branch. However that doesn't happen very often in normal code, because
14031 // Instcombine/SimplifyCFG should have handled the available opportunities.
14032 // If we did this folding here, it would be necessary to update the
14033 // MachineBasicBlock CFG, which is awkward.
14035 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
14037 if (N1.getOpcode() == ISD::SETCC &&
14038 TLI.isOperationLegalOrCustom(ISD::BR_CC,
14039 N1.getOperand(0).getValueType())) {
14040 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14041 Chain, N1.getOperand(2),
14042 N1.getOperand(0), N1.getOperand(1), N2);
14045 if (N1.hasOneUse()) {
14046 // rebuildSetCC calls visitXor which may change the Chain when there is a
14047 // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
14048 HandleSDNode ChainHandle(Chain);
14049 if (SDValue NewN1 = rebuildSetCC(N1))
14050 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
14051 ChainHandle.getValue(), NewN1, N2);
14057 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
14058 if (N.getOpcode() == ISD::SRL ||
14059 (N.getOpcode() == ISD::TRUNCATE &&
14060 (N.getOperand(0).hasOneUse() &&
14061 N.getOperand(0).getOpcode() == ISD::SRL))) {
14062 // Look pass the truncate.
14063 if (N.getOpcode() == ISD::TRUNCATE)
14064 N = N.getOperand(0);
14066 // Match this pattern so that we can generate simpler code:
14069 // %b = and i32 %a, 2
14070 // %c = srl i32 %b, 1
14071 // brcond i32 %c ...
14076 // %b = and i32 %a, 2
14077 // %c = setcc eq %b, 0
14080 // This applies only when the AND constant value has one bit set and the
14081 // SRL constant is equal to the log2 of the AND constant. The back-end is
14082 // smart enough to convert the result into a TEST/JMP sequence.
14083 SDValue Op0 = N.getOperand(0);
14084 SDValue Op1 = N.getOperand(1);
14086 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
14087 SDValue AndOp1 = Op0.getOperand(1);
14089 if (AndOp1.getOpcode() == ISD::Constant) {
14090 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
14092 if (AndConst.isPowerOf2() &&
14093 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
14095 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
14096 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
14103 // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
14104 // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
14105 if (N.getOpcode() == ISD::XOR) {
14106 // Because we may call this on a speculatively constructed
14107 // SimplifiedSetCC Node, we need to simplify this node first.
14108 // Ideally this should be folded into SimplifySetCC and not
14109 // here. For now, grab a handle to N so we don't lose it from
14110 // replacements interal to the visit.
14111 HandleSDNode XORHandle(N);
14112 while (N.getOpcode() == ISD::XOR) {
14113 SDValue Tmp = visitXOR(N.getNode());
14114 // No simplification done.
14115 if (!Tmp.getNode())
14117 // Returning N is form in-visit replacement that may invalidated
14118 // N. Grab value from Handle.
14119 if (Tmp.getNode() == N.getNode())
14120 N = XORHandle.getValue();
14121 else // Node simplified. Try simplifying again.
14125 if (N.getOpcode() != ISD::XOR)
14128 SDValue Op0 = N->getOperand(0);
14129 SDValue Op1 = N->getOperand(1);
14131 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
14132 bool Equal = false;
14133 // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
14134 if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
14135 Op0.getValueType() == MVT::i1) {
14137 Op0 = N->getOperand(0);
14138 Op1 = N->getOperand(1);
14142 EVT SetCCVT = N.getValueType();
14144 SetCCVT = getSetCCResultType(SetCCVT);
14145 // Replace the uses of XOR with SETCC
14146 return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
14147 Equal ? ISD::SETEQ : ISD::SETNE);
14154 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
14156 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
14157 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
14158 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
14160 // If N is a constant we could fold this into a fallthrough or unconditional
14161 // branch. However that doesn't happen very often in normal code, because
14162 // Instcombine/SimplifyCFG should have handled the available opportunities.
14163 // If we did this folding here, it would be necessary to update the
14164 // MachineBasicBlock CFG, which is awkward.
14166 // Use SimplifySetCC to simplify SETCC's.
14167 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
14168 CondLHS, CondRHS, CC->get(), SDLoc(N),
14170 if (Simp.getNode()) AddToWorklist(Simp.getNode());
14172 // fold to a simpler setcc
14173 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
14174 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14175 N->getOperand(0), Simp.getOperand(2),
14176 Simp.getOperand(0), Simp.getOperand(1),
14182 /// Return true if 'Use' is a load or a store that uses N as its base pointer
14183 /// and that N may be folded in the load / store addressing mode.
14184 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
14186 const TargetLowering &TLI) {
14190 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
14191 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14193 VT = LD->getMemoryVT();
14194 AS = LD->getAddressSpace();
14195 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
14196 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14198 VT = ST->getMemoryVT();
14199 AS = ST->getAddressSpace();
14200 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
14201 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
14203 VT = LD->getMemoryVT();
14204 AS = LD->getAddressSpace();
14205 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
14206 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
14208 VT = ST->getMemoryVT();
14209 AS = ST->getAddressSpace();
14213 TargetLowering::AddrMode AM;
14214 if (N->getOpcode() == ISD::ADD) {
14215 AM.HasBaseReg = true;
14216 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14219 AM.BaseOffs = Offset->getSExtValue();
14223 } else if (N->getOpcode() == ISD::SUB) {
14224 AM.HasBaseReg = true;
14225 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
14228 AM.BaseOffs = -Offset->getSExtValue();
14235 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
14236 VT.getTypeForEVT(*DAG.getContext()), AS);
14239 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
14240 bool &IsLoad, bool &IsMasked, SDValue &Ptr,
14241 const TargetLowering &TLI) {
14242 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
14243 if (LD->isIndexed())
14245 EVT VT = LD->getMemoryVT();
14246 if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
14248 Ptr = LD->getBasePtr();
14249 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
14250 if (ST->isIndexed())
14252 EVT VT = ST->getMemoryVT();
14253 if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
14255 Ptr = ST->getBasePtr();
14257 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
14258 if (LD->isIndexed())
14260 EVT VT = LD->getMemoryVT();
14261 if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
14262 !TLI.isIndexedMaskedLoadLegal(Dec, VT))
14264 Ptr = LD->getBasePtr();
14266 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
14267 if (ST->isIndexed())
14269 EVT VT = ST->getMemoryVT();
14270 if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
14271 !TLI.isIndexedMaskedStoreLegal(Dec, VT))
14273 Ptr = ST->getBasePtr();
14282 /// Try turning a load/store into a pre-indexed load/store when the base
14283 /// pointer is an add or subtract and it has other uses besides the load/store.
14284 /// After the transformation, the new indexed load/store has effectively folded
14285 /// the add/subtract in and all of its other uses are redirected to the
14286 /// new load/store.
14287 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
14288 if (Level < AfterLegalizeDAG)
14291 bool IsLoad = true;
14292 bool IsMasked = false;
14294 if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
14298 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
14299 // out. There is no reason to make this a preinc/predec.
14300 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
14301 Ptr.getNode()->hasOneUse())
14304 // Ask the target to do addressing mode selection.
14307 ISD::MemIndexedMode AM = ISD::UNINDEXED;
14308 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
14311 // Backends without true r+i pre-indexed forms may need to pass a
14312 // constant base with a variable offset so that constant coercion
14313 // will work with the patterns in canonical form.
14314 bool Swapped = false;
14315 if (isa<ConstantSDNode>(BasePtr)) {
14316 std::swap(BasePtr, Offset);
14320 // Don't create a indexed load / store with zero offset.
14321 if (isNullConstant(Offset))
14324 // Try turning it into a pre-indexed load / store except when:
14325 // 1) The new base ptr is a frame index.
14326 // 2) If N is a store and the new base ptr is either the same as or is a
14327 // predecessor of the value being stored.
14328 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
14329 // that would create a cycle.
14330 // 4) All uses are load / store ops that use it as old base ptr.
14332 // Check #1. Preinc'ing a frame index would require copying the stack pointer
14333 // (plus the implicit offset) to a register to preinc anyway.
14334 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
14339 SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
14340 : cast<StoreSDNode>(N)->getValue();
14342 // Would require a copy.
14343 if (Val == BasePtr)
14346 // Would create a cycle.
14347 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
14351 // Caches for hasPredecessorHelper.
14352 SmallPtrSet<const SDNode *, 32> Visited;
14353 SmallVector<const SDNode *, 16> Worklist;
14354 Worklist.push_back(N);
14356 // If the offset is a constant, there may be other adds of constants that
14357 // can be folded with this one. We should do this to avoid having to keep
14358 // a copy of the original base pointer.
14359 SmallVector<SDNode *, 16> OtherUses;
14360 if (isa<ConstantSDNode>(Offset))
14361 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
14362 UE = BasePtr.getNode()->use_end();
14364 SDUse &Use = UI.getUse();
14365 // Skip the use that is Ptr and uses of other results from BasePtr's
14366 // node (important for nodes that return multiple results).
14367 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
14370 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
14373 if (Use.getUser()->getOpcode() != ISD::ADD &&
14374 Use.getUser()->getOpcode() != ISD::SUB) {
14379 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
14380 if (!isa<ConstantSDNode>(Op1)) {
14385 // FIXME: In some cases, we can be smarter about this.
14386 if (Op1.getValueType() != Offset.getValueType()) {
14391 OtherUses.push_back(Use.getUser());
14395 std::swap(BasePtr, Offset);
14397 // Now check for #3 and #4.
14398 bool RealUse = false;
14400 for (SDNode *Use : Ptr.getNode()->uses()) {
14403 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
14406 // If Ptr may be folded in addressing mode of other use, then it's
14407 // not profitable to do this transformation.
14408 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
14418 Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14421 DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14424 Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14427 Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
14432 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
14433 Result.getNode()->dump(&DAG); dbgs() << '\n');
14434 WorklistRemover DeadNodes(*this);
14436 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14437 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14439 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14442 // Finally, since the node is now dead, remove it from the graph.
14443 deleteAndRecombine(N);
14446 std::swap(BasePtr, Offset);
14448 // Replace other uses of BasePtr that can be updated to use Ptr
14449 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
14450 unsigned OffsetIdx = 1;
14451 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
14453 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
14454 BasePtr.getNode() && "Expected BasePtr operand");
14456 // We need to replace ptr0 in the following expression:
14457 // x0 * offset0 + y0 * ptr0 = t0
14459 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
14461 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
14462 // indexed load/store and the expression that needs to be re-written.
14464 // Therefore, we have:
14465 // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
14467 ConstantSDNode *CN =
14468 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
14469 int X0, X1, Y0, Y1;
14470 const APInt &Offset0 = CN->getAPIntValue();
14471 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
14473 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
14474 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
14475 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
14476 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
14478 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
14480 APInt CNV = Offset0;
14481 if (X0 < 0) CNV = -CNV;
14482 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
14483 else CNV = CNV - Offset1;
14485 SDLoc DL(OtherUses[i]);
14487 // We can now generate the new expression.
14488 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
14489 SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
14491 SDValue NewUse = DAG.getNode(Opcode,
14493 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
14494 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
14495 deleteAndRecombine(OtherUses[i]);
14498 // Replace the uses of Ptr with uses of the updated base value.
14499 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
14500 deleteAndRecombine(Ptr.getNode());
14501 AddToWorklist(Result.getNode());
14506 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
14507 SDValue &BasePtr, SDValue &Offset,
14508 ISD::MemIndexedMode &AM,
14510 const TargetLowering &TLI) {
14512 (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
14515 if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
14518 // Don't create a indexed load / store with zero offset.
14519 if (isNullConstant(Offset))
14522 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
14525 SmallPtrSet<const SDNode *, 32> Visited;
14526 for (SDNode *Use : BasePtr.getNode()->uses()) {
14527 if (Use == Ptr.getNode())
14530 // No if there's a later user which could perform the index instead.
14531 if (isa<MemSDNode>(Use)) {
14532 bool IsLoad = true;
14533 bool IsMasked = false;
14535 if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
14536 IsMasked, OtherPtr, TLI)) {
14537 SmallVector<const SDNode *, 2> Worklist;
14538 Worklist.push_back(Use);
14539 if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
14544 // If all the uses are load / store addresses, then don't do the
14546 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
14547 for (SDNode *UseUse : Use->uses())
14548 if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
14555 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
14556 bool &IsMasked, SDValue &Ptr,
14557 SDValue &BasePtr, SDValue &Offset,
14558 ISD::MemIndexedMode &AM,
14560 const TargetLowering &TLI) {
14561 if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
14562 IsMasked, Ptr, TLI) ||
14563 Ptr.getNode()->hasOneUse())
14566 // Try turning it into a post-indexed load / store except when
14567 // 1) All uses are load / store ops that use it as base ptr (and
14568 // it may be folded as addressing mmode).
14569 // 2) Op must be independent of N, i.e. Op is neither a predecessor
14570 // nor a successor of N. Otherwise, if Op is folded that would
14572 for (SDNode *Op : Ptr->uses()) {
14574 if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
14578 SmallPtrSet<const SDNode *, 32> Visited;
14579 SmallVector<const SDNode *, 8> Worklist;
14580 // Ptr is predecessor to both N and Op.
14581 Visited.insert(Ptr.getNode());
14582 Worklist.push_back(N);
14583 Worklist.push_back(Op);
14584 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
14585 !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
14591 /// Try to combine a load/store with a add/sub of the base pointer node into a
14592 /// post-indexed load/store. The transformation folded the add/subtract into the
14593 /// new indexed load/store effectively and all of its uses are redirected to the
14594 /// new load/store.
14595 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
14596 if (Level < AfterLegalizeDAG)
14599 bool IsLoad = true;
14600 bool IsMasked = false;
14604 ISD::MemIndexedMode AM = ISD::UNINDEXED;
14605 SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
14606 Offset, AM, DAG, TLI);
14612 Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14614 : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
14615 BasePtr, Offset, AM);
14617 Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
14618 BasePtr, Offset, AM)
14619 : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
14620 BasePtr, Offset, AM);
14621 ++PostIndexedNodes;
14623 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
14624 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
14626 WorklistRemover DeadNodes(*this);
14628 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14629 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14631 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14634 // Finally, since the node is now dead, remove it from the graph.
14635 deleteAndRecombine(N);
14637 // Replace the uses of Use with uses of the updated base value.
14638 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
14639 Result.getValue(IsLoad ? 1 : 0));
14640 deleteAndRecombine(Op);
14644 /// Return the base-pointer arithmetic from an indexed \p LD.
14645 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
14646 ISD::MemIndexedMode AM = LD->getAddressingMode();
14647 assert(AM != ISD::UNINDEXED);
14648 SDValue BP = LD->getOperand(1);
14649 SDValue Inc = LD->getOperand(2);
14651 // Some backends use TargetConstants for load offsets, but don't expect
14652 // TargetConstants in general ADD nodes. We can convert these constants into
14653 // regular Constants (if the constant is not opaque).
14654 assert((Inc.getOpcode() != ISD::TargetConstant ||
14655 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
14656 "Cannot split out indexing using opaque target constants");
14657 if (Inc.getOpcode() == ISD::TargetConstant) {
14658 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
14659 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
14660 ConstInc->getValueType(0));
14664 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
14665 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
14668 static inline int numVectorEltsOrZero(EVT T) {
14669 return T.isVector() ? T.getVectorNumElements() : 0;
14672 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
14673 Val = ST->getValue();
14674 EVT STType = Val.getValueType();
14675 EVT STMemType = ST->getMemoryVT();
14676 if (STType == STMemType)
14678 if (isTypeLegal(STMemType))
14679 return false; // fail.
14680 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
14681 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
14682 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
14685 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
14686 STType.isInteger() && STMemType.isInteger()) {
14687 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
14690 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
14691 Val = DAG.getBitcast(STMemType, Val);
14694 return false; // fail.
14697 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
14698 EVT LDMemType = LD->getMemoryVT();
14699 EVT LDType = LD->getValueType(0);
14700 assert(Val.getValueType() == LDMemType &&
14701 "Attempting to extend value of non-matching type");
14702 if (LDType == LDMemType)
14704 if (LDMemType.isInteger() && LDType.isInteger()) {
14705 switch (LD->getExtensionType()) {
14706 case ISD::NON_EXTLOAD:
14707 Val = DAG.getBitcast(LDType, Val);
14710 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
14712 case ISD::SEXTLOAD:
14713 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
14715 case ISD::ZEXTLOAD:
14716 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14723 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
14724 if (OptLevel == CodeGenOpt::None || !LD->isSimple())
14726 SDValue Chain = LD->getOperand(0);
14727 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
14728 // TODO: Relax this restriction for unordered atomics (see D66309)
14729 if (!ST || !ST->isSimple())
14732 EVT LDType = LD->getValueType(0);
14733 EVT LDMemType = LD->getMemoryVT();
14734 EVT STMemType = ST->getMemoryVT();
14735 EVT STType = ST->getValue().getValueType();
14737 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
14738 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
14740 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
14743 // Normalize for Endianness. After this Offset=0 will denote that the least
14744 // significant bit in the loaded value maps to the least significant bit in
14745 // the stored value). With Offset=n (for n > 0) the loaded value starts at the
14746 // n:th least significant byte of the stored value.
14747 if (DAG.getDataLayout().isBigEndian())
14748 Offset = ((int64_t)STMemType.getStoreSizeInBits() -
14749 (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset;
14751 // Check that the stored value cover all bits that are loaded.
14754 (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
14756 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
14757 if (LD->isIndexed()) {
14758 // Cannot handle opaque target constants and we must respect the user's
14759 // request not to split indexes from loads.
14760 if (!canSplitIdx(LD))
14762 SDValue Idx = SplitIndexingFromLoad(LD);
14763 SDValue Ops[] = {Val, Idx, Chain};
14764 return CombineTo(LD, Ops, 3);
14766 return CombineTo(LD, Val, Chain);
14772 // Memory as copy space (potentially masked).
14773 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
14774 // Simple case: Direct non-truncating forwarding
14775 if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
14776 return ReplaceLd(LD, ST->getValue(), Chain);
14777 // Can we model the truncate and extension with an and mask?
14778 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
14779 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
14780 // Mask to size of LDMemType
14782 DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
14783 STMemType.getSizeInBits()),
14784 SDLoc(ST), STType);
14785 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
14786 return ReplaceLd(LD, Val, Chain);
14790 // TODO: Deal with nonzero offset.
14791 if (LD->getBasePtr().isUndef() || Offset != 0)
14793 // Model necessary truncations / extenstions.
14795 // Truncate Value To Stored Memory Size.
14797 if (!getTruncatedStoreValue(ST, Val))
14799 if (!isTypeLegal(LDMemType))
14801 if (STMemType != LDMemType) {
14802 // TODO: Support vectors? This requires extract_subvector/bitcast.
14803 if (!STMemType.isVector() && !LDMemType.isVector() &&
14804 STMemType.isInteger() && LDMemType.isInteger())
14805 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
14809 if (!extendLoadedValueToExtension(LD, Val))
14811 return ReplaceLd(LD, Val, Chain);
14814 // On failure, cleanup dead nodes we may have created.
14815 if (Val->use_empty())
14816 deleteAndRecombine(Val.getNode());
14820 SDValue DAGCombiner::visitLOAD(SDNode *N) {
14821 LoadSDNode *LD = cast<LoadSDNode>(N);
14822 SDValue Chain = LD->getChain();
14823 SDValue Ptr = LD->getBasePtr();
14825 // If load is not volatile and there are no uses of the loaded value (and
14826 // the updated indexed value in case of indexed loads), change uses of the
14827 // chain value into uses of the chain input (i.e. delete the dead load).
14828 // TODO: Allow this for unordered atomics (see D66309)
14829 if (LD->isSimple()) {
14830 if (N->getValueType(1) == MVT::Other) {
14831 // Unindexed loads.
14832 if (!N->hasAnyUseOfValue(0)) {
14833 // It's not safe to use the two value CombineTo variant here. e.g.
14834 // v1, chain2 = load chain1, loc
14835 // v2, chain3 = load chain2, loc
14837 // Now we replace use of chain2 with chain1. This makes the second load
14838 // isomorphic to the one we are deleting, and thus makes this load live.
14839 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
14840 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
14842 WorklistRemover DeadNodes(*this);
14843 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14844 AddUsersToWorklist(Chain.getNode());
14845 if (N->use_empty())
14846 deleteAndRecombine(N);
14848 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14852 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
14854 // If this load has an opaque TargetConstant offset, then we cannot split
14855 // the indexing into an add/sub directly (that TargetConstant may not be
14856 // valid for a different type of node, and we cannot convert an opaque
14857 // target constant into a regular constant).
14858 bool CanSplitIdx = canSplitIdx(LD);
14860 if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
14861 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
14863 if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
14864 Index = SplitIndexingFromLoad(LD);
14865 // Try to fold the base pointer arithmetic into subsequent loads and
14867 AddUsersToWorklist(N);
14869 Index = DAG.getUNDEF(N->getValueType(1));
14870 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
14871 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
14872 dbgs() << " and 2 other values\n");
14873 WorklistRemover DeadNodes(*this);
14874 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
14875 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
14876 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
14877 deleteAndRecombine(N);
14878 return SDValue(N, 0); // Return N so it doesn't get rechecked!
14883 // If this load is directly stored, replace the load value with the stored
14885 if (auto V = ForwardStoreValueToDirectLoad(LD))
14888 // Try to infer better alignment information than the load already has.
14889 if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
14890 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
14891 if (*Alignment > LD->getAlign() &&
14892 isAligned(*Alignment, LD->getSrcValueOffset())) {
14893 SDValue NewLoad = DAG.getExtLoad(
14894 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
14895 LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
14896 LD->getMemOperand()->getFlags(), LD->getAAInfo());
14897 // NewLoad will always be N as we are only refining the alignment
14898 assert(NewLoad.getNode() == N);
14904 if (LD->isUnindexed()) {
14905 // Walk up chain skipping non-aliasing memory nodes.
14906 SDValue BetterChain = FindBetterChain(LD, Chain);
14908 // If there is a better chain.
14909 if (Chain != BetterChain) {
14912 // Replace the chain to void dependency.
14913 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
14914 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
14915 BetterChain, Ptr, LD->getMemOperand());
14917 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
14918 LD->getValueType(0),
14919 BetterChain, Ptr, LD->getMemoryVT(),
14920 LD->getMemOperand());
14923 // Create token factor to keep old chain connected.
14924 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
14925 MVT::Other, Chain, ReplLoad.getValue(1));
14927 // Replace uses with load result and token factor
14928 return CombineTo(N, ReplLoad.getValue(0), Token);
14932 // Try transforming N to an indexed load.
14933 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14934 return SDValue(N, 0);
14936 // Try to slice up N to more direct loads if the slices are mapped to
14937 // different register banks or pairing can take place.
14938 if (SliceUpLoad(N))
14939 return SDValue(N, 0);
14946 /// Helper structure used to slice a load in smaller loads.
14947 /// Basically a slice is obtained from the following sequence:
14948 /// Origin = load Ty1, Base
14949 /// Shift = srl Ty1 Origin, CstTy Amount
14950 /// Inst = trunc Shift to Ty2
14952 /// Then, it will be rewritten into:
14953 /// Slice = load SliceTy, Base + SliceOffset
14954 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14956 /// SliceTy is deduced from the number of bits that are actually used to
14958 struct LoadedSlice {
14959 /// Helper structure used to compute the cost of a slice.
14961 /// Are we optimizing for code size.
14962 bool ForCodeSize = false;
14965 unsigned Loads = 0;
14966 unsigned Truncates = 0;
14967 unsigned CrossRegisterBanksCopies = 0;
14968 unsigned ZExts = 0;
14969 unsigned Shift = 0;
14971 explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
14973 /// Get the cost of one isolated slice.
14974 Cost(const LoadedSlice &LS, bool ForCodeSize)
14975 : ForCodeSize(ForCodeSize), Loads(1) {
14976 EVT TruncType = LS.Inst->getValueType(0);
14977 EVT LoadedType = LS.getLoadedType();
14978 if (TruncType != LoadedType &&
14979 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14983 /// Account for slicing gain in the current cost.
14984 /// Slicing provide a few gains like removing a shift or a
14985 /// truncate. This method allows to grow the cost of the original
14986 /// load with the gain from this slice.
14987 void addSliceGain(const LoadedSlice &LS) {
14988 // Each slice saves a truncate.
14989 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14990 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14991 LS.Inst->getValueType(0)))
14993 // If there is a shift amount, this slice gets rid of it.
14996 // If this slice can merge a cross register bank copy, account for it.
14997 if (LS.canMergeExpensiveCrossRegisterBankCopy())
14998 ++CrossRegisterBanksCopies;
15001 Cost &operator+=(const Cost &RHS) {
15002 Loads += RHS.Loads;
15003 Truncates += RHS.Truncates;
15004 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
15005 ZExts += RHS.ZExts;
15006 Shift += RHS.Shift;
15010 bool operator==(const Cost &RHS) const {
15011 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
15012 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
15013 ZExts == RHS.ZExts && Shift == RHS.Shift;
15016 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
15018 bool operator<(const Cost &RHS) const {
15019 // Assume cross register banks copies are as expensive as loads.
15020 // FIXME: Do we want some more target hooks?
15021 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
15022 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
15023 // Unless we are optimizing for code size, consider the
15024 // expensive operation first.
15025 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
15026 return ExpensiveOpsLHS < ExpensiveOpsRHS;
15027 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
15028 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
15031 bool operator>(const Cost &RHS) const { return RHS < *this; }
15033 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
15035 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
15038 // The last instruction that represent the slice. This should be a
15039 // truncate instruction.
15042 // The original load instruction.
15043 LoadSDNode *Origin;
15045 // The right shift amount in bits from the original load.
15048 // The DAG from which Origin came from.
15049 // This is used to get some contextual information about legal types, etc.
15052 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
15053 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
15054 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15056 /// Get the bits used in a chunk of bits \p BitWidth large.
15057 /// \return Result is \p BitWidth and has used bits set to 1 and
15058 /// not used bits set to 0.
15059 APInt getUsedBits() const {
15060 // Reproduce the trunc(lshr) sequence:
15061 // - Start from the truncated value.
15062 // - Zero extend to the desired bit width.
15064 assert(Origin && "No original load to compare against.");
15065 unsigned BitWidth = Origin->getValueSizeInBits(0);
15066 assert(Inst && "This slice is not bound to an instruction");
15067 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
15068 "Extracted slice is bigger than the whole type!");
15069 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15070 UsedBits.setAllBits();
15071 UsedBits = UsedBits.zext(BitWidth);
15072 UsedBits <<= Shift;
15076 /// Get the size of the slice to be loaded in bytes.
15077 unsigned getLoadedSize() const {
15078 unsigned SliceSize = getUsedBits().countPopulation();
15079 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
15080 return SliceSize / 8;
15083 /// Get the type that will be loaded for this slice.
15084 /// Note: This may not be the final type for the slice.
15085 EVT getLoadedType() const {
15086 assert(DAG && "Missing context");
15087 LLVMContext &Ctxt = *DAG->getContext();
15088 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15091 /// Get the alignment of the load used for this slice.
15092 Align getAlign() const {
15093 Align Alignment = Origin->getAlign();
15094 uint64_t Offset = getOffsetFromBase();
15096 Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15100 /// Check if this slice can be rewritten with legal operations.
15101 bool isLegal() const {
15102 // An invalid slice is not legal.
15103 if (!Origin || !Inst || !DAG)
15106 // Offsets are for indexed load only, we do not handle that.
15107 if (!Origin->getOffset().isUndef())
15110 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15112 // Check that the type is legal.
15113 EVT SliceType = getLoadedType();
15114 if (!TLI.isTypeLegal(SliceType))
15117 // Check that the load is legal for this type.
15118 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15121 // Check that the offset can be computed.
15122 // 1. Check its type.
15123 EVT PtrType = Origin->getBasePtr().getValueType();
15124 if (PtrType == MVT::Untyped || PtrType.isExtended())
15127 // 2. Check that it fits in the immediate.
15128 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15131 // 3. Check that the computation is legal.
15132 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15135 // Check that the zext is legal if it needs one.
15136 EVT TruncateType = Inst->getValueType(0);
15137 if (TruncateType != SliceType &&
15138 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15144 /// Get the offset in bytes of this slice in the original chunk of
15146 /// \pre DAG != nullptr.
15147 uint64_t getOffsetFromBase() const {
15148 assert(DAG && "Missing context.");
15149 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
15150 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
15151 uint64_t Offset = Shift / 8;
15152 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15153 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
15154 "The size of the original loaded type is not a multiple of a"
15156 // If Offset is bigger than TySizeInBytes, it means we are loading all
15157 // zeros. This should have been optimized before in the process.
15158 assert(TySizeInBytes > Offset &&
15159 "Invalid shift amount for given loaded size");
15161 Offset = TySizeInBytes - Offset - getLoadedSize();
15165 /// Generate the sequence of instructions to load the slice
15166 /// represented by this object and redirect the uses of this slice to
15167 /// this new sequence of instructions.
15168 /// \pre this->Inst && this->Origin are valid Instructions and this
15169 /// object passed the legal check: LoadedSlice::isLegal returned true.
15170 /// \return The last instruction of the sequence used to load the slice.
15171 SDValue loadSlice() const {
15172 assert(Inst && Origin && "Unable to replace a non-existing slice.");
15173 const SDValue &OldBaseAddr = Origin->getBasePtr();
15174 SDValue BaseAddr = OldBaseAddr;
15175 // Get the offset in that chunk of bytes w.r.t. the endianness.
15176 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
15177 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
15179 // BaseAddr = BaseAddr + Offset.
15180 EVT ArithType = BaseAddr.getValueType();
15182 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
15183 DAG->getConstant(Offset, DL, ArithType));
15186 // Create the type of the loaded slice according to its size.
15187 EVT SliceType = getLoadedType();
15189 // Create the load for the slice.
15191 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
15192 Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
15193 Origin->getMemOperand()->getFlags());
15194 // If the final type is not the same as the loaded type, this means that
15195 // we have to pad with zero. Create a zero extend for that.
15196 EVT FinalType = Inst->getValueType(0);
15197 if (SliceType != FinalType)
15199 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
15203 /// Check if this slice can be merged with an expensive cross register
15204 /// bank copy. E.g.,
15206 /// f = bitcast i32 i to float
15207 bool canMergeExpensiveCrossRegisterBankCopy() const {
15208 if (!Inst || !Inst->hasOneUse())
15210 SDNode *Use = *Inst->use_begin();
15211 if (Use->getOpcode() != ISD::BITCAST)
15213 assert(DAG && "Missing context");
15214 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15215 EVT ResVT = Use->getValueType(0);
15216 const TargetRegisterClass *ResRC =
15217 TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
15218 const TargetRegisterClass *ArgRC =
15219 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
15220 Use->getOperand(0)->isDivergent());
15221 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
15224 // At this point, we know that we perform a cross-register-bank copy.
15225 // Check if it is expensive.
15226 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
15227 // Assume bitcasts are cheap, unless both register classes do not
15228 // explicitly share a common sub class.
15229 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
15232 // Check if it will be merged with the load.
15233 // 1. Check the alignment constraint.
15234 Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
15235 ResVT.getTypeForEVT(*DAG->getContext()));
15237 if (RequiredAlignment > getAlign())
15240 // 2. Check that the load is a legal operation for that type.
15241 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
15244 // 3. Check that we do not have a zext in the way.
15245 if (Inst->getValueType(0) != getLoadedType())
15252 } // end anonymous namespace
15254 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
15255 /// \p UsedBits looks like 0..0 1..1 0..0.
15256 static bool areUsedBitsDense(const APInt &UsedBits) {
15257 // If all the bits are one, this is dense!
15258 if (UsedBits.isAllOnesValue())
15261 // Get rid of the unused bits on the right.
15262 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
15263 // Get rid of the unused bits on the left.
15264 if (NarrowedUsedBits.countLeadingZeros())
15265 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
15266 // Check that the chunk of bits is completely used.
15267 return NarrowedUsedBits.isAllOnesValue();
15270 /// Check whether or not \p First and \p Second are next to each other
15271 /// in memory. This means that there is no hole between the bits loaded
15272 /// by \p First and the bits loaded by \p Second.
15273 static bool areSlicesNextToEachOther(const LoadedSlice &First,
15274 const LoadedSlice &Second) {
15275 assert(First.Origin == Second.Origin && First.Origin &&
15276 "Unable to match different memory origins.");
15277 APInt UsedBits = First.getUsedBits();
15278 assert((UsedBits & Second.getUsedBits()) == 0 &&
15279 "Slices are not supposed to overlap.");
15280 UsedBits |= Second.getUsedBits();
15281 return areUsedBitsDense(UsedBits);
15284 /// Adjust the \p GlobalLSCost according to the target
15285 /// paring capabilities and the layout of the slices.
15286 /// \pre \p GlobalLSCost should account for at least as many loads as
15287 /// there is in the slices in \p LoadedSlices.
15288 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15289 LoadedSlice::Cost &GlobalLSCost) {
15290 unsigned NumberOfSlices = LoadedSlices.size();
15291 // If there is less than 2 elements, no pairing is possible.
15292 if (NumberOfSlices < 2)
15295 // Sort the slices so that elements that are likely to be next to each
15296 // other in memory are next to each other in the list.
15297 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
15298 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
15299 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
15301 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
15302 // First (resp. Second) is the first (resp. Second) potentially candidate
15303 // to be placed in a paired load.
15304 const LoadedSlice *First = nullptr;
15305 const LoadedSlice *Second = nullptr;
15306 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
15307 // Set the beginning of the pair.
15309 Second = &LoadedSlices[CurrSlice];
15311 // If First is NULL, it means we start a new pair.
15312 // Get to the next slice.
15316 EVT LoadedType = First->getLoadedType();
15318 // If the types of the slices are different, we cannot pair them.
15319 if (LoadedType != Second->getLoadedType())
15322 // Check if the target supplies paired loads for this type.
15323 Align RequiredAlignment;
15324 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
15325 // move to the next pair, this type is hopeless.
15329 // Check if we meet the alignment requirement.
15330 if (First->getAlign() < RequiredAlignment)
15333 // Check that both loads are next to each other in memory.
15334 if (!areSlicesNextToEachOther(*First, *Second))
15337 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
15338 --GlobalLSCost.Loads;
15339 // Move to the next pair.
15344 /// Check the profitability of all involved LoadedSlice.
15345 /// Currently, it is considered profitable if there is exactly two
15346 /// involved slices (1) which are (2) next to each other in memory, and
15347 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
15349 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
15350 /// the elements themselves.
15352 /// FIXME: When the cost model will be mature enough, we can relax
15353 /// constraints (1) and (2).
15354 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15355 const APInt &UsedBits, bool ForCodeSize) {
15356 unsigned NumberOfSlices = LoadedSlices.size();
15357 if (StressLoadSlicing)
15358 return NumberOfSlices > 1;
15361 if (NumberOfSlices != 2)
15365 if (!areUsedBitsDense(UsedBits))
15369 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
15370 // The original code has one big load.
15371 OrigCost.Loads = 1;
15372 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
15373 const LoadedSlice &LS = LoadedSlices[CurrSlice];
15374 // Accumulate the cost of all the slices.
15375 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
15376 GlobalSlicingCost += SliceCost;
15378 // Account as cost in the original configuration the gain obtained
15379 // with the current slices.
15380 OrigCost.addSliceGain(LS);
15383 // If the target supports paired load, adjust the cost accordingly.
15384 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
15385 return OrigCost > GlobalSlicingCost;
15388 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
15389 /// operations, split it in the various pieces being extracted.
15391 /// This sort of thing is introduced by SROA.
15392 /// This slicing takes care not to insert overlapping loads.
15393 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
15394 bool DAGCombiner::SliceUpLoad(SDNode *N) {
15395 if (Level < AfterLegalizeDAG)
15398 LoadSDNode *LD = cast<LoadSDNode>(N);
15399 if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
15400 !LD->getValueType(0).isInteger())
15403 // The algorithm to split up a load of a scalable vector into individual
15404 // elements currently requires knowing the length of the loaded type,
15405 // so will need adjusting to work on scalable vectors.
15406 if (LD->getValueType(0).isScalableVector())
15409 // Keep track of already used bits to detect overlapping values.
15410 // In that case, we will just abort the transformation.
15411 APInt UsedBits(LD->getValueSizeInBits(0), 0);
15413 SmallVector<LoadedSlice, 4> LoadedSlices;
15415 // Check if this load is used as several smaller chunks of bits.
15416 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
15417 // of computation for each trunc.
15418 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
15419 UI != UIEnd; ++UI) {
15420 // Skip the uses of the chain.
15421 if (UI.getUse().getResNo() != 0)
15424 SDNode *User = *UI;
15425 unsigned Shift = 0;
15427 // Check if this is a trunc(lshr).
15428 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
15429 isa<ConstantSDNode>(User->getOperand(1))) {
15430 Shift = User->getConstantOperandVal(1);
15431 User = *User->use_begin();
15434 // At this point, User is a Truncate, iff we encountered, trunc or
15436 if (User->getOpcode() != ISD::TRUNCATE)
15439 // The width of the type must be a power of 2 and greater than 8-bits.
15440 // Otherwise the load cannot be represented in LLVM IR.
15441 // Moreover, if we shifted with a non-8-bits multiple, the slice
15442 // will be across several bytes. We do not support that.
15443 unsigned Width = User->getValueSizeInBits(0);
15444 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
15447 // Build the slice for this chain of computations.
15448 LoadedSlice LS(User, LD, Shift, &DAG);
15449 APInt CurrentUsedBits = LS.getUsedBits();
15451 // Check if this slice overlaps with another.
15452 if ((CurrentUsedBits & UsedBits) != 0)
15454 // Update the bits used globally.
15455 UsedBits |= CurrentUsedBits;
15457 // Check if the new slice would be legal.
15461 // Record the slice.
15462 LoadedSlices.push_back(LS);
15465 // Abort slicing if it does not seem to be profitable.
15466 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
15471 // Rewrite each chain to use an independent load.
15472 // By construction, each chain can be represented by a unique load.
15474 // Prepare the argument for the new token factor for all the slices.
15475 SmallVector<SDValue, 8> ArgChains;
15476 for (SmallVectorImpl<LoadedSlice>::const_iterator
15477 LSIt = LoadedSlices.begin(),
15478 LSItEnd = LoadedSlices.end();
15479 LSIt != LSItEnd; ++LSIt) {
15480 SDValue SliceInst = LSIt->loadSlice();
15481 CombineTo(LSIt->Inst, SliceInst, true);
15482 if (SliceInst.getOpcode() != ISD::LOAD)
15483 SliceInst = SliceInst.getOperand(0);
15484 assert(SliceInst->getOpcode() == ISD::LOAD &&
15485 "It takes more than a zext to get to the loaded slice!!");
15486 ArgChains.push_back(SliceInst.getValue(1));
15489 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
15491 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15492 AddToWorklist(Chain.getNode());
15496 /// Check to see if V is (and load (ptr), imm), where the load is having
15497 /// specific bytes cleared out. If so, return the byte size being masked out
15498 /// and the shift amount.
15499 static std::pair<unsigned, unsigned>
15500 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
15501 std::pair<unsigned, unsigned> Result(0, 0);
15503 // Check for the structure we're looking for.
15504 if (V->getOpcode() != ISD::AND ||
15505 !isa<ConstantSDNode>(V->getOperand(1)) ||
15506 !ISD::isNormalLoad(V->getOperand(0).getNode()))
15509 // Check the chain and pointer.
15510 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
15511 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
15513 // This only handles simple types.
15514 if (V.getValueType() != MVT::i16 &&
15515 V.getValueType() != MVT::i32 &&
15516 V.getValueType() != MVT::i64)
15519 // Check the constant mask. Invert it so that the bits being masked out are
15520 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
15521 // follow the sign bit for uniformity.
15522 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
15523 unsigned NotMaskLZ = countLeadingZeros(NotMask);
15524 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
15525 unsigned NotMaskTZ = countTrailingZeros(NotMask);
15526 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
15527 if (NotMaskLZ == 64) return Result; // All zero mask.
15529 // See if we have a continuous run of bits. If so, we have 0*1+0*
15530 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
15533 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
15534 if (V.getValueType() != MVT::i64 && NotMaskLZ)
15535 NotMaskLZ -= 64-V.getValueSizeInBits();
15537 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
15538 switch (MaskedBytes) {
15542 default: return Result; // All one mask, or 5-byte mask.
15545 // Verify that the first bit starts at a multiple of mask so that the access
15546 // is aligned the same as the access width.
15547 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
15549 // For narrowing to be valid, it must be the case that the load the
15550 // immediately preceding memory operation before the store.
15551 if (LD == Chain.getNode())
15553 else if (Chain->getOpcode() == ISD::TokenFactor &&
15554 SDValue(LD, 1).hasOneUse()) {
15555 // LD has only 1 chain use so they are no indirect dependencies.
15556 if (!LD->isOperandOf(Chain.getNode()))
15559 return Result; // Fail.
15561 Result.first = MaskedBytes;
15562 Result.second = NotMaskTZ/8;
15566 /// Check to see if IVal is something that provides a value as specified by
15567 /// MaskInfo. If so, replace the specified store with a narrower store of
15568 /// truncated IVal.
15570 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
15571 SDValue IVal, StoreSDNode *St,
15573 unsigned NumBytes = MaskInfo.first;
15574 unsigned ByteShift = MaskInfo.second;
15575 SelectionDAG &DAG = DC->getDAG();
15577 // Check to see if IVal is all zeros in the part being masked in by the 'or'
15578 // that uses this. If not, this is not a replacement.
15579 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
15580 ByteShift*8, (ByteShift+NumBytes)*8);
15581 if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
15583 // Check that it is legal on the target to do this. It is legal if the new
15584 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
15585 // legalization (and the target doesn't explicitly think this is a bad idea).
15586 MVT VT = MVT::getIntegerVT(NumBytes * 8);
15587 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15588 if (!DC->isTypeLegal(VT))
15590 if (St->getMemOperand() &&
15591 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15592 *St->getMemOperand()))
15595 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
15596 // shifted by ByteShift and truncated down to NumBytes.
15599 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
15600 DAG.getConstant(ByteShift*8, DL,
15601 DC->getShiftAmountTy(IVal.getValueType())));
15604 // Figure out the offset for the store and the alignment of the access.
15606 unsigned NewAlign = St->getAlignment();
15608 if (DAG.getDataLayout().isLittleEndian())
15609 StOffset = ByteShift;
15611 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
15613 SDValue Ptr = St->getBasePtr();
15616 Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL);
15617 NewAlign = MinAlign(NewAlign, StOffset);
15620 // Truncate down to the new size.
15621 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
15625 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
15626 St->getPointerInfo().getWithOffset(StOffset), NewAlign);
15629 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
15630 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
15631 /// narrowing the load and store if it would end up being a win for performance
15633 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
15634 StoreSDNode *ST = cast<StoreSDNode>(N);
15635 if (!ST->isSimple())
15638 SDValue Chain = ST->getChain();
15639 SDValue Value = ST->getValue();
15640 SDValue Ptr = ST->getBasePtr();
15641 EVT VT = Value.getValueType();
15643 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
15646 unsigned Opc = Value.getOpcode();
15648 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
15649 // is a byte mask indicating a consecutive number of bytes, check to see if
15650 // Y is known to provide just those bytes. If so, we try to replace the
15651 // load + replace + store sequence with a single (narrower) store, which makes
15653 if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
15654 std::pair<unsigned, unsigned> MaskedLoad;
15655 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
15656 if (MaskedLoad.first)
15657 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15658 Value.getOperand(1), ST,this))
15661 // Or is commutative, so try swapping X and Y.
15662 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
15663 if (MaskedLoad.first)
15664 if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
15665 Value.getOperand(0), ST,this))
15669 if (!EnableReduceLoadOpStoreWidth)
15672 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
15673 Value.getOperand(1).getOpcode() != ISD::Constant)
15676 SDValue N0 = Value.getOperand(0);
15677 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15678 Chain == SDValue(N0.getNode(), 1)) {
15679 LoadSDNode *LD = cast<LoadSDNode>(N0);
15680 if (LD->getBasePtr() != Ptr ||
15681 LD->getPointerInfo().getAddrSpace() !=
15682 ST->getPointerInfo().getAddrSpace())
15685 // Find the type to narrow it the load / op / store to.
15686 SDValue N1 = Value.getOperand(1);
15687 unsigned BitWidth = N1.getValueSizeInBits();
15688 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
15689 if (Opc == ISD::AND)
15690 Imm ^= APInt::getAllOnesValue(BitWidth);
15691 if (Imm == 0 || Imm.isAllOnesValue())
15693 unsigned ShAmt = Imm.countTrailingZeros();
15694 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
15695 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
15696 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15697 // The narrowing should be profitable, the load/store operation should be
15698 // legal (or custom) and the store size should be equal to the NewVT width.
15699 while (NewBW < BitWidth &&
15700 (NewVT.getStoreSizeInBits() != NewBW ||
15701 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
15702 !TLI.isNarrowingProfitable(VT, NewVT))) {
15703 NewBW = NextPowerOf2(NewBW);
15704 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15706 if (NewBW >= BitWidth)
15709 // If the lsb changed does not start at the type bitwidth boundary,
15710 // start at the previous one.
15712 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
15713 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
15714 std::min(BitWidth, ShAmt + NewBW));
15715 if ((Imm & Mask) == Imm) {
15716 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
15717 if (Opc == ISD::AND)
15718 NewImm ^= APInt::getAllOnesValue(NewBW);
15719 uint64_t PtrOff = ShAmt / 8;
15720 // For big endian targets, we need to adjust the offset to the pointer to
15721 // load the correct bytes.
15722 if (DAG.getDataLayout().isBigEndian())
15723 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
15725 Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
15726 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
15727 if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
15730 SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD));
15732 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
15733 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
15734 LD->getMemOperand()->getFlags(), LD->getAAInfo());
15735 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
15736 DAG.getConstant(NewImm, SDLoc(Value),
15739 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
15740 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
15742 AddToWorklist(NewPtr.getNode());
15743 AddToWorklist(NewLD.getNode());
15744 AddToWorklist(NewVal.getNode());
15745 WorklistRemover DeadNodes(*this);
15746 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
15755 /// For a given floating point load / store pair, if the load value isn't used
15756 /// by any other operations, then consider transforming the pair to integer
15757 /// load / store operations if the target deems the transformation profitable.
15758 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15759 StoreSDNode *ST = cast<StoreSDNode>(N);
15760 SDValue Value = ST->getValue();
15761 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15762 Value.hasOneUse()) {
15763 LoadSDNode *LD = cast<LoadSDNode>(Value);
15764 EVT VT = LD->getMemoryVT();
15765 if (!VT.isFloatingPoint() ||
15766 VT != ST->getMemoryVT() ||
15767 LD->isNonTemporal() ||
15768 ST->isNonTemporal() ||
15769 LD->getPointerInfo().getAddrSpace() != 0 ||
15770 ST->getPointerInfo().getAddrSpace() != 0)
15773 TypeSize VTSize = VT.getSizeInBits();
15775 // We don't know the size of scalable types at compile time so we cannot
15776 // create an integer of the equivalent size.
15777 if (VTSize.isScalable())
15780 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
15781 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15782 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15783 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15784 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
15787 Align LDAlign = LD->getAlign();
15788 Align STAlign = ST->getAlign();
15789 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15790 Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
15791 if (LDAlign < ABIAlign || STAlign < ABIAlign)
15795 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15796 LD->getPointerInfo(), LDAlign);
15799 DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15800 ST->getPointerInfo(), STAlign);
15802 AddToWorklist(NewLD.getNode());
15803 AddToWorklist(NewST.getNode());
15804 WorklistRemover DeadNodes(*this);
15805 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15813 // This is a helper function for visitMUL to check the profitability
15814 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15815 // MulNode is the original multiply, AddNode is (add x, c1),
15816 // and ConstNode is c2.
15818 // If the (add x, c1) has multiple uses, we could increase
15819 // the number of adds if we make this transformation.
15820 // It would only be worth doing this if we can remove a
15821 // multiply in the process. Check for that here.
15825 // We're checking for cases where we have common "c3 * A" expressions.
15826 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15828 SDValue &ConstNode) {
15831 // If the add only has one use, this would be OK to do.
15832 if (AddNode.getNode()->hasOneUse())
15835 // Walk all the users of the constant with which we're multiplying.
15836 for (SDNode *Use : ConstNode->uses()) {
15837 if (Use == MulNode) // This use is the one we're on right now. Skip it.
15840 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15842 SDNode *MulVar = AddNode.getOperand(0).getNode();
15844 // OtherOp is what we're multiplying against the constant.
15845 if (Use->getOperand(0) == ConstNode)
15846 OtherOp = Use->getOperand(1).getNode();
15848 OtherOp = Use->getOperand(0).getNode();
15850 // Check to see if multiply is with the same operand of our "add".
15852 // ConstNode = CONST
15853 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
15855 // AddNode = (A + c1) <-- MulVar is A.
15856 // = AddNode * ConstNode <-- current visiting instruction.
15858 // If we make this transformation, we will have a common
15859 // multiply (ConstNode * A) that we can save.
15860 if (OtherOp == MulVar)
15863 // Now check to see if a future expansion will give us a common
15866 // ConstNode = CONST
15867 // AddNode = (A + c1)
15868 // ... = AddNode * ConstNode <-- current visiting instruction.
15870 // OtherOp = (A + c2)
15871 // Use = OtherOp * ConstNode <-- visiting Use.
15873 // If we make this transformation, we will have a common
15874 // multiply (CONST * A) after we also do the same transformation
15875 // to the "t2" instruction.
15876 if (OtherOp->getOpcode() == ISD::ADD &&
15877 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15878 OtherOp->getOperand(0).getNode() == MulVar)
15883 // Didn't find a case where this would be profitable.
15887 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15888 unsigned NumStores) {
15889 SmallVector<SDValue, 8> Chains;
15890 SmallPtrSet<const SDNode *, 8> Visited;
15891 SDLoc StoreDL(StoreNodes[0].MemNode);
15893 for (unsigned i = 0; i < NumStores; ++i) {
15894 Visited.insert(StoreNodes[i].MemNode);
15897 // don't include nodes that are children or repeated nodes.
15898 for (unsigned i = 0; i < NumStores; ++i) {
15899 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15900 Chains.push_back(StoreNodes[i].MemNode->getChain());
15903 assert(Chains.size() > 0 && "Chain should have generated a chain");
15904 return DAG.getTokenFactor(StoreDL, Chains);
15907 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
15908 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
15909 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
15910 // Make sure we have something to merge.
15914 // The latest Node in the DAG.
15915 SDLoc DL(StoreNodes[0].MemNode);
15917 TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
15918 unsigned SizeInBits = NumStores * ElementSizeBits;
15919 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15923 unsigned Elts = NumStores * NumMemElts;
15924 // Get the type for the merged vector store.
15925 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15927 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
15931 if (IsConstantSrc) {
15932 SmallVector<SDValue, 8> BuildVector;
15933 for (unsigned I = 0; I != NumStores; ++I) {
15934 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
15935 SDValue Val = St->getValue();
15936 // If constant is of the wrong type, convert it now.
15937 if (MemVT != Val.getValueType()) {
15938 Val = peekThroughBitcasts(Val);
15939 // Deal with constants of wrong size.
15940 if (ElementSizeBits != Val.getValueSizeInBits()) {
15942 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
15943 if (isa<ConstantFPSDNode>(Val)) {
15944 // Not clear how to truncate FP values.
15946 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15947 Val = DAG.getConstant(C->getAPIntValue()
15948 .zextOrTrunc(Val.getValueSizeInBits())
15949 .zextOrTrunc(ElementSizeBits),
15950 SDLoc(C), IntMemVT);
15952 // Make sure correctly size type is the correct type.
15953 Val = DAG.getBitcast(MemVT, Val);
15955 BuildVector.push_back(Val);
15957 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15958 : ISD::BUILD_VECTOR,
15959 DL, StoreTy, BuildVector);
15961 SmallVector<SDValue, 8> Ops;
15962 for (unsigned i = 0; i < NumStores; ++i) {
15963 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15964 SDValue Val = peekThroughBitcasts(St->getValue());
15965 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15966 // type MemVT. If the underlying value is not the correct
15967 // type, but it is an extraction of an appropriate vector we
15968 // can recast Val to be of the correct type. This may require
15969 // converting between EXTRACT_VECTOR_ELT and
15970 // EXTRACT_SUBVECTOR.
15971 if ((MemVT != Val.getValueType()) &&
15972 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15973 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
15974 EVT MemVTScalarTy = MemVT.getScalarType();
15975 // We may need to add a bitcast here to get types to line up.
15976 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15977 Val = DAG.getBitcast(MemVT, Val);
15979 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15980 : ISD::EXTRACT_VECTOR_ELT;
15981 SDValue Vec = Val.getOperand(0);
15982 SDValue Idx = Val.getOperand(1);
15983 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15986 Ops.push_back(Val);
15989 // Build the extracted vector elements back into a vector.
15990 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15991 : ISD::BUILD_VECTOR,
15995 // We should always use a vector store when merging extracted vector
15996 // elements, so this path implies a store of constants.
15997 assert(IsConstantSrc && "Merged vector elements should use vector store");
15999 APInt StoreInt(SizeInBits, 0);
16001 // Construct a single integer constant which is made of the smaller
16002 // constant inputs.
16003 bool IsLE = DAG.getDataLayout().isLittleEndian();
16004 for (unsigned i = 0; i < NumStores; ++i) {
16005 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
16006 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
16008 SDValue Val = St->getValue();
16009 Val = peekThroughBitcasts(Val);
16010 StoreInt <<= ElementSizeBits;
16011 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
16012 StoreInt |= C->getAPIntValue()
16013 .zextOrTrunc(ElementSizeBits)
16014 .zextOrTrunc(SizeInBits);
16015 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
16016 StoreInt |= C->getValueAPF()
16018 .zextOrTrunc(ElementSizeBits)
16019 .zextOrTrunc(SizeInBits);
16020 // If fp truncation is necessary give up for now.
16021 if (MemVT.getSizeInBits() != ElementSizeBits)
16024 llvm_unreachable("Invalid constant element type");
16028 // Create the new Load and Store operations.
16029 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
16032 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16033 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
16035 // make sure we use trunc store if it's necessary to be legal.
16038 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
16039 FirstInChain->getPointerInfo(),
16040 FirstInChain->getAlignment());
16041 } else { // Must be realized as a trunc store
16042 EVT LegalizedStoredValTy =
16043 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
16044 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
16045 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
16046 SDValue ExtendedStoreVal =
16047 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
16048 LegalizedStoredValTy);
16049 NewStore = DAG.getTruncStore(
16050 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
16051 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
16052 FirstInChain->getAlignment(),
16053 FirstInChain->getMemOperand()->getFlags());
16056 // Replace all merged stores with the new store.
16057 for (unsigned i = 0; i < NumStores; ++i)
16058 CombineTo(StoreNodes[i].MemNode, NewStore);
16060 AddToWorklist(NewChain.getNode());
16064 void DAGCombiner::getStoreMergeCandidates(
16065 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
16066 SDNode *&RootNode) {
16067 // This holds the base pointer, index, and the offset in bytes from the base
16069 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
16070 EVT MemVT = St->getMemoryVT();
16072 SDValue Val = peekThroughBitcasts(St->getValue());
16073 // We must have a base and an offset.
16074 if (!BasePtr.getBase().getNode())
16077 // Do not handle stores to undef base pointers.
16078 if (BasePtr.getBase().isUndef())
16081 StoreSource StoreSrc = getStoreSource(Val);
16082 assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
16083 BaseIndexOffset LBasePtr;
16084 // Match on loadbaseptr if relevant.
16086 if (StoreSrc == StoreSource::Load) {
16087 auto *Ld = cast<LoadSDNode>(Val);
16088 LBasePtr = BaseIndexOffset::match(Ld, DAG);
16089 LoadVT = Ld->getMemoryVT();
16090 // Load and store should be the same type.
16091 if (MemVT != LoadVT)
16093 // Loads must only have one use.
16094 if (!Ld->hasNUsesOfValue(1, 0))
16096 // The memory operands must not be volatile/indexed/atomic.
16097 // TODO: May be able to relax for unordered atomics (see D66309)
16098 if (!Ld->isSimple() || Ld->isIndexed())
16101 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
16102 int64_t &Offset) -> bool {
16103 // The memory operands must not be volatile/indexed/atomic.
16104 // TODO: May be able to relax for unordered atomics (see D66309)
16105 if (!Other->isSimple() || Other->isIndexed())
16107 // Don't mix temporal stores with non-temporal stores.
16108 if (St->isNonTemporal() != Other->isNonTemporal())
16110 SDValue OtherBC = peekThroughBitcasts(Other->getValue());
16111 // Allow merging constants of different types as integers.
16112 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
16113 : Other->getMemoryVT() != MemVT;
16114 if (StoreSrc == StoreSource::Load) {
16117 // The Load's Base Ptr must also match
16118 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
16119 BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
16120 if (LoadVT != OtherLd->getMemoryVT())
16122 // Loads must only have one use.
16123 if (!OtherLd->hasNUsesOfValue(1, 0))
16125 // The memory operands must not be volatile/indexed/atomic.
16126 // TODO: May be able to relax for unordered atomics (see D66309)
16127 if (!OtherLd->isSimple() ||
16128 OtherLd->isIndexed())
16130 // Don't mix temporal loads with non-temporal loads.
16131 if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
16133 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
16138 if (StoreSrc == StoreSource::Constant) {
16141 if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
16144 if (StoreSrc == StoreSource::Extract) {
16145 // Do not merge truncated stores here.
16146 if (Other->isTruncatingStore())
16148 if (!MemVT.bitsEq(OtherBC.getValueType()))
16150 if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
16151 OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16154 Ptr = BaseIndexOffset::match(Other, DAG);
16155 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
16158 // Check if the pair of StoreNode and the RootNode already bail out many
16159 // times which is over the limit in dependence check.
16160 auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
16161 SDNode *RootNode) -> bool {
16162 auto RootCount = StoreRootCountMap.find(StoreNode);
16163 if (RootCount != StoreRootCountMap.end() &&
16164 RootCount->second.first == RootNode &&
16165 RootCount->second.second > StoreMergeDependenceLimit)
16170 // We looking for a root node which is an ancestor to all mergable
16171 // stores. We search up through a load, to our root and then down
16172 // through all children. For instance we will find Store{1,2,3} if
16173 // St is Store1, Store2. or Store3 where the root is not a load
16174 // which always true for nonvolatile ops. TODO: Expand
16175 // the search to find all valid candidates through multiple layers of loads.
16178 // |-------|-------|
16179 // Load Load Store3
16183 // FIXME: We should be able to climb and
16184 // descend TokenFactors to find candidates as well.
16186 RootNode = St->getChain().getNode();
16188 unsigned NumNodesExplored = 0;
16189 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
16190 RootNode = Ldn->getChain().getNode();
16191 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16192 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
16193 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
16194 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
16195 if (I2.getOperandNo() == 0)
16196 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
16197 BaseIndexOffset Ptr;
16199 if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
16200 !OverLimitInDependenceCheck(OtherST, RootNode))
16201 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
16204 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16205 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
16206 if (I.getOperandNo() == 0)
16207 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
16208 BaseIndexOffset Ptr;
16210 if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
16211 !OverLimitInDependenceCheck(OtherST, RootNode))
16212 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
16216 // We need to check that merging these stores does not cause a loop in
16217 // the DAG. Any store candidate may depend on another candidate
16218 // indirectly through its operand (we already consider dependencies
16219 // through the chain). Check in parallel by searching up from
16220 // non-chain operands of candidates.
16221 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
16222 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
16223 SDNode *RootNode) {
16224 // FIXME: We should be able to truncate a full search of
16225 // predecessors by doing a BFS and keeping tabs the originating
16226 // stores from which worklist nodes come from in a similar way to
16227 // TokenFactor simplfication.
16229 SmallPtrSet<const SDNode *, 32> Visited;
16230 SmallVector<const SDNode *, 8> Worklist;
16232 // RootNode is a predecessor to all candidates so we need not search
16233 // past it. Add RootNode (peeking through TokenFactors). Do not count
16234 // these towards size check.
16236 Worklist.push_back(RootNode);
16237 while (!Worklist.empty()) {
16238 auto N = Worklist.pop_back_val();
16239 if (!Visited.insert(N).second)
16240 continue; // Already present in Visited.
16241 if (N->getOpcode() == ISD::TokenFactor) {
16242 for (SDValue Op : N->ops())
16243 Worklist.push_back(Op.getNode());
16247 // Don't count pruning nodes towards max.
16248 unsigned int Max = 1024 + Visited.size();
16249 // Search Ops of store candidates.
16250 for (unsigned i = 0; i < NumStores; ++i) {
16251 SDNode *N = StoreNodes[i].MemNode;
16252 // Of the 4 Store Operands:
16253 // * Chain (Op 0) -> We have already considered these
16254 // in candidate selection and can be
16256 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
16257 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
16258 // but aren't necessarily fromt the same base node, so
16259 // cycles possible (e.g. via indexed store).
16260 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
16261 // non-indexed stores). Not constant on all targets (e.g. ARM)
16262 // and so can participate in a cycle.
16263 for (unsigned j = 1; j < N->getNumOperands(); ++j)
16264 Worklist.push_back(N->getOperand(j).getNode());
16266 // Search through DAG. We can stop early if we find a store node.
16267 for (unsigned i = 0; i < NumStores; ++i)
16268 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
16270 // If the searching bail out, record the StoreNode and RootNode in the
16271 // StoreRootCountMap. If we have seen the pair many times over a limit,
16272 // we won't add the StoreNode into StoreNodes set again.
16273 if (Visited.size() >= Max) {
16274 auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
16275 if (RootCount.first == RootNode)
16276 RootCount.second++;
16278 RootCount = {RootNode, 1};
16286 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
16287 int64_t ElementSizeBytes) const {
16289 // Find a store past the width of the first store.
16290 size_t StartIdx = 0;
16291 while ((StartIdx + 1 < StoreNodes.size()) &&
16292 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
16293 StoreNodes[StartIdx + 1].OffsetFromBase)
16296 // Bail if we don't have enough candidates to merge.
16297 if (StartIdx + 1 >= StoreNodes.size())
16300 // Trim stores that overlapped with the first store.
16302 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
16304 // Scan the memory operations on the chain and find the first
16305 // non-consecutive store memory address.
16306 unsigned NumConsecutiveStores = 1;
16307 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16308 // Check that the addresses are consecutive starting from the second
16309 // element in the list of stores.
16310 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
16311 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
16312 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16314 NumConsecutiveStores = i + 1;
16316 if (NumConsecutiveStores > 1)
16317 return NumConsecutiveStores;
16319 // There are no consecutive stores at the start of the list.
16320 // Remove the first store and try again.
16321 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
16325 bool DAGCombiner::tryStoreMergeOfConstants(
16326 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
16327 EVT MemVT, SDNode *RootNode, bool AllowVectors) {
16328 LLVMContext &Context = *DAG.getContext();
16329 const DataLayout &DL = DAG.getDataLayout();
16330 int64_t ElementSizeBytes = MemVT.getStoreSize();
16331 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16332 bool MadeChange = false;
16334 // Store the constants into memory as one consecutive store.
16335 while (NumConsecutiveStores >= 2) {
16336 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16337 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16338 unsigned FirstStoreAlign = FirstInChain->getAlignment();
16339 unsigned LastLegalType = 1;
16340 unsigned LastLegalVectorType = 1;
16341 bool LastIntegerTrunc = false;
16342 bool NonZero = false;
16343 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
16344 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16345 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
16346 SDValue StoredVal = ST->getValue();
16347 bool IsElementZero = false;
16348 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
16349 IsElementZero = C->isNullValue();
16350 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
16351 IsElementZero = C->getConstantFPValue()->isNullValue();
16352 if (IsElementZero) {
16353 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
16354 FirstZeroAfterNonZero = i;
16356 NonZero |= !IsElementZero;
16358 // Find a legal type for the constant store.
16359 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16360 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16361 bool IsFast = false;
16363 // Break early when size is too large to be legal.
16364 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16367 if (TLI.isTypeLegal(StoreTy) &&
16368 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16369 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16370 *FirstInChain->getMemOperand(), &IsFast) &&
16372 LastIntegerTrunc = false;
16373 LastLegalType = i + 1;
16374 // Or check whether a truncstore is legal.
16375 } else if (TLI.getTypeAction(Context, StoreTy) ==
16376 TargetLowering::TypePromoteInteger) {
16377 EVT LegalizedStoredValTy =
16378 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
16379 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16380 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16381 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16382 *FirstInChain->getMemOperand(), &IsFast) &&
16384 LastIntegerTrunc = true;
16385 LastLegalType = i + 1;
16389 // We only use vectors if the constant is known to be zero or the
16390 // target allows it and the function is not marked with the
16391 // noimplicitfloat attribute.
16393 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
16395 // Find a legal type for the vector store.
16396 unsigned Elts = (i + 1) * NumMemElts;
16397 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16398 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
16399 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16400 TLI.allowsMemoryAccess(Context, DL, Ty,
16401 *FirstInChain->getMemOperand(), &IsFast) &&
16403 LastLegalVectorType = i + 1;
16407 bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
16408 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
16410 // Check if we found a legal integer type that creates a meaningful
16413 // We know that candidate stores are in order and of correct
16414 // shape. While there is no mergeable sequence from the
16415 // beginning one may start later in the sequence. The only
16416 // reason a merge of size N could have failed where another of
16417 // the same size would not have, is if the alignment has
16418 // improved or we've dropped a non-zero value. Drop as many
16419 // candidates as we can here.
16420 unsigned NumSkip = 1;
16421 while ((NumSkip < NumConsecutiveStores) &&
16422 (NumSkip < FirstZeroAfterNonZero) &&
16423 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16426 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16427 NumConsecutiveStores -= NumSkip;
16431 // Check that we can merge these candidates without causing a cycle.
16432 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16434 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16435 NumConsecutiveStores -= NumElem;
16439 MadeChange |= mergeStoresOfConstantsOrVecElts(
16440 StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
16442 // Remove merged stores for next iteration.
16443 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16444 NumConsecutiveStores -= NumElem;
16449 bool DAGCombiner::tryStoreMergeOfExtracts(
16450 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
16451 EVT MemVT, SDNode *RootNode) {
16452 LLVMContext &Context = *DAG.getContext();
16453 const DataLayout &DL = DAG.getDataLayout();
16454 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16455 bool MadeChange = false;
16457 // Loop on Consecutive Stores on success.
16458 while (NumConsecutiveStores >= 2) {
16459 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16460 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16461 unsigned FirstStoreAlign = FirstInChain->getAlignment();
16462 unsigned NumStoresToMerge = 1;
16463 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16464 // Find a legal type for the vector store.
16465 unsigned Elts = (i + 1) * NumMemElts;
16466 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16467 bool IsFast = false;
16469 // Break early when size is too large to be legal.
16470 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
16473 if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16474 TLI.allowsMemoryAccess(Context, DL, Ty,
16475 *FirstInChain->getMemOperand(), &IsFast) &&
16477 NumStoresToMerge = i + 1;
16480 // Check if we found a legal integer type creating a meaningful
16482 if (NumStoresToMerge < 2) {
16483 // We know that candidate stores are in order and of correct
16484 // shape. While there is no mergeable sequence from the
16485 // beginning one may start later in the sequence. The only
16486 // reason a merge of size N could have failed where another of
16487 // the same size would not have, is if the alignment has
16488 // improved. Drop as many candidates as we can here.
16489 unsigned NumSkip = 1;
16490 while ((NumSkip < NumConsecutiveStores) &&
16491 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16494 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16495 NumConsecutiveStores -= NumSkip;
16499 // Check that we can merge these candidates without causing a cycle.
16500 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
16502 StoreNodes.erase(StoreNodes.begin(),
16503 StoreNodes.begin() + NumStoresToMerge);
16504 NumConsecutiveStores -= NumStoresToMerge;
16508 MadeChange |= mergeStoresOfConstantsOrVecElts(
16509 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
16511 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
16512 NumConsecutiveStores -= NumStoresToMerge;
16517 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
16518 unsigned NumConsecutiveStores, EVT MemVT,
16519 SDNode *RootNode, bool AllowVectors,
16520 bool IsNonTemporalStore,
16521 bool IsNonTemporalLoad) {
16522 LLVMContext &Context = *DAG.getContext();
16523 const DataLayout &DL = DAG.getDataLayout();
16524 int64_t ElementSizeBytes = MemVT.getStoreSize();
16525 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16526 bool MadeChange = false;
16528 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16530 // Look for load nodes which are used by the stored values.
16531 SmallVector<MemOpLink, 8> LoadNodes;
16533 // Find acceptable loads. Loads need to have the same chain (token factor),
16534 // must not be zext, volatile, indexed, and they must be consecutive.
16535 BaseIndexOffset LdBasePtr;
16537 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16538 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16539 SDValue Val = peekThroughBitcasts(St->getValue());
16540 LoadSDNode *Ld = cast<LoadSDNode>(Val);
16542 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
16543 // If this is not the first ptr that we check.
16544 int64_t LdOffset = 0;
16545 if (LdBasePtr.getBase().getNode()) {
16546 // The base ptr must be the same.
16547 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
16550 // Check that all other base pointers are the same as this one.
16554 // We found a potential memory operand to merge.
16555 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
16558 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
16559 Align RequiredAlignment;
16560 bool NeedRotate = false;
16561 if (LoadNodes.size() == 2) {
16562 // If we have load/store pair instructions and we only have two values,
16563 // don't bother merging.
16564 if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
16565 StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
16566 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
16567 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
16570 // If the loads are reversed, see if we can rotate the halves into place.
16571 int64_t Offset0 = LoadNodes[0].OffsetFromBase;
16572 int64_t Offset1 = LoadNodes[1].OffsetFromBase;
16573 EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
16574 if (Offset0 - Offset1 == ElementSizeBytes &&
16575 (hasOperation(ISD::ROTL, PairVT) ||
16576 hasOperation(ISD::ROTR, PairVT))) {
16577 std::swap(LoadNodes[0], LoadNodes[1]);
16581 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16582 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16583 unsigned FirstStoreAlign = FirstInChain->getAlignment();
16584 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
16586 // Scan the memory operations on the chain and find the first
16587 // non-consecutive load memory address. These variables hold the index in
16588 // the store node array.
16590 unsigned LastConsecutiveLoad = 1;
16592 // This variable refers to the size and not index in the array.
16593 unsigned LastLegalVectorType = 1;
16594 unsigned LastLegalIntegerType = 1;
16595 bool isDereferenceable = true;
16596 bool DoIntegerTruncate = false;
16597 StartAddress = LoadNodes[0].OffsetFromBase;
16598 SDValue LoadChain = FirstLoad->getChain();
16599 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
16600 // All loads must share the same chain.
16601 if (LoadNodes[i].MemNode->getChain() != LoadChain)
16604 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
16605 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16607 LastConsecutiveLoad = i;
16609 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
16610 isDereferenceable = false;
16612 // Find a legal type for the vector store.
16613 unsigned Elts = (i + 1) * NumMemElts;
16614 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16616 // Break early when size is too large to be legal.
16617 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16620 bool IsFastSt = false;
16621 bool IsFastLd = false;
16622 if (TLI.isTypeLegal(StoreTy) &&
16623 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16624 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16625 *FirstInChain->getMemOperand(), &IsFastSt) &&
16627 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16628 *FirstLoad->getMemOperand(), &IsFastLd) &&
16630 LastLegalVectorType = i + 1;
16633 // Find a legal type for the integer store.
16634 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16635 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16636 if (TLI.isTypeLegal(StoreTy) &&
16637 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16638 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16639 *FirstInChain->getMemOperand(), &IsFastSt) &&
16641 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16642 *FirstLoad->getMemOperand(), &IsFastLd) &&
16644 LastLegalIntegerType = i + 1;
16645 DoIntegerTruncate = false;
16646 // Or check whether a truncstore and extload is legal.
16647 } else if (TLI.getTypeAction(Context, StoreTy) ==
16648 TargetLowering::TypePromoteInteger) {
16649 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
16650 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16651 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16652 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
16653 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
16654 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
16655 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16656 *FirstInChain->getMemOperand(), &IsFastSt) &&
16658 TLI.allowsMemoryAccess(Context, DL, StoreTy,
16659 *FirstLoad->getMemOperand(), &IsFastLd) &&
16661 LastLegalIntegerType = i + 1;
16662 DoIntegerTruncate = true;
16667 // Only use vector types if the vector type is larger than the integer
16668 // type. If they are the same, use integers.
16670 LastLegalVectorType > LastLegalIntegerType && AllowVectors;
16671 unsigned LastLegalType =
16672 std::max(LastLegalVectorType, LastLegalIntegerType);
16674 // We add +1 here because the LastXXX variables refer to location while
16675 // the NumElem refers to array/index size.
16676 unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
16677 NumElem = std::min(LastLegalType, NumElem);
16678 unsigned FirstLoadAlign = FirstLoad->getAlignment();
16681 // We know that candidate stores are in order and of correct
16682 // shape. While there is no mergeable sequence from the
16683 // beginning one may start later in the sequence. The only
16684 // reason a merge of size N could have failed where another of
16685 // the same size would not have is if the alignment or either
16686 // the load or store has improved. Drop as many candidates as we
16688 unsigned NumSkip = 1;
16689 while ((NumSkip < LoadNodes.size()) &&
16690 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
16691 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16693 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16694 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
16695 NumConsecutiveStores -= NumSkip;
16699 // Check that we can merge these candidates without causing a cycle.
16700 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16702 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16703 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16704 NumConsecutiveStores -= NumElem;
16708 // Find if it is better to use vectors or integers to load and store
16712 // Find a legal type for the vector store.
16713 unsigned Elts = NumElem * NumMemElts;
16714 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16716 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
16717 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
16720 SDLoc LoadDL(LoadNodes[0].MemNode);
16721 SDLoc StoreDL(StoreNodes[0].MemNode);
16723 // The merged loads are required to have the same incoming chain, so
16724 // using the first's chain is acceptable.
16726 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16727 AddToWorklist(NewStoreChain.getNode());
16729 MachineMemOperand::Flags LdMMOFlags =
16730 isDereferenceable ? MachineMemOperand::MODereferenceable
16731 : MachineMemOperand::MONone;
16732 if (IsNonTemporalLoad)
16733 LdMMOFlags |= MachineMemOperand::MONonTemporal;
16735 MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
16736 ? MachineMemOperand::MONonTemporal
16737 : MachineMemOperand::MONone;
16739 SDValue NewLoad, NewStore;
16740 if (UseVectorTy || !DoIntegerTruncate) {
16741 NewLoad = DAG.getLoad(
16742 JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
16743 FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
16744 SDValue StoreOp = NewLoad;
16746 unsigned LoadWidth = ElementSizeBytes * 8 * 2;
16747 assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
16748 "Unexpected type for rotate-able load pair");
16750 DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
16751 // Target can convert to the identical ROTR if it does not have ROTL.
16752 StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
16754 NewStore = DAG.getStore(
16755 NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
16756 FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16757 } else { // This must be the truncstore/extload case
16759 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16760 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16761 FirstLoad->getChain(), FirstLoad->getBasePtr(),
16762 FirstLoad->getPointerInfo(), JointMemOpVT,
16763 FirstLoadAlign, LdMMOFlags);
16764 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16765 FirstInChain->getBasePtr(),
16766 FirstInChain->getPointerInfo(), JointMemOpVT,
16767 FirstInChain->getAlignment(),
16768 FirstInChain->getMemOperand()->getFlags());
16771 // Transfer chain users from old loads to the new load.
16772 for (unsigned i = 0; i < NumElem; ++i) {
16773 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16774 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16775 SDValue(NewLoad.getNode(), 1));
16778 // Replace all stores with the new store. Recursively remove corresponding
16779 // values if they are no longer used.
16780 for (unsigned i = 0; i < NumElem; ++i) {
16781 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16782 CombineTo(StoreNodes[i].MemNode, NewStore);
16783 if (Val.getNode()->use_empty())
16784 recursivelyDeleteUnusedNodes(Val.getNode());
16788 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16789 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16790 NumConsecutiveStores -= NumElem;
16795 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
16796 if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
16799 // TODO: Extend this function to merge stores of scalable vectors.
16800 // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
16801 // store since we know <vscale x 16 x i8> is exactly twice as large as
16802 // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
16803 EVT MemVT = St->getMemoryVT();
16804 if (MemVT.isScalableVector())
16806 if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
16809 // This function cannot currently deal with non-byte-sized memory sizes.
16810 int64_t ElementSizeBytes = MemVT.getStoreSize();
16811 if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
16814 // Do not bother looking at stored values that are not constants, loads, or
16815 // extracted vector elements.
16816 SDValue StoredVal = peekThroughBitcasts(St->getValue());
16817 const StoreSource StoreSrc = getStoreSource(StoredVal);
16818 if (StoreSrc == StoreSource::Unknown)
16821 SmallVector<MemOpLink, 8> StoreNodes;
16823 // Find potential store merge candidates by searching through chain sub-DAG
16824 getStoreMergeCandidates(St, StoreNodes, RootNode);
16826 // Check if there is anything to merge.
16827 if (StoreNodes.size() < 2)
16830 // Sort the memory operands according to their distance from the
16832 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
16833 return LHS.OffsetFromBase < RHS.OffsetFromBase;
16836 bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
16837 Attribute::NoImplicitFloat);
16838 bool IsNonTemporalStore = St->isNonTemporal();
16839 bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
16840 cast<LoadSDNode>(StoredVal)->isNonTemporal();
16842 // Store Merge attempts to merge the lowest stores. This generally
16843 // works out as if successful, as the remaining stores are checked
16844 // after the first collection of stores is merged. However, in the
16845 // case that a non-mergeable store is found first, e.g., {p[-2],
16846 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
16847 // mergeable cases. To prevent this, we prune such stores from the
16848 // front of StoreNodes here.
16849 bool MadeChange = false;
16850 while (StoreNodes.size() > 1) {
16851 unsigned NumConsecutiveStores =
16852 getConsecutiveStores(StoreNodes, ElementSizeBytes);
16853 // There are no more stores in the list to examine.
16854 if (NumConsecutiveStores == 0)
16857 // We have at least 2 consecutive stores. Try to merge them.
16858 assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
16859 switch (StoreSrc) {
16860 case StoreSource::Constant:
16861 MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
16862 MemVT, RootNode, AllowVectors);
16865 case StoreSource::Extract:
16866 MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
16870 case StoreSource::Load:
16871 MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
16872 MemVT, RootNode, AllowVectors,
16873 IsNonTemporalStore, IsNonTemporalLoad);
16877 llvm_unreachable("Unhandled store source type");
16883 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16887 // Replace the chain to avoid dependency.
16888 if (ST->isTruncatingStore()) {
16889 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16890 ST->getBasePtr(), ST->getMemoryVT(),
16891 ST->getMemOperand());
16893 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16894 ST->getMemOperand());
16897 // Create token to keep both nodes around.
16898 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16899 MVT::Other, ST->getChain(), ReplStore);
16901 // Make sure the new and old chains are cleaned up.
16902 AddToWorklist(Token.getNode());
16904 // Don't add users to work list.
16905 return CombineTo(ST, Token, false);
16908 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16909 SDValue Value = ST->getValue();
16910 if (Value.getOpcode() == ISD::TargetConstantFP)
16913 if (!ISD::isNormalStore(ST))
16918 SDValue Chain = ST->getChain();
16919 SDValue Ptr = ST->getBasePtr();
16921 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16923 // NOTE: If the original store is volatile, this transform must not increase
16924 // the number of stores. For example, on x86-32 an f64 can be stored in one
16925 // processor operation but an i64 (which is not legal) requires two. So the
16926 // transform should not be done in this case.
16929 switch (CFP->getSimpleValueType(0).SimpleTy) {
16931 llvm_unreachable("Unknown FP type");
16932 case MVT::f16: // We don't do this for these yet.
16938 if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
16939 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16941 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16942 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16944 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16949 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16951 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16953 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16954 getZExtValue(), SDLoc(CFP), MVT::i64);
16955 return DAG.getStore(Chain, DL, Tmp,
16956 Ptr, ST->getMemOperand());
16959 if (ST->isSimple() &&
16960 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16961 // Many FP stores are not made apparent until after legalize, e.g. for
16962 // argument passing. Since this is so common, custom legalize the
16963 // 64-bit integer store into two 32-bit stores.
16964 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16965 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16966 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
16967 if (DAG.getDataLayout().isBigEndian())
16970 unsigned Alignment = ST->getAlignment();
16971 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16972 AAMDNodes AAInfo = ST->getAAInfo();
16974 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16975 ST->getAlignment(), MMOFlags, AAInfo);
16976 Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL);
16977 Alignment = MinAlign(Alignment, 4U);
16978 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16979 ST->getPointerInfo().getWithOffset(4),
16980 Alignment, MMOFlags, AAInfo);
16981 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16989 SDValue DAGCombiner::visitSTORE(SDNode *N) {
16990 StoreSDNode *ST = cast<StoreSDNode>(N);
16991 SDValue Chain = ST->getChain();
16992 SDValue Value = ST->getValue();
16993 SDValue Ptr = ST->getBasePtr();
16995 // If this is a store of a bit convert, store the input value if the
16996 // resultant store does not need a higher alignment than the original.
16997 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
16998 ST->isUnindexed()) {
16999 EVT SVT = Value.getOperand(0).getValueType();
17000 // If the store is volatile, we only want to change the store type if the
17001 // resulting store is legal. Otherwise we might increase the number of
17002 // memory accesses. We don't care if the original type was legal or not
17003 // as we assume software couldn't rely on the number of accesses of an
17005 // TODO: May be able to relax for unordered atomics (see D66309)
17006 if (((!LegalOperations && ST->isSimple()) ||
17007 TLI.isOperationLegal(ISD::STORE, SVT)) &&
17008 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17009 DAG, *ST->getMemOperand())) {
17010 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17011 ST->getMemOperand());
17015 // Turn 'store undef, Ptr' -> nothing.
17016 if (Value.isUndef() && ST->isUnindexed())
17019 // Try to infer better alignment information than the store already has.
17020 if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17021 if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17022 if (*Alignment > ST->getAlign() &&
17023 isAligned(*Alignment, ST->getSrcValueOffset())) {
17025 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17026 ST->getMemoryVT(), *Alignment,
17027 ST->getMemOperand()->getFlags(), ST->getAAInfo());
17028 // NewStore will always be N as we are only refining the alignment
17029 assert(NewStore.getNode() == N);
17035 // Try transforming a pair floating point load / store ops to integer
17036 // load / store ops.
17037 if (SDValue NewST = TransformFPLoadStorePair(N))
17040 // Try transforming several stores into STORE (BSWAP).
17041 if (SDValue Store = MatchStoreCombine(ST))
17044 if (ST->isUnindexed()) {
17045 // Walk up chain skipping non-aliasing memory nodes, on this store and any
17046 // adjacent stores.
17047 if (findBetterNeighborChains(ST)) {
17048 // replaceStoreChain uses CombineTo, which handled all of the worklist
17049 // manipulation. Return the original node to not do anything else.
17050 return SDValue(ST, 0);
17052 Chain = ST->getChain();
17055 // FIXME: is there such a thing as a truncating indexed store?
17056 if (ST->isTruncatingStore() && ST->isUnindexed() &&
17057 Value.getValueType().isInteger() &&
17058 (!isa<ConstantSDNode>(Value) ||
17059 !cast<ConstantSDNode>(Value)->isOpaque())) {
17060 APInt TruncDemandedBits =
17061 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
17062 ST->getMemoryVT().getScalarSizeInBits());
17064 // See if we can simplify the input to this truncstore with knowledge that
17065 // only the low bits are being used. For example:
17066 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
17067 AddToWorklist(Value.getNode());
17068 if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
17069 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
17070 ST->getMemOperand());
17072 // Otherwise, see if we can simplify the operation with
17073 // SimplifyDemandedBits, which only works if the value has a single use.
17074 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
17075 // Re-visit the store if anything changed and the store hasn't been merged
17076 // with another node (N is deleted) SimplifyDemandedBits will add Value's
17077 // node back to the worklist if necessary, but we also need to re-visit
17078 // the Store node itself.
17079 if (N->getOpcode() != ISD::DELETED_NODE)
17081 return SDValue(N, 0);
17085 // If this is a load followed by a store to the same location, then the store
17087 // TODO: Can relax for unordered atomics (see D66309)
17088 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
17089 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
17090 ST->isUnindexed() && ST->isSimple() &&
17091 // There can't be any side effects between the load and store, such as
17092 // a call or store.
17093 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
17094 // The store is dead, remove it.
17099 // TODO: Can relax for unordered atomics (see D66309)
17100 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
17101 if (ST->isUnindexed() && ST->isSimple() &&
17102 ST1->isUnindexed() && ST1->isSimple()) {
17103 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
17104 ST->getMemoryVT() == ST1->getMemoryVT()) {
17105 // If this is a store followed by a store with the same value to the
17106 // same location, then the store is dead/noop.
17110 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
17111 !ST1->getBasePtr().isUndef() &&
17112 // BaseIndexOffset and the code below requires knowing the size
17113 // of a vector, so bail out if MemoryVT is scalable.
17114 !ST1->getMemoryVT().isScalableVector()) {
17115 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
17116 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
17117 unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
17118 unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
17119 // If this is a store who's preceding store to a subset of the current
17120 // location and no one other node is chained to that store we can
17121 // effectively drop the store. Do not remove stores to undef as they may
17122 // be used as data sinks.
17123 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
17124 CombineTo(ST1, ST1->getChain());
17131 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
17132 // truncating store. We can do this even if this is already a truncstore.
17133 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
17134 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
17135 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
17136 ST->getMemoryVT())) {
17137 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
17138 Ptr, ST->getMemoryVT(), ST->getMemOperand());
17141 // Always perform this optimization before types are legal. If the target
17142 // prefers, also try this after legalization to catch stores that were created
17143 // by intrinsics or other nodes.
17144 if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
17146 // There can be multiple store sequences on the same chain.
17147 // Keep trying to merge store sequences until we are unable to do so
17148 // or until we merge the last store on the chain.
17149 bool Changed = mergeConsecutiveStores(ST);
17150 if (!Changed) break;
17151 // Return N as merge only uses CombineTo and no worklist clean
17152 // up is necessary.
17153 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
17154 return SDValue(N, 0);
17158 // Try transforming N to an indexed store.
17159 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
17160 return SDValue(N, 0);
17162 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
17164 // Make sure to do this only after attempting to merge stores in order to
17165 // avoid changing the types of some subset of stores due to visit order,
17166 // preventing their merging.
17167 if (isa<ConstantFPSDNode>(ST->getValue())) {
17168 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
17172 if (SDValue NewSt = splitMergedValStore(ST))
17175 return ReduceLoadOpStoreWidth(N);
17178 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
17179 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
17180 if (!LifetimeEnd->hasOffset())
17183 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
17184 LifetimeEnd->getOffset(), false);
17186 // We walk up the chains to find stores.
17187 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
17188 while (!Chains.empty()) {
17189 SDValue Chain = Chains.back();
17191 if (!Chain.hasOneUse())
17193 switch (Chain.getOpcode()) {
17194 case ISD::TokenFactor:
17195 for (unsigned Nops = Chain.getNumOperands(); Nops;)
17196 Chains.push_back(Chain.getOperand(--Nops));
17198 case ISD::LIFETIME_START:
17199 case ISD::LIFETIME_END:
17200 // We can forward past any lifetime start/end that can be proven not to
17202 if (!isAlias(Chain.getNode(), N))
17203 Chains.push_back(Chain.getOperand(0));
17206 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
17207 // TODO: Can relax for unordered atomics (see D66309)
17208 if (!ST->isSimple() || ST->isIndexed())
17210 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
17211 // If we store purely within object bounds just before its lifetime ends,
17212 // we can remove the store.
17213 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
17214 ST->getMemoryVT().getStoreSizeInBits())) {
17215 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
17216 dbgs() << "\nwithin LIFETIME_END of : ";
17217 LifetimeEndBase.dump(); dbgs() << "\n");
17218 CombineTo(ST, ST->getChain());
17219 return SDValue(N, 0);
17227 /// For the instruction sequence of store below, F and I values
17228 /// are bundled together as an i64 value before being stored into memory.
17229 /// Sometimes it is more efficent to generate separate stores for F and I,
17230 /// which can remove the bitwise instructions or sink them to colder places.
17232 /// (store (or (zext (bitcast F to i32) to i64),
17233 /// (shl (zext I to i64), 32)), addr) -->
17234 /// (store F, addr) and (store I, addr+4)
17236 /// Similarly, splitting for other merged store can also be beneficial, like:
17237 /// For pair of {i32, i32}, i64 store --> two i32 stores.
17238 /// For pair of {i32, i16}, i64 store --> two i32 stores.
17239 /// For pair of {i16, i16}, i32 store --> two i16 stores.
17240 /// For pair of {i16, i8}, i32 store --> two i16 stores.
17241 /// For pair of {i8, i8}, i16 store --> two i8 stores.
17243 /// We allow each target to determine specifically which kind of splitting is
17246 /// The store patterns are commonly seen from the simple code snippet below
17247 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
17248 /// void goo(const std::pair<int, float> &);
17251 /// goo(std::make_pair(tmp, ftmp));
17255 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
17256 if (OptLevel == CodeGenOpt::None)
17259 // Can't change the number of memory accesses for a volatile store or break
17260 // atomicity for an atomic one.
17261 if (!ST->isSimple())
17264 SDValue Val = ST->getValue();
17267 // Match OR operand.
17268 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
17271 // Match SHL operand and get Lower and Higher parts of Val.
17272 SDValue Op1 = Val.getOperand(0);
17273 SDValue Op2 = Val.getOperand(1);
17275 if (Op1.getOpcode() != ISD::SHL) {
17276 std::swap(Op1, Op2);
17277 if (Op1.getOpcode() != ISD::SHL)
17281 Hi = Op1.getOperand(0);
17282 if (!Op1.hasOneUse())
17285 // Match shift amount to HalfValBitSize.
17286 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
17287 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
17288 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
17291 // Lo and Hi are zero-extended from int with size less equal than 32
17293 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
17294 !Lo.getOperand(0).getValueType().isScalarInteger() ||
17295 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
17296 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
17297 !Hi.getOperand(0).getValueType().isScalarInteger() ||
17298 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
17301 // Use the EVT of low and high parts before bitcast as the input
17302 // of target query.
17303 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
17304 ? Lo.getOperand(0).getValueType()
17305 : Lo.getValueType();
17306 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
17307 ? Hi.getOperand(0).getValueType()
17308 : Hi.getValueType();
17309 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
17312 // Start to split store.
17313 unsigned Alignment = ST->getAlignment();
17314 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17315 AAMDNodes AAInfo = ST->getAAInfo();
17317 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
17318 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
17319 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
17320 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
17322 SDValue Chain = ST->getChain();
17323 SDValue Ptr = ST->getBasePtr();
17324 // Lower value store.
17325 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17326 ST->getAlignment(), MMOFlags, AAInfo);
17327 Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL);
17328 // Higher value store.
17330 DAG.getStore(St0, DL, Hi, Ptr,
17331 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
17332 Alignment / 2, MMOFlags, AAInfo);
17336 /// Convert a disguised subvector insertion into a shuffle:
17337 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
17338 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
17339 "Expected extract_vector_elt");
17340 SDValue InsertVal = N->getOperand(1);
17341 SDValue Vec = N->getOperand(0);
17343 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
17345 // --> (vector_shuffle X, Y) and variations where shuffle operands may be
17347 if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
17348 InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17349 isa<ConstantSDNode>(InsertVal.getOperand(1))) {
17350 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
17351 ArrayRef<int> Mask = SVN->getMask();
17353 SDValue X = Vec.getOperand(0);
17354 SDValue Y = Vec.getOperand(1);
17356 // Vec's operand 0 is using indices from 0 to N-1 and
17357 // operand 1 from N to 2N - 1, where N is the number of
17358 // elements in the vectors.
17359 SDValue InsertVal0 = InsertVal.getOperand(0);
17360 int ElementOffset = -1;
17362 // We explore the inputs of the shuffle in order to see if we find the
17363 // source of the extract_vector_elt. If so, we can use it to modify the
17364 // shuffle rather than perform an insert_vector_elt.
17365 SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
17366 ArgWorkList.emplace_back(Mask.size(), Y);
17367 ArgWorkList.emplace_back(0, X);
17369 while (!ArgWorkList.empty()) {
17372 std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
17374 if (ArgVal == InsertVal0) {
17375 ElementOffset = ArgOffset;
17379 // Peek through concat_vector.
17380 if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
17381 int CurrentArgOffset =
17382 ArgOffset + ArgVal.getValueType().getVectorNumElements();
17383 int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
17384 for (SDValue Op : reverse(ArgVal->ops())) {
17385 CurrentArgOffset -= Step;
17386 ArgWorkList.emplace_back(CurrentArgOffset, Op);
17389 // Make sure we went through all the elements and did not screw up index
17391 assert(CurrentArgOffset == ArgOffset);
17395 if (ElementOffset != -1) {
17396 SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
17398 auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
17399 NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
17400 assert(NewMask[InsIndex] <
17401 (int)(2 * Vec.getValueType().getVectorNumElements()) &&
17402 NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
17404 SDValue LegalShuffle =
17405 TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
17408 return LegalShuffle;
17412 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
17413 // bitcast(shuffle (bitcast V), (extended X), Mask)
17414 // Note: We do not use an insert_subvector node because that requires a
17415 // legal subvector type.
17416 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
17417 !InsertVal.getOperand(0).getValueType().isVector())
17420 SDValue SubVec = InsertVal.getOperand(0);
17421 SDValue DestVec = N->getOperand(0);
17422 EVT SubVecVT = SubVec.getValueType();
17423 EVT VT = DestVec.getValueType();
17424 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
17425 // If the source only has a single vector element, the cost of creating adding
17426 // it to a vector is likely to exceed the cost of a insert_vector_elt.
17427 if (NumSrcElts == 1)
17429 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
17430 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
17432 // Step 1: Create a shuffle mask that implements this insert operation. The
17433 // vector that we are inserting into will be operand 0 of the shuffle, so
17434 // those elements are just 'i'. The inserted subvector is in the first
17435 // positions of operand 1 of the shuffle. Example:
17436 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
17437 SmallVector<int, 16> Mask(NumMaskVals);
17438 for (unsigned i = 0; i != NumMaskVals; ++i) {
17439 if (i / NumSrcElts == InsIndex)
17440 Mask[i] = (i % NumSrcElts) + NumMaskVals;
17445 // Bail out if the target can not handle the shuffle we want to create.
17446 EVT SubVecEltVT = SubVecVT.getVectorElementType();
17447 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
17448 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
17451 // Step 2: Create a wide vector from the inserted source vector by appending
17452 // undefined elements. This is the same size as our destination vector.
17454 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
17455 ConcatOps[0] = SubVec;
17456 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
17458 // Step 3: Shuffle in the padded subvector.
17459 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
17460 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
17461 AddToWorklist(PaddedSubV.getNode());
17462 AddToWorklist(DestVecBC.getNode());
17463 AddToWorklist(Shuf.getNode());
17464 return DAG.getBitcast(VT, Shuf);
17467 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
17468 SDValue InVec = N->getOperand(0);
17469 SDValue InVal = N->getOperand(1);
17470 SDValue EltNo = N->getOperand(2);
17473 EVT VT = InVec.getValueType();
17474 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17476 // Insert into out-of-bounds element is undefined.
17477 if (IndexC && VT.isFixedLengthVector() &&
17478 IndexC->getZExtValue() >= VT.getVectorNumElements())
17479 return DAG.getUNDEF(VT);
17481 // Remove redundant insertions:
17482 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
17483 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17484 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
17488 // If this is variable insert to undef vector, it might be better to splat:
17489 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
17490 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
17491 if (VT.isScalableVector())
17492 return DAG.getSplatVector(VT, DL, InVal);
17494 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
17495 return DAG.getBuildVector(VT, DL, Ops);
17501 if (VT.isScalableVector())
17504 unsigned NumElts = VT.getVectorNumElements();
17506 // We must know which element is being inserted for folds below here.
17507 unsigned Elt = IndexC->getZExtValue();
17508 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
17511 // Canonicalize insert_vector_elt dag nodes.
17513 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
17514 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
17516 // Do this only if the child insert_vector node has one use; also
17517 // do this only if indices are both constants and Idx1 < Idx0.
17518 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
17519 && isa<ConstantSDNode>(InVec.getOperand(2))) {
17520 unsigned OtherElt = InVec.getConstantOperandVal(2);
17521 if (Elt < OtherElt) {
17523 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
17524 InVec.getOperand(0), InVal, EltNo);
17525 AddToWorklist(NewOp.getNode());
17526 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
17527 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
17531 // If we can't generate a legal BUILD_VECTOR, exit
17532 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
17535 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
17536 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
17537 // vector elements.
17538 SmallVector<SDValue, 8> Ops;
17539 // Do not combine these two vectors if the output vector will not replace
17540 // the input vector.
17541 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
17542 Ops.append(InVec.getNode()->op_begin(),
17543 InVec.getNode()->op_end());
17544 } else if (InVec.isUndef()) {
17545 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
17549 assert(Ops.size() == NumElts && "Unexpected vector size");
17551 // Insert the element
17552 if (Elt < Ops.size()) {
17553 // All the operands of BUILD_VECTOR must have the same type;
17554 // we enforce that here.
17555 EVT OpVT = Ops[0].getValueType();
17556 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
17559 // Return the new vector
17560 return DAG.getBuildVector(VT, DL, Ops);
17563 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
17565 LoadSDNode *OriginalLoad) {
17566 assert(OriginalLoad->isSimple());
17568 EVT ResultVT = EVE->getValueType(0);
17569 EVT VecEltVT = InVecVT.getVectorElementType();
17570 Align Alignment = OriginalLoad->getAlign();
17571 Align NewAlign = DAG.getDataLayout().getABITypeAlign(
17572 VecEltVT.getTypeForEVT(*DAG.getContext()));
17574 if (NewAlign > Alignment ||
17575 !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
17578 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
17579 ISD::NON_EXTLOAD : ISD::EXTLOAD;
17580 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
17583 Alignment = NewAlign;
17585 SDValue NewPtr = OriginalLoad->getBasePtr();
17587 EVT PtrType = NewPtr.getValueType();
17588 MachinePointerInfo MPI;
17590 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
17591 int Elt = ConstEltNo->getZExtValue();
17592 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
17593 Offset = DAG.getConstant(PtrOff, DL, PtrType);
17594 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
17596 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
17597 Offset = DAG.getNode(
17598 ISD::MUL, DL, PtrType, Offset,
17599 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
17600 // Discard the pointer info except the address space because the memory
17601 // operand can't represent this new access since the offset is variable.
17602 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
17604 NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
17606 // The replacement we need to do here is a little tricky: we need to
17607 // replace an extractelement of a load with a load.
17608 // Use ReplaceAllUsesOfValuesWith to do the replacement.
17609 // Note that this replacement assumes that the extractvalue is the only
17610 // use of the load; that's okay because we don't want to perform this
17611 // transformation in other cases anyway.
17614 if (ResultVT.bitsGT(VecEltVT)) {
17615 // If the result type of vextract is wider than the load, then issue an
17616 // extending load instead.
17617 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
17621 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
17622 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
17623 Alignment, OriginalLoad->getMemOperand()->getFlags(),
17624 OriginalLoad->getAAInfo());
17625 Chain = Load.getValue(1);
17627 Load = DAG.getLoad(
17628 VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
17629 OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
17630 Chain = Load.getValue(1);
17631 if (ResultVT.bitsLT(VecEltVT))
17632 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
17634 Load = DAG.getBitcast(ResultVT, Load);
17636 WorklistRemover DeadNodes(*this);
17637 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
17638 SDValue To[] = { Load, Chain };
17639 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
17640 // Make sure to revisit this node to clean it up; it will usually be dead.
17641 AddToWorklist(EVE);
17642 // Since we're explicitly calling ReplaceAllUses, add the new node to the
17643 // worklist explicitly as well.
17644 AddToWorklistWithUsers(Load.getNode());
17646 return SDValue(EVE, 0);
17649 /// Transform a vector binary operation into a scalar binary operation by moving
17650 /// the math/logic after an extract element of a vector.
17651 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
17652 bool LegalOperations) {
17653 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17654 SDValue Vec = ExtElt->getOperand(0);
17655 SDValue Index = ExtElt->getOperand(1);
17656 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17657 if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
17658 Vec.getNode()->getNumValues() != 1)
17661 // Targets may want to avoid this to prevent an expensive register transfer.
17662 if (!TLI.shouldScalarizeBinop(Vec))
17665 // Extracting an element of a vector constant is constant-folded, so this
17666 // transform is just replacing a vector op with a scalar op while moving the
17668 SDValue Op0 = Vec.getOperand(0);
17669 SDValue Op1 = Vec.getOperand(1);
17670 if (isAnyConstantBuildVector(Op0, true) ||
17671 isAnyConstantBuildVector(Op1, true)) {
17672 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
17673 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
17675 EVT VT = ExtElt->getValueType(0);
17676 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
17677 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
17678 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
17684 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
17685 SDValue VecOp = N->getOperand(0);
17686 SDValue Index = N->getOperand(1);
17687 EVT ScalarVT = N->getValueType(0);
17688 EVT VecVT = VecOp.getValueType();
17689 if (VecOp.isUndef())
17690 return DAG.getUNDEF(ScalarVT);
17692 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
17694 // This only really matters if the index is non-constant since other combines
17695 // on the constant elements already work.
17697 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
17698 Index == VecOp.getOperand(2)) {
17699 SDValue Elt = VecOp.getOperand(1);
17700 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
17703 // (vextract (scalar_to_vector val, 0) -> val
17704 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17705 // Only 0'th element of SCALAR_TO_VECTOR is defined.
17706 if (DAG.isKnownNeverZero(Index))
17707 return DAG.getUNDEF(ScalarVT);
17709 // Check if the result type doesn't match the inserted element type. A
17710 // SCALAR_TO_VECTOR may truncate the inserted element and the
17711 // EXTRACT_VECTOR_ELT may widen the extracted vector.
17712 SDValue InOp = VecOp.getOperand(0);
17713 if (InOp.getValueType() != ScalarVT) {
17714 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17715 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17720 // extract_vector_elt of out-of-bounds element -> UNDEF
17721 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17722 if (IndexC && VecVT.isFixedLengthVector() &&
17723 IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
17724 return DAG.getUNDEF(ScalarVT);
17726 // extract_vector_elt (build_vector x, y), 1 -> y
17727 if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
17728 VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
17729 TLI.isTypeLegal(VecVT) &&
17730 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
17731 assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
17732 VecVT.isFixedLengthVector()) &&
17733 "BUILD_VECTOR used for scalable vectors");
17734 unsigned IndexVal =
17735 VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
17736 SDValue Elt = VecOp.getOperand(IndexVal);
17737 EVT InEltVT = Elt.getValueType();
17739 // Sometimes build_vector's scalar input types do not match result type.
17740 if (ScalarVT == InEltVT)
17743 // TODO: It may be useful to truncate if free if the build_vector implicitly
17747 if (VecVT.isScalableVector())
17750 // All the code from this point onwards assumes fixed width vectors, but it's
17751 // possible that some of the combinations could be made to work for scalable
17753 unsigned NumElts = VecVT.getVectorNumElements();
17754 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
17756 // TODO: These transforms should not require the 'hasOneUse' restriction, but
17757 // there are regressions on multiple targets without it. We can end up with a
17758 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
17759 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
17760 VecOp.hasOneUse()) {
17761 // The vector index of the LSBs of the source depend on the endian-ness.
17762 bool IsLE = DAG.getDataLayout().isLittleEndian();
17763 unsigned ExtractIndex = IndexC->getZExtValue();
17764 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
17765 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
17766 SDValue BCSrc = VecOp.getOperand(0);
17767 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
17768 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
17770 if (LegalTypes && BCSrc.getValueType().isInteger() &&
17771 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17772 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
17773 // trunc i64 X to i32
17774 SDValue X = BCSrc.getOperand(0);
17775 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
17776 "Extract element and scalar to vector can't change element type "
17777 "from FP to integer.");
17778 unsigned XBitWidth = X.getValueSizeInBits();
17779 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
17781 // An extract element return value type can be wider than its vector
17782 // operand element type. In that case, the high bits are undefined, so
17783 // it's possible that we may need to extend rather than truncate.
17784 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
17785 assert(XBitWidth % VecEltBitWidth == 0 &&
17786 "Scalar bitwidth must be a multiple of vector element bitwidth");
17787 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
17792 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
17795 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
17796 // We only perform this optimization before the op legalization phase because
17797 // we may introduce new vector instructions which are not backed by TD
17798 // patterns. For example on AVX, extracting elements from a wide vector
17799 // without using extract_subvector. However, if we can find an underlying
17800 // scalar value, then we can always use that.
17801 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
17802 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
17803 // Find the new index to extract from.
17804 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
17806 // Extracting an undef index is undef.
17808 return DAG.getUNDEF(ScalarVT);
17810 // Select the right vector half to extract from.
17812 if (OrigElt < (int)NumElts) {
17813 SVInVec = VecOp.getOperand(0);
17815 SVInVec = VecOp.getOperand(1);
17816 OrigElt -= NumElts;
17819 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
17820 SDValue InOp = SVInVec.getOperand(OrigElt);
17821 if (InOp.getValueType() != ScalarVT) {
17822 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
17823 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17829 // FIXME: We should handle recursing on other vector shuffles and
17830 // scalar_to_vector here as well.
17832 if (!LegalOperations ||
17833 // FIXME: Should really be just isOperationLegalOrCustom.
17834 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
17835 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
17836 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
17837 DAG.getVectorIdxConstant(OrigElt, DL));
17841 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
17842 // simplify it based on the (valid) extraction indices.
17843 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
17844 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17845 Use->getOperand(0) == VecOp &&
17846 isa<ConstantSDNode>(Use->getOperand(1));
17848 APInt DemandedElts = APInt::getNullValue(NumElts);
17849 for (SDNode *Use : VecOp->uses()) {
17850 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
17851 if (CstElt->getAPIntValue().ult(NumElts))
17852 DemandedElts.setBit(CstElt->getZExtValue());
17854 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
17855 // We simplified the vector operand of this extract element. If this
17856 // extract is not dead, visit it again so it is folded properly.
17857 if (N->getOpcode() != ISD::DELETED_NODE)
17859 return SDValue(N, 0);
17861 APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
17862 if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
17863 // We simplified the vector operand of this extract element. If this
17864 // extract is not dead, visit it again so it is folded properly.
17865 if (N->getOpcode() != ISD::DELETED_NODE)
17867 return SDValue(N, 0);
17871 // Everything under here is trying to match an extract of a loaded value.
17872 // If the result of load has to be truncated, then it's not necessarily
17874 bool BCNumEltsChanged = false;
17875 EVT ExtVT = VecVT.getVectorElementType();
17877 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
17880 if (VecOp.getOpcode() == ISD::BITCAST) {
17881 // Don't duplicate a load with other uses.
17882 if (!VecOp.hasOneUse())
17885 EVT BCVT = VecOp.getOperand(0).getValueType();
17886 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
17888 if (NumElts != BCVT.getVectorNumElements())
17889 BCNumEltsChanged = true;
17890 VecOp = VecOp.getOperand(0);
17891 ExtVT = BCVT.getVectorElementType();
17894 // extract (vector load $addr), i --> load $addr + i * size
17895 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
17896 ISD::isNormalLoad(VecOp.getNode()) &&
17897 !Index->hasPredecessor(VecOp.getNode())) {
17898 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
17899 if (VecLoad && VecLoad->isSimple())
17900 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
17903 // Perform only after legalization to ensure build_vector / vector_shuffle
17904 // optimizations have already been done.
17905 if (!LegalOperations || !IndexC)
17908 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
17909 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
17910 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
17911 int Elt = IndexC->getZExtValue();
17912 LoadSDNode *LN0 = nullptr;
17913 if (ISD::isNormalLoad(VecOp.getNode())) {
17914 LN0 = cast<LoadSDNode>(VecOp);
17915 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17916 VecOp.getOperand(0).getValueType() == ExtVT &&
17917 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
17918 // Don't duplicate a load with other uses.
17919 if (!VecOp.hasOneUse())
17922 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17924 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17925 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17927 // (load $addr+1*size)
17929 // Don't duplicate a load with other uses.
17930 if (!VecOp.hasOneUse())
17933 // If the bit convert changed the number of elements, it is unsafe
17934 // to examine the mask.
17935 if (BCNumEltsChanged)
17938 // Select the input vector, guarding against out of range extract vector.
17939 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
17940 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
17942 if (VecOp.getOpcode() == ISD::BITCAST) {
17943 // Don't duplicate a load with other uses.
17944 if (!VecOp.hasOneUse())
17947 VecOp = VecOp.getOperand(0);
17949 if (ISD::isNormalLoad(VecOp.getNode())) {
17950 LN0 = cast<LoadSDNode>(VecOp);
17951 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17952 Index = DAG.getConstant(Elt, DL, Index.getValueType());
17954 } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
17955 VecVT.getVectorElementType() == ScalarVT &&
17958 VecOp.getOperand(0).getValueType().getVectorElementType()))) {
17959 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
17960 // -> extract_vector_elt a, 0
17961 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
17962 // -> extract_vector_elt a, 1
17963 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
17964 // -> extract_vector_elt b, 0
17965 // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
17966 // -> extract_vector_elt b, 1
17968 EVT ConcatVT = VecOp.getOperand(0).getValueType();
17969 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17970 SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
17971 Index.getValueType());
17973 SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
17974 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
17975 ConcatVT.getVectorElementType(),
17977 return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
17980 // Make sure we found a non-volatile load and the extractelement is
17982 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
17985 // If Idx was -1 above, Elt is going to be -1, so just return undef.
17987 return DAG.getUNDEF(LVT);
17989 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17992 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
17993 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17994 // We perform this optimization post type-legalization because
17995 // the type-legalizer often scalarizes integer-promoted vectors.
17996 // Performing this optimization before may create bit-casts which
17997 // will be type-legalized to complex code sequences.
17998 // We perform this optimization only before the operation legalizer because we
17999 // may introduce illegal operations.
18000 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18003 unsigned NumInScalars = N->getNumOperands();
18005 EVT VT = N->getValueType(0);
18007 // Check to see if this is a BUILD_VECTOR of a bunch of values
18008 // which come from any_extend or zero_extend nodes. If so, we can create
18009 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18010 // optimizations. We do not handle sign-extend because we can't fill the sign
18012 EVT SourceType = MVT::Other;
18013 bool AllAnyExt = true;
18015 for (unsigned i = 0; i != NumInScalars; ++i) {
18016 SDValue In = N->getOperand(i);
18017 // Ignore undef inputs.
18018 if (In.isUndef()) continue;
18020 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
18021 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18023 // Abort if the element is not an extension.
18024 if (!ZeroExt && !AnyExt) {
18025 SourceType = MVT::Other;
18029 // The input is a ZeroExt or AnyExt. Check the original type.
18030 EVT InTy = In.getOperand(0).getValueType();
18032 // Check that all of the widened source types are the same.
18033 if (SourceType == MVT::Other)
18036 else if (InTy != SourceType) {
18037 // Multiple income types. Abort.
18038 SourceType = MVT::Other;
18042 // Check if all of the extends are ANY_EXTENDs.
18043 AllAnyExt &= AnyExt;
18046 // In order to have valid types, all of the inputs must be extended from the
18047 // same source type and all of the inputs must be any or zero extend.
18048 // Scalar sizes must be a power of two.
18049 EVT OutScalarTy = VT.getScalarType();
18050 bool ValidTypes = SourceType != MVT::Other &&
18051 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
18052 isPowerOf2_32(SourceType.getSizeInBits());
18054 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
18055 // turn into a single shuffle instruction.
18059 // If we already have a splat buildvector, then don't fold it if it means
18060 // introducing zeros.
18061 if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
18064 bool isLE = DAG.getDataLayout().isLittleEndian();
18065 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
18066 assert(ElemRatio > 1 && "Invalid element size ratio");
18067 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
18068 DAG.getConstant(0, DL, SourceType);
18070 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
18071 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
18073 // Populate the new build_vector
18074 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18075 SDValue Cast = N->getOperand(i);
18076 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
18077 Cast.getOpcode() == ISD::ZERO_EXTEND ||
18078 Cast.isUndef()) && "Invalid cast opcode");
18080 if (Cast.isUndef())
18081 In = DAG.getUNDEF(SourceType);
18083 In = Cast->getOperand(0);
18084 unsigned Index = isLE ? (i * ElemRatio) :
18085 (i * ElemRatio + (ElemRatio - 1));
18087 assert(Index < Ops.size() && "Invalid index");
18091 // The type of the new BUILD_VECTOR node.
18092 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
18093 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
18094 "Invalid vector size");
18095 // Check if the new vector type is legal.
18096 if (!isTypeLegal(VecVT) ||
18097 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
18098 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
18101 // Make the new BUILD_VECTOR.
18102 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
18104 // The new BUILD_VECTOR node has the potential to be further optimized.
18105 AddToWorklist(BV.getNode());
18106 // Bitcast to the desired type.
18107 return DAG.getBitcast(VT, BV);
18110 // Simplify (build_vec (trunc $1)
18111 // (trunc (srl $1 half-width))
18112 // (trunc (srl $1 (2 * half-width))) …)
18114 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18115 assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18117 // Only for little endian
18118 if (!DAG.getDataLayout().isLittleEndian())
18122 EVT VT = N->getValueType(0);
18123 EVT OutScalarTy = VT.getScalarType();
18124 uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18126 // Only for power of two types to be sure that bitcast works well
18127 if (!isPowerOf2_64(ScalarTypeBitsize))
18130 unsigned NumInScalars = N->getNumOperands();
18132 // Look through bitcasts
18133 auto PeekThroughBitcast = [](SDValue Op) {
18134 if (Op.getOpcode() == ISD::BITCAST)
18135 return Op.getOperand(0);
18139 // The source value where all the parts are extracted.
18141 for (unsigned i = 0; i != NumInScalars; ++i) {
18142 SDValue In = PeekThroughBitcast(N->getOperand(i));
18143 // Ignore undef inputs.
18144 if (In.isUndef()) continue;
18146 if (In.getOpcode() != ISD::TRUNCATE)
18149 In = PeekThroughBitcast(In.getOperand(0));
18151 if (In.getOpcode() != ISD::SRL) {
18152 // For now only build_vec without shuffling, handle shifts here in the
18160 SDValue part = PeekThroughBitcast(In.getOperand(0));
18164 } else if (Src != part) {
18165 // Vector parts do not stem from the same variable
18169 SDValue ShiftAmtVal = In.getOperand(1);
18170 if (!isa<ConstantSDNode>(ShiftAmtVal))
18173 uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
18175 // The extracted value is not extracted at the right position
18176 if (ShiftAmt != i * ScalarTypeBitsize)
18181 // Only cast if the size is the same
18182 if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
18185 return DAG.getBitcast(VT, Src);
18188 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
18189 ArrayRef<int> VectorMask,
18190 SDValue VecIn1, SDValue VecIn2,
18191 unsigned LeftIdx, bool DidSplitVec) {
18192 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18194 EVT VT = N->getValueType(0);
18195 EVT InVT1 = VecIn1.getValueType();
18196 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
18198 unsigned NumElems = VT.getVectorNumElements();
18199 unsigned ShuffleNumElems = NumElems;
18201 // If we artificially split a vector in two already, then the offsets in the
18202 // operands will all be based off of VecIn1, even those in VecIn2.
18203 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
18205 // We can't generate a shuffle node with mismatched input and output types.
18206 // Try to make the types match the type of the output.
18207 if (InVT1 != VT || InVT2 != VT) {
18208 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
18209 // If the output vector length is a multiple of both input lengths,
18210 // we can concatenate them and pad the rest with undefs.
18211 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
18212 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
18213 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
18214 ConcatOps[0] = VecIn1;
18215 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
18216 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18217 VecIn2 = SDValue();
18218 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
18219 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
18222 if (!VecIn2.getNode()) {
18223 // If we only have one input vector, and it's twice the size of the
18224 // output, split it in two.
18225 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
18226 DAG.getVectorIdxConstant(NumElems, DL));
18227 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
18228 // Since we now have shorter input vectors, adjust the offset of the
18229 // second vector's start.
18230 Vec2Offset = NumElems;
18231 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
18232 // VecIn1 is wider than the output, and we have another, possibly
18233 // smaller input. Pad the smaller input with undefs, shuffle at the
18234 // input vector width, and extract the output.
18235 // The shuffle type is different than VT, so check legality again.
18236 if (LegalOperations &&
18237 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
18240 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
18241 // lower it back into a BUILD_VECTOR. So if the inserted type is
18242 // illegal, don't even try.
18243 if (InVT1 != InVT2) {
18244 if (!TLI.isTypeLegal(InVT2))
18246 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
18247 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
18249 ShuffleNumElems = NumElems * 2;
18251 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
18252 // than VecIn1. We can't handle this for now - this case will disappear
18253 // when we start sorting the vectors by type.
18256 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
18257 InVT1.getSizeInBits() == VT.getSizeInBits()) {
18258 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
18259 ConcatOps[0] = VecIn2;
18260 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18262 // TODO: Support cases where the length mismatch isn't exactly by a
18264 // TODO: Move this check upwards, so that if we have bad type
18265 // mismatches, we don't create any DAG nodes.
18270 // Initialize mask to undef.
18271 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
18273 // Only need to run up to the number of elements actually used, not the
18274 // total number of elements in the shuffle - if we are shuffling a wider
18275 // vector, the high lanes should be set to undef.
18276 for (unsigned i = 0; i != NumElems; ++i) {
18277 if (VectorMask[i] <= 0)
18280 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
18281 if (VectorMask[i] == (int)LeftIdx) {
18282 Mask[i] = ExtIndex;
18283 } else if (VectorMask[i] == (int)LeftIdx + 1) {
18284 Mask[i] = Vec2Offset + ExtIndex;
18288 // The type the input vectors may have changed above.
18289 InVT1 = VecIn1.getValueType();
18291 // If we already have a VecIn2, it should have the same type as VecIn1.
18292 // If we don't, get an undef/zero vector of the appropriate type.
18293 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
18294 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
18296 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
18297 if (ShuffleNumElems > NumElems)
18298 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
18303 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
18304 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18306 // First, determine where the build vector is not undef.
18307 // TODO: We could extend this to handle zero elements as well as undefs.
18308 int NumBVOps = BV->getNumOperands();
18310 for (int i = 0; i != NumBVOps; ++i) {
18311 SDValue Op = BV->getOperand(i);
18319 // Bail out if there's no non-undef element.
18323 // The build vector contains some number of undef elements and exactly
18324 // one other element. That other element must be a zero-extended scalar
18325 // extracted from a vector at a constant index to turn this into a shuffle.
18326 // Also, require that the build vector does not implicitly truncate/extend
18328 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
18329 EVT VT = BV->getValueType(0);
18330 SDValue Zext = BV->getOperand(ZextElt);
18331 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
18332 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18333 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
18334 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
18337 // The zero-extend must be a multiple of the source size, and we must be
18338 // building a vector of the same size as the source of the extract element.
18339 SDValue Extract = Zext.getOperand(0);
18340 unsigned DestSize = Zext.getValueSizeInBits();
18341 unsigned SrcSize = Extract.getValueSizeInBits();
18342 if (DestSize % SrcSize != 0 ||
18343 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
18346 // Create a shuffle mask that will combine the extracted element with zeros
18348 int ZextRatio = DestSize / SrcSize;
18349 int NumMaskElts = NumBVOps * ZextRatio;
18350 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
18351 for (int i = 0; i != NumMaskElts; ++i) {
18352 if (i / ZextRatio == ZextElt) {
18353 // The low bits of the (potentially translated) extracted element map to
18354 // the source vector. The high bits map to zero. We will use a zero vector
18355 // as the 2nd source operand of the shuffle, so use the 1st element of
18356 // that vector (mask value is number-of-elements) for the high bits.
18357 if (i % ZextRatio == 0)
18358 ShufMask[i] = Extract.getConstantOperandVal(1);
18360 ShufMask[i] = NumMaskElts;
18363 // Undef elements of the build vector remain undef because we initialize
18364 // the shuffle mask with -1.
18367 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
18368 // bitcast (shuffle V, ZeroVec, VectorMask)
18370 EVT VecVT = Extract.getOperand(0).getValueType();
18371 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
18372 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18373 SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
18374 ZeroVec, ShufMask, DAG);
18377 return DAG.getBitcast(VT, Shuf);
18380 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
18381 // operations. If the types of the vectors we're extracting from allow it,
18382 // turn this into a vector_shuffle node.
18383 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
18385 EVT VT = N->getValueType(0);
18387 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
18388 if (!isTypeLegal(VT))
18391 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
18394 // May only combine to shuffle after legalize if shuffle is legal.
18395 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
18398 bool UsesZeroVector = false;
18399 unsigned NumElems = N->getNumOperands();
18401 // Record, for each element of the newly built vector, which input vector
18402 // that element comes from. -1 stands for undef, 0 for the zero vector,
18403 // and positive values for the input vectors.
18404 // VectorMask maps each element to its vector number, and VecIn maps vector
18405 // numbers to their initial SDValues.
18407 SmallVector<int, 8> VectorMask(NumElems, -1);
18408 SmallVector<SDValue, 8> VecIn;
18409 VecIn.push_back(SDValue());
18411 for (unsigned i = 0; i != NumElems; ++i) {
18412 SDValue Op = N->getOperand(i);
18417 // See if we can use a blend with a zero vector.
18418 // TODO: Should we generalize this to a blend with an arbitrary constant
18420 if (isNullConstant(Op) || isNullFPConstant(Op)) {
18421 UsesZeroVector = true;
18426 // Not an undef or zero. If the input is something other than an
18427 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
18428 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18429 !isa<ConstantSDNode>(Op.getOperand(1)))
18431 SDValue ExtractedFromVec = Op.getOperand(0);
18433 if (ExtractedFromVec.getValueType().isScalableVector())
18436 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
18437 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
18440 // All inputs must have the same element type as the output.
18441 if (VT.getVectorElementType() !=
18442 ExtractedFromVec.getValueType().getVectorElementType())
18445 // Have we seen this input vector before?
18446 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
18447 // a map back from SDValues to numbers isn't worth it.
18448 unsigned Idx = std::distance(
18449 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
18450 if (Idx == VecIn.size())
18451 VecIn.push_back(ExtractedFromVec);
18453 VectorMask[i] = Idx;
18456 // If we didn't find at least one input vector, bail out.
18457 if (VecIn.size() < 2)
18460 // If all the Operands of BUILD_VECTOR extract from same
18461 // vector, then split the vector efficiently based on the maximum
18462 // vector access index and adjust the VectorMask and
18463 // VecIn accordingly.
18464 bool DidSplitVec = false;
18465 if (VecIn.size() == 2) {
18466 unsigned MaxIndex = 0;
18467 unsigned NearestPow2 = 0;
18468 SDValue Vec = VecIn.back();
18469 EVT InVT = Vec.getValueType();
18470 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
18472 for (unsigned i = 0; i < NumElems; i++) {
18473 if (VectorMask[i] <= 0)
18475 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
18476 IndexVec[i] = Index;
18477 MaxIndex = std::max(MaxIndex, Index);
18480 NearestPow2 = PowerOf2Ceil(MaxIndex);
18481 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
18482 NumElems * 2 < NearestPow2) {
18483 unsigned SplitSize = NearestPow2 / 2;
18484 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
18485 InVT.getVectorElementType(), SplitSize);
18486 if (TLI.isTypeLegal(SplitVT)) {
18487 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18488 DAG.getVectorIdxConstant(SplitSize, DL));
18489 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18490 DAG.getVectorIdxConstant(0, DL));
18492 VecIn.push_back(VecIn1);
18493 VecIn.push_back(VecIn2);
18494 DidSplitVec = true;
18496 for (unsigned i = 0; i < NumElems; i++) {
18497 if (VectorMask[i] <= 0)
18499 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
18505 // TODO: We want to sort the vectors by descending length, so that adjacent
18506 // pairs have similar length, and the longer vector is always first in the
18509 // TODO: Should this fire if some of the input vectors has illegal type (like
18510 // it does now), or should we let legalization run its course first?
18513 // Take pairs of vectors, and shuffle them so that the result has elements
18514 // from these vectors in the correct places.
18515 // For example, given:
18516 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
18517 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
18518 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
18519 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
18520 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
18521 // We will generate:
18522 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
18523 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
18524 SmallVector<SDValue, 4> Shuffles;
18525 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
18526 unsigned LeftIdx = 2 * In + 1;
18527 SDValue VecLeft = VecIn[LeftIdx];
18529 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
18531 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
18532 VecRight, LeftIdx, DidSplitVec))
18533 Shuffles.push_back(Shuffle);
18538 // If we need the zero vector as an "ingredient" in the blend tree, add it
18539 // to the list of shuffles.
18540 if (UsesZeroVector)
18541 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
18542 : DAG.getConstantFP(0.0, DL, VT));
18544 // If we only have one shuffle, we're done.
18545 if (Shuffles.size() == 1)
18546 return Shuffles[0];
18548 // Update the vector mask to point to the post-shuffle vectors.
18549 for (int &Vec : VectorMask)
18551 Vec = Shuffles.size() - 1;
18553 Vec = (Vec - 1) / 2;
18555 // More than one shuffle. Generate a binary tree of blends, e.g. if from
18556 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
18558 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
18559 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
18560 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
18561 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
18562 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
18563 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
18564 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
18566 // Make sure the initial size of the shuffle list is even.
18567 if (Shuffles.size() % 2)
18568 Shuffles.push_back(DAG.getUNDEF(VT));
18570 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
18572 Shuffles[CurSize] = DAG.getUNDEF(VT);
18575 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
18577 int Right = 2 * In + 1;
18578 SmallVector<int, 8> Mask(NumElems, -1);
18579 for (unsigned i = 0; i != NumElems; ++i) {
18580 if (VectorMask[i] == Left) {
18582 VectorMask[i] = In;
18583 } else if (VectorMask[i] == Right) {
18584 Mask[i] = i + NumElems;
18585 VectorMask[i] = In;
18590 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
18593 return Shuffles[0];
18596 // Try to turn a build vector of zero extends of extract vector elts into a
18597 // a vector zero extend and possibly an extract subvector.
18598 // TODO: Support sign extend?
18599 // TODO: Allow undef elements?
18600 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
18601 if (LegalOperations)
18604 EVT VT = N->getValueType(0);
18606 bool FoundZeroExtend = false;
18607 SDValue Op0 = N->getOperand(0);
18608 auto checkElem = [&](SDValue Op) -> int64_t {
18609 unsigned Opc = Op.getOpcode();
18610 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
18611 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
18612 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18613 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
18614 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
18615 return C->getZExtValue();
18619 // Make sure the first element matches
18620 // (zext (extract_vector_elt X, C))
18621 int64_t Offset = checkElem(Op0);
18625 unsigned NumElems = N->getNumOperands();
18626 SDValue In = Op0.getOperand(0).getOperand(0);
18627 EVT InSVT = In.getValueType().getScalarType();
18628 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
18630 // Don't create an illegal input type after type legalization.
18631 if (LegalTypes && !TLI.isTypeLegal(InVT))
18634 // Ensure all the elements come from the same vector and are adjacent.
18635 for (unsigned i = 1; i != NumElems; ++i) {
18636 if ((Offset + i) != checkElem(N->getOperand(i)))
18641 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
18642 Op0.getOperand(0).getOperand(1));
18643 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
18647 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
18648 EVT VT = N->getValueType(0);
18650 // A vector built entirely of undefs is undef.
18651 if (ISD::allOperandsUndef(N))
18652 return DAG.getUNDEF(VT);
18654 // If this is a splat of a bitcast from another vector, change to a
18657 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
18658 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
18660 // If X is a build_vector itself, the concat can become a larger build_vector.
18661 // TODO: Maybe this is useful for non-splat too?
18662 if (!LegalOperations) {
18663 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
18664 Splat = peekThroughBitcasts(Splat);
18665 EVT SrcVT = Splat.getValueType();
18666 if (SrcVT.isVector()) {
18667 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
18668 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
18669 SrcVT.getVectorElementType(), NumElts);
18670 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
18671 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
18672 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
18674 return DAG.getBitcast(VT, Concat);
18680 // A splat of a single element is a SPLAT_VECTOR if supported on the target.
18681 if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
18682 if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
18683 assert(!V.isUndef() && "Splat of undef should have been handled earlier");
18684 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
18687 // Check if we can express BUILD VECTOR via subvector extract.
18688 if (!LegalTypes && (N->getNumOperands() > 1)) {
18689 SDValue Op0 = N->getOperand(0);
18690 auto checkElem = [&](SDValue Op) -> uint64_t {
18691 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
18692 (Op0.getOperand(0) == Op.getOperand(0)))
18693 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
18694 return CNode->getZExtValue();
18698 int Offset = checkElem(Op0);
18699 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
18700 if (Offset + i != checkElem(N->getOperand(i))) {
18706 if ((Offset == 0) &&
18707 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
18708 return Op0.getOperand(0);
18709 if ((Offset != -1) &&
18710 ((Offset % N->getValueType(0).getVectorNumElements()) ==
18711 0)) // IDX must be multiple of output size.
18712 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
18713 Op0.getOperand(0), Op0.getOperand(1));
18716 if (SDValue V = convertBuildVecZextToZext(N))
18719 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
18722 if (SDValue V = reduceBuildVecTruncToBitCast(N))
18725 if (SDValue V = reduceBuildVecToShuffle(N))
18731 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
18732 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18733 EVT OpVT = N->getOperand(0).getValueType();
18735 // If the operands are legal vectors, leave them alone.
18736 if (TLI.isTypeLegal(OpVT))
18740 EVT VT = N->getValueType(0);
18741 SmallVector<SDValue, 8> Ops;
18743 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
18744 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18746 // Keep track of what we encounter.
18747 bool AnyInteger = false;
18748 bool AnyFP = false;
18749 for (const SDValue &Op : N->ops()) {
18750 if (ISD::BITCAST == Op.getOpcode() &&
18751 !Op.getOperand(0).getValueType().isVector())
18752 Ops.push_back(Op.getOperand(0));
18753 else if (ISD::UNDEF == Op.getOpcode())
18754 Ops.push_back(ScalarUndef);
18758 // Note whether we encounter an integer or floating point scalar.
18759 // If it's neither, bail out, it could be something weird like x86mmx.
18760 EVT LastOpVT = Ops.back().getValueType();
18761 if (LastOpVT.isFloatingPoint())
18763 else if (LastOpVT.isInteger())
18769 // If any of the operands is a floating point scalar bitcast to a vector,
18770 // use floating point types throughout, and bitcast everything.
18771 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
18773 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
18774 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
18776 for (SDValue &Op : Ops) {
18777 if (Op.getValueType() == SVT)
18782 Op = DAG.getBitcast(SVT, Op);
18787 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
18788 VT.getSizeInBits() / SVT.getSizeInBits());
18789 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
18792 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
18793 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
18794 // most two distinct vectors the same size as the result, attempt to turn this
18795 // into a legal shuffle.
18796 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
18797 EVT VT = N->getValueType(0);
18798 EVT OpVT = N->getOperand(0).getValueType();
18800 // We currently can't generate an appropriate shuffle for a scalable vector.
18801 if (VT.isScalableVector())
18804 int NumElts = VT.getVectorNumElements();
18805 int NumOpElts = OpVT.getVectorNumElements();
18807 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
18808 SmallVector<int, 8> Mask;
18810 for (SDValue Op : N->ops()) {
18811 Op = peekThroughBitcasts(Op);
18813 // UNDEF nodes convert to UNDEF shuffle mask values.
18814 if (Op.isUndef()) {
18815 Mask.append((unsigned)NumOpElts, -1);
18819 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18822 // What vector are we extracting the subvector from and at what index?
18823 SDValue ExtVec = Op.getOperand(0);
18824 int ExtIdx = Op.getConstantOperandVal(1);
18826 // We want the EVT of the original extraction to correctly scale the
18827 // extraction index.
18828 EVT ExtVT = ExtVec.getValueType();
18829 ExtVec = peekThroughBitcasts(ExtVec);
18831 // UNDEF nodes convert to UNDEF shuffle mask values.
18832 if (ExtVec.isUndef()) {
18833 Mask.append((unsigned)NumOpElts, -1);
18837 // Ensure that we are extracting a subvector from a vector the same
18838 // size as the result.
18839 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
18842 // Scale the subvector index to account for any bitcast.
18843 int NumExtElts = ExtVT.getVectorNumElements();
18844 if (0 == (NumExtElts % NumElts))
18845 ExtIdx /= (NumExtElts / NumElts);
18846 else if (0 == (NumElts % NumExtElts))
18847 ExtIdx *= (NumElts / NumExtElts);
18851 // At most we can reference 2 inputs in the final shuffle.
18852 if (SV0.isUndef() || SV0 == ExtVec) {
18854 for (int i = 0; i != NumOpElts; ++i)
18855 Mask.push_back(i + ExtIdx);
18856 } else if (SV1.isUndef() || SV1 == ExtVec) {
18858 for (int i = 0; i != NumOpElts; ++i)
18859 Mask.push_back(i + ExtIdx + NumElts);
18865 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18866 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
18867 DAG.getBitcast(VT, SV1), Mask, DAG);
18870 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
18871 unsigned CastOpcode = N->getOperand(0).getOpcode();
18872 switch (CastOpcode) {
18873 case ISD::SINT_TO_FP:
18874 case ISD::UINT_TO_FP:
18875 case ISD::FP_TO_SINT:
18876 case ISD::FP_TO_UINT:
18877 // TODO: Allow more opcodes?
18878 // case ISD::BITCAST:
18879 // case ISD::TRUNCATE:
18880 // case ISD::ZERO_EXTEND:
18881 // case ISD::SIGN_EXTEND:
18882 // case ISD::FP_EXTEND:
18888 EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
18889 if (!SrcVT.isVector())
18892 // All operands of the concat must be the same kind of cast from the same
18894 SmallVector<SDValue, 4> SrcOps;
18895 for (SDValue Op : N->ops()) {
18896 if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
18897 Op.getOperand(0).getValueType() != SrcVT)
18899 SrcOps.push_back(Op.getOperand(0));
18902 // The wider cast must be supported by the target. This is unusual because
18903 // the operation support type parameter depends on the opcode. In addition,
18904 // check the other type in the cast to make sure this is really legal.
18905 EVT VT = N->getValueType(0);
18906 EVT SrcEltVT = SrcVT.getVectorElementType();
18907 unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands();
18908 EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
18909 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18910 switch (CastOpcode) {
18911 case ISD::SINT_TO_FP:
18912 case ISD::UINT_TO_FP:
18913 if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
18914 !TLI.isTypeLegal(VT))
18917 case ISD::FP_TO_SINT:
18918 case ISD::FP_TO_UINT:
18919 if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
18920 !TLI.isTypeLegal(ConcatSrcVT))
18924 llvm_unreachable("Unexpected cast opcode");
18927 // concat (cast X), (cast Y)... -> cast (concat X, Y...)
18929 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
18930 return DAG.getNode(CastOpcode, DL, VT, NewConcat);
18933 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
18934 // If we only have one input vector, we don't need to do any concatenation.
18935 if (N->getNumOperands() == 1)
18936 return N->getOperand(0);
18938 // Check if all of the operands are undefs.
18939 EVT VT = N->getValueType(0);
18940 if (ISD::allOperandsUndef(N))
18941 return DAG.getUNDEF(VT);
18943 // Optimize concat_vectors where all but the first of the vectors are undef.
18944 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
18945 return Op.isUndef();
18947 SDValue In = N->getOperand(0);
18948 assert(In.getValueType().isVector() && "Must concat vectors");
18950 // If the input is a concat_vectors, just make a larger concat by padding
18951 // with smaller undefs.
18952 if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
18953 unsigned NumOps = N->getNumOperands() * In.getNumOperands();
18954 SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
18955 Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
18956 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18959 SDValue Scalar = peekThroughOneUseBitcasts(In);
18961 // concat_vectors(scalar_to_vector(scalar), undef) ->
18962 // scalar_to_vector(scalar)
18963 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18964 Scalar.hasOneUse()) {
18965 EVT SVT = Scalar.getValueType().getVectorElementType();
18966 if (SVT == Scalar.getOperand(0).getValueType())
18967 Scalar = Scalar.getOperand(0);
18970 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
18971 if (!Scalar.getValueType().isVector()) {
18972 // If the bitcast type isn't legal, it might be a trunc of a legal type;
18973 // look through the trunc so we can still do the transform:
18974 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
18975 if (Scalar->getOpcode() == ISD::TRUNCATE &&
18976 !TLI.isTypeLegal(Scalar.getValueType()) &&
18977 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
18978 Scalar = Scalar->getOperand(0);
18980 EVT SclTy = Scalar.getValueType();
18982 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
18985 // Bail out if the vector size is not a multiple of the scalar size.
18986 if (VT.getSizeInBits() % SclTy.getSizeInBits())
18989 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
18990 if (VNTNumElms < 2)
18993 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
18994 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
18997 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
18998 return DAG.getBitcast(VT, Res);
19002 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
19003 // We have already tested above for an UNDEF only concatenation.
19004 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
19005 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
19006 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
19007 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
19009 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
19010 SmallVector<SDValue, 8> Opnds;
19011 EVT SVT = VT.getScalarType();
19014 if (!SVT.isFloatingPoint()) {
19015 // If BUILD_VECTOR are from built from integer, they may have different
19016 // operand types. Get the smallest type and truncate all operands to it.
19017 bool FoundMinVT = false;
19018 for (const SDValue &Op : N->ops())
19019 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19020 EVT OpSVT = Op.getOperand(0).getValueType();
19021 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
19024 assert(FoundMinVT && "Concat vector type mismatch");
19027 for (const SDValue &Op : N->ops()) {
19028 EVT OpVT = Op.getValueType();
19029 unsigned NumElts = OpVT.getVectorNumElements();
19031 if (ISD::UNDEF == Op.getOpcode())
19032 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
19034 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19035 if (SVT.isFloatingPoint()) {
19036 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
19037 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
19039 for (unsigned i = 0; i != NumElts; ++i)
19041 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
19046 assert(VT.getVectorNumElements() == Opnds.size() &&
19047 "Concat vector type mismatch");
19048 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
19051 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
19052 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
19055 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
19056 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
19057 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
19060 if (SDValue V = combineConcatVectorOfCasts(N, DAG))
19063 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
19064 // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
19065 // operands and look for a CONCAT operations that place the incoming vectors
19066 // at the exact same location.
19068 // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
19069 SDValue SingleSource = SDValue();
19070 unsigned PartNumElem =
19071 N->getOperand(0).getValueType().getVectorMinNumElements();
19073 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19074 SDValue Op = N->getOperand(i);
19079 // Check if this is the identity extract:
19080 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19083 // Find the single incoming vector for the extract_subvector.
19084 if (SingleSource.getNode()) {
19085 if (Op.getOperand(0) != SingleSource)
19088 SingleSource = Op.getOperand(0);
19090 // Check the source type is the same as the type of the result.
19091 // If not, this concat may extend the vector, so we can not
19092 // optimize it away.
19093 if (SingleSource.getValueType() != N->getValueType(0))
19097 // Check that we are reading from the identity index.
19098 unsigned IdentityIndex = i * PartNumElem;
19099 if (Op.getConstantOperandAPInt(1) != IdentityIndex)
19103 if (SingleSource.getNode())
19104 return SingleSource;
19109 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19110 // if the subvector can be sourced for free.
19111 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
19112 if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19113 V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19114 return V.getOperand(1);
19116 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19117 if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19118 V.getOperand(0).getValueType() == SubVT &&
19119 (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
19120 uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
19121 return V.getOperand(SubIdx);
19126 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19127 SelectionDAG &DAG) {
19128 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19129 SDValue BinOp = Extract->getOperand(0);
19130 unsigned BinOpcode = BinOp.getOpcode();
19131 if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
19134 EVT VecVT = BinOp.getValueType();
19135 SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
19136 if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
19139 SDValue Index = Extract->getOperand(1);
19140 EVT SubVT = Extract->getValueType(0);
19141 if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
19144 SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19145 SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19147 // TODO: We could handle the case where only 1 operand is being inserted by
19148 // creating an extract of the other operand, but that requires checking
19149 // number of uses and/or costs.
19150 if (!Sub0 || !Sub1)
19153 // We are inserting both operands of the wide binop only to extract back
19154 // to the narrow vector size. Eliminate all of the insert/extract:
19155 // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
19156 return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19157 BinOp->getFlags());
19160 /// If we are extracting a subvector produced by a wide binary operator try
19161 /// to use a narrow binary operator and/or avoid concatenation and extraction.
19162 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
19163 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
19164 // some of these bailouts with other transforms.
19166 if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
19169 // The extract index must be a constant, so we can map it to a concat operand.
19170 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19171 if (!ExtractIndexC)
19174 // We are looking for an optionally bitcasted wide vector binary operator
19175 // feeding an extract subvector.
19176 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19177 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
19178 unsigned BOpcode = BinOp.getOpcode();
19179 if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
19182 // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
19183 // reduced to the unary fneg when it is visited, and we probably want to deal
19184 // with fneg in a target-specific way.
19185 if (BOpcode == ISD::FSUB) {
19186 auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
19187 if (C && C->getValueAPF().isNegZero())
19191 // The binop must be a vector type, so we can extract some fraction of it.
19192 EVT WideBVT = BinOp.getValueType();
19193 // The optimisations below currently assume we are dealing with fixed length
19194 // vectors. It is possible to add support for scalable vectors, but at the
19195 // moment we've done no analysis to prove whether they are profitable or not.
19196 if (!WideBVT.isFixedLengthVector())
19199 EVT VT = Extract->getValueType(0);
19200 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
19201 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
19202 "Extract index is not a multiple of the vector length.");
19204 // Bail out if this is not a proper multiple width extraction.
19205 unsigned WideWidth = WideBVT.getSizeInBits();
19206 unsigned NarrowWidth = VT.getSizeInBits();
19207 if (WideWidth % NarrowWidth != 0)
19210 // Bail out if we are extracting a fraction of a single operation. This can
19211 // occur because we potentially looked through a bitcast of the binop.
19212 unsigned NarrowingRatio = WideWidth / NarrowWidth;
19213 unsigned WideNumElts = WideBVT.getVectorNumElements();
19214 if (WideNumElts % NarrowingRatio != 0)
19217 // Bail out if the target does not support a narrower version of the binop.
19218 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
19219 WideNumElts / NarrowingRatio);
19220 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
19223 // If extraction is cheap, we don't need to look at the binop operands
19224 // for concat ops. The narrow binop alone makes this transform profitable.
19225 // We can't just reuse the original extract index operand because we may have
19227 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
19228 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
19229 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
19230 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
19231 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
19233 SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19234 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19235 BinOp.getOperand(0), NewExtIndex);
19236 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19237 BinOp.getOperand(1), NewExtIndex);
19238 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
19239 BinOp.getNode()->getFlags());
19240 return DAG.getBitcast(VT, NarrowBinOp);
19243 // Only handle the case where we are doubling and then halving. A larger ratio
19244 // may require more than two narrow binops to replace the wide binop.
19245 if (NarrowingRatio != 2)
19248 // TODO: The motivating case for this transform is an x86 AVX1 target. That
19249 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
19250 // flavors, but no other 256-bit integer support. This could be extended to
19251 // handle any binop, but that may require fixing/adding other folds to avoid
19252 // codegen regressions.
19253 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
19256 // We need at least one concatenation operation of a binop operand to make
19257 // this transform worthwhile. The concat must double the input vector sizes.
19258 auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
19259 if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
19260 return V.getOperand(ConcatOpNum);
19263 SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
19264 SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
19266 if (SubVecL || SubVecR) {
19267 // If a binop operand was not the result of a concat, we must extract a
19268 // half-sized operand for our new narrow binop:
19269 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
19270 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
19271 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
19273 SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19274 SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
19275 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19276 BinOp.getOperand(0), IndexC);
19278 SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
19279 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19280 BinOp.getOperand(1), IndexC);
19282 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
19283 return DAG.getBitcast(VT, NarrowBinOp);
19289 /// If we are extracting a subvector from a wide vector load, convert to a
19290 /// narrow load to eliminate the extraction:
19291 /// (extract_subvector (load wide vector)) --> (load narrow vector)
19292 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
19293 // TODO: Add support for big-endian. The offset calculation must be adjusted.
19294 if (DAG.getDataLayout().isBigEndian())
19297 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
19298 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19299 if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
19303 // Allow targets to opt-out.
19304 EVT VT = Extract->getValueType(0);
19306 // We can only create byte sized loads.
19307 if (!VT.isByteSized())
19310 unsigned Index = ExtIdx->getZExtValue();
19311 unsigned NumElts = VT.getVectorNumElements();
19313 // If the index is a multiple of the extract element count, we can offset the
19314 // address by the store size multiplied by the subvector index. Otherwise if
19315 // the scalar type is byte sized, we can just use the index multiplied by
19316 // the element size in bytes as the offset.
19318 if (Index % NumElts == 0)
19319 Offset = (Index / NumElts) * VT.getStoreSize();
19320 else if (VT.getScalarType().isByteSized())
19321 Offset = Index * VT.getScalarType().getStoreSize();
19325 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19326 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
19329 // The narrow load will be offset from the base address of the old load if
19330 // we are extracting from something besides index 0 (little-endian).
19332 SDValue BaseAddr = Ld->getBasePtr();
19334 // TODO: Use "BaseIndexOffset" to make this more effective.
19335 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
19336 MachineFunction &MF = DAG.getMachineFunction();
19337 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
19338 VT.getStoreSize());
19339 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
19340 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
19344 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
19345 EVT NVT = N->getValueType(0);
19346 SDValue V = N->getOperand(0);
19347 uint64_t ExtIdx = N->getConstantOperandVal(1);
19349 // Extract from UNDEF is UNDEF.
19351 return DAG.getUNDEF(NVT);
19353 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
19354 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
19357 // Combine an extract of an extract into a single extract_subvector.
19358 // ext (ext X, C), 0 --> ext X, C
19359 if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
19360 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
19361 V.getConstantOperandVal(1)) &&
19362 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
19363 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
19368 // Try to move vector bitcast after extract_subv by scaling extraction index:
19369 // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
19370 if (V.getOpcode() == ISD::BITCAST &&
19371 V.getOperand(0).getValueType().isVector()) {
19372 SDValue SrcOp = V.getOperand(0);
19373 EVT SrcVT = SrcOp.getValueType();
19374 unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
19375 unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
19376 if ((SrcNumElts % DestNumElts) == 0) {
19377 unsigned SrcDestRatio = SrcNumElts / DestNumElts;
19378 ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
19379 EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
19381 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19383 SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
19384 SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19385 V.getOperand(0), NewIndex);
19386 return DAG.getBitcast(NVT, NewExtract);
19389 if ((DestNumElts % SrcNumElts) == 0) {
19390 unsigned DestSrcRatio = DestNumElts / SrcNumElts;
19391 if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) {
19392 ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio;
19393 EVT ScalarVT = SrcVT.getScalarType();
19394 if ((ExtIdx % DestSrcRatio) == 0) {
19396 unsigned IndexValScaled = ExtIdx / DestSrcRatio;
19398 EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
19399 if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19400 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19401 SDValue NewExtract =
19402 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19403 V.getOperand(0), NewIndex);
19404 return DAG.getBitcast(NVT, NewExtract);
19406 if (NewExtEC == 1 &&
19407 TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
19408 SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19409 SDValue NewExtract =
19410 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
19411 V.getOperand(0), NewIndex);
19412 return DAG.getBitcast(NVT, NewExtract);
19419 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
19420 unsigned ExtNumElts = NVT.getVectorMinNumElements();
19421 EVT ConcatSrcVT = V.getOperand(0).getValueType();
19422 assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
19423 "Concat and extract subvector do not change element type");
19424 assert((ExtIdx % ExtNumElts) == 0 &&
19425 "Extract index is not a multiple of the input vector length.");
19427 unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
19428 unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
19430 // If the concatenated source types match this extract, it's a direct
19432 // extract_subvec (concat V1, V2, ...), i --> Vi
19433 if (ConcatSrcNumElts == ExtNumElts)
19434 return V.getOperand(ConcatOpIdx);
19436 // If the concatenated source vectors are a multiple length of this extract,
19437 // then extract a fraction of one of those source vectors directly from a
19438 // concat operand. Example:
19439 // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
19440 // v2i8 extract_subvec v8i8 Y, 6
19441 if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
19443 unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
19444 assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
19445 "Trying to extract from >1 concat operand?");
19446 assert(NewExtIdx % ExtNumElts == 0 &&
19447 "Extract index is not a multiple of the input vector length.");
19448 SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
19449 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
19450 V.getOperand(ConcatOpIdx), NewIndexC);
19454 V = peekThroughBitcasts(V);
19456 // If the input is a build vector. Try to make a smaller build vector.
19457 if (V.getOpcode() == ISD::BUILD_VECTOR) {
19458 EVT InVT = V.getValueType();
19459 unsigned ExtractSize = NVT.getSizeInBits();
19460 unsigned EltSize = InVT.getScalarSizeInBits();
19461 // Only do this if we won't split any elements.
19462 if (ExtractSize % EltSize == 0) {
19463 unsigned NumElems = ExtractSize / EltSize;
19464 EVT EltVT = InVT.getVectorElementType();
19466 NumElems == 1 ? EltVT
19467 : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
19468 if ((Level < AfterLegalizeDAG ||
19470 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
19471 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
19472 unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
19474 if (NumElems == 1) {
19475 SDValue Src = V->getOperand(IdxVal);
19476 if (EltVT != Src.getValueType())
19477 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
19478 return DAG.getBitcast(NVT, Src);
19481 // Extract the pieces from the original build_vector.
19482 SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
19483 V->ops().slice(IdxVal, NumElems));
19484 return DAG.getBitcast(NVT, BuildVec);
19489 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
19490 // Handle only simple case where vector being inserted and vector
19491 // being extracted are of same size.
19492 EVT SmallVT = V.getOperand(1).getValueType();
19493 if (!NVT.bitsEq(SmallVT))
19497 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
19499 // indices are equal or bit offsets are equal => V1
19500 // otherwise => (extract_subvec V1, ExtIdx)
19501 uint64_t InsIdx = V.getConstantOperandVal(2);
19502 if (InsIdx * SmallVT.getScalarSizeInBits() ==
19503 ExtIdx * NVT.getScalarSizeInBits())
19504 return DAG.getBitcast(NVT, V.getOperand(1));
19505 return DAG.getNode(
19506 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
19507 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
19511 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
19514 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19515 return SDValue(N, 0);
19520 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
19521 /// followed by concatenation. Narrow vector ops may have better performance
19522 /// than wide ops, and this can unlock further narrowing of other vector ops.
19523 /// Targets can invert this transform later if it is not profitable.
19524 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
19525 SelectionDAG &DAG) {
19526 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
19527 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
19528 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
19529 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
19532 // Split the wide shuffle mask into halves. Any mask element that is accessing
19533 // operand 1 is offset down to account for narrowing of the vectors.
19534 ArrayRef<int> Mask = Shuf->getMask();
19535 EVT VT = Shuf->getValueType(0);
19536 unsigned NumElts = VT.getVectorNumElements();
19537 unsigned HalfNumElts = NumElts / 2;
19538 SmallVector<int, 16> Mask0(HalfNumElts, -1);
19539 SmallVector<int, 16> Mask1(HalfNumElts, -1);
19540 for (unsigned i = 0; i != NumElts; ++i) {
19543 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
19544 if (i < HalfNumElts)
19547 Mask1[i - HalfNumElts] = M;
19550 // Ask the target if this is a valid transform.
19551 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19552 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
19554 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
19555 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
19558 // shuffle (concat X, undef), (concat Y, undef), Mask -->
19559 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
19560 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
19562 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
19563 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
19564 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
19567 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
19568 // or turn a shuffle of a single concat into simpler shuffle then concat.
19569 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
19570 EVT VT = N->getValueType(0);
19571 unsigned NumElts = VT.getVectorNumElements();
19573 SDValue N0 = N->getOperand(0);
19574 SDValue N1 = N->getOperand(1);
19575 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
19576 ArrayRef<int> Mask = SVN->getMask();
19578 SmallVector<SDValue, 4> Ops;
19579 EVT ConcatVT = N0.getOperand(0).getValueType();
19580 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
19581 unsigned NumConcats = NumElts / NumElemsPerConcat;
19583 auto IsUndefMaskElt = [](int i) { return i == -1; };
19585 // Special case: shuffle(concat(A,B)) can be more efficiently represented
19586 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
19587 // half vector elements.
19588 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
19589 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
19591 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
19593 Mask.slice(0, NumElemsPerConcat));
19594 N1 = DAG.getUNDEF(ConcatVT);
19595 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
19598 // Look at every vector that's inserted. We're looking for exact
19599 // subvector-sized copies from a concatenated vector
19600 for (unsigned I = 0; I != NumConcats; ++I) {
19601 unsigned Begin = I * NumElemsPerConcat;
19602 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
19604 // Make sure we're dealing with a copy.
19605 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
19606 Ops.push_back(DAG.getUNDEF(ConcatVT));
19611 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
19612 if (IsUndefMaskElt(SubMask[i]))
19614 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
19616 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
19617 if (0 <= OpIdx && EltOpIdx != OpIdx)
19621 assert(0 <= OpIdx && "Unknown concat_vectors op");
19623 if (OpIdx < (int)N0.getNumOperands())
19624 Ops.push_back(N0.getOperand(OpIdx));
19626 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
19629 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19632 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
19633 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
19635 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
19636 // a simplification in some sense, but it isn't appropriate in general: some
19637 // BUILD_VECTORs are substantially cheaper than others. The general case
19638 // of a BUILD_VECTOR requires inserting each element individually (or
19639 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
19640 // all constants is a single constant pool load. A BUILD_VECTOR where each
19641 // element is identical is a splat. A BUILD_VECTOR where most of the operands
19642 // are undef lowers to a small number of element insertions.
19644 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
19645 // We don't fold shuffles where one side is a non-zero constant, and we don't
19646 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
19647 // non-constant operands. This seems to work out reasonably well in practice.
19648 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
19650 const TargetLowering &TLI) {
19651 EVT VT = SVN->getValueType(0);
19652 unsigned NumElts = VT.getVectorNumElements();
19653 SDValue N0 = SVN->getOperand(0);
19654 SDValue N1 = SVN->getOperand(1);
19656 if (!N0->hasOneUse())
19659 // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
19660 // discussed above.
19661 if (!N1.isUndef()) {
19662 if (!N1->hasOneUse())
19665 bool N0AnyConst = isAnyConstantBuildVector(N0);
19666 bool N1AnyConst = isAnyConstantBuildVector(N1);
19667 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
19669 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
19673 // If both inputs are splats of the same value then we can safely merge this
19674 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
19675 bool IsSplat = false;
19676 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
19677 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
19679 if (SDValue Splat0 = BV0->getSplatValue())
19680 IsSplat = (Splat0 == BV1->getSplatValue());
19682 SmallVector<SDValue, 8> Ops;
19683 SmallSet<SDValue, 16> DuplicateOps;
19684 for (int M : SVN->getMask()) {
19685 SDValue Op = DAG.getUNDEF(VT.getScalarType());
19687 int Idx = M < (int)NumElts ? M : M - NumElts;
19688 SDValue &S = (M < (int)NumElts ? N0 : N1);
19689 if (S.getOpcode() == ISD::BUILD_VECTOR) {
19690 Op = S.getOperand(Idx);
19691 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19692 SDValue Op0 = S.getOperand(0);
19693 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
19695 // Operand can't be combined - bail out.
19700 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
19701 // generating a splat; semantically, this is fine, but it's likely to
19702 // generate low-quality code if the target can't reconstruct an appropriate
19704 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
19705 if (!IsSplat && !DuplicateOps.insert(Op).second)
19711 // BUILD_VECTOR requires all inputs to be of the same type, find the
19712 // maximum type and extend them all.
19713 EVT SVT = VT.getScalarType();
19714 if (SVT.isInteger())
19715 for (SDValue &Op : Ops)
19716 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
19717 if (SVT != VT.getScalarType())
19718 for (SDValue &Op : Ops)
19719 Op = TLI.isZExtFree(Op.getValueType(), SVT)
19720 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
19721 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
19722 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
19725 // Match shuffles that can be converted to any_vector_extend_in_reg.
19726 // This is often generated during legalization.
19727 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
19728 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
19729 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
19731 const TargetLowering &TLI,
19732 bool LegalOperations) {
19733 EVT VT = SVN->getValueType(0);
19734 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19736 // TODO Add support for big-endian when we have a test case.
19737 if (!VT.isInteger() || IsBigEndian)
19740 unsigned NumElts = VT.getVectorNumElements();
19741 unsigned EltSizeInBits = VT.getScalarSizeInBits();
19742 ArrayRef<int> Mask = SVN->getMask();
19743 SDValue N0 = SVN->getOperand(0);
19745 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
19746 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
19747 for (unsigned i = 0; i != NumElts; ++i) {
19750 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
19757 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
19758 // power-of-2 extensions as they are the most likely.
19759 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
19760 // Check for non power of 2 vector sizes
19761 if (NumElts % Scale != 0)
19763 if (!isAnyExtend(Scale))
19766 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
19767 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
19768 // Never create an illegal type. Only create unsupported operations if we
19769 // are pre-legalization.
19770 if (TLI.isTypeLegal(OutVT))
19771 if (!LegalOperations ||
19772 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
19773 return DAG.getBitcast(VT,
19774 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
19775 SDLoc(SVN), OutVT, N0));
19781 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
19782 // each source element of a large type into the lowest elements of a smaller
19783 // destination type. This is often generated during legalization.
19784 // If the source node itself was a '*_extend_vector_inreg' node then we should
19785 // then be able to remove it.
19786 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
19787 SelectionDAG &DAG) {
19788 EVT VT = SVN->getValueType(0);
19789 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
19791 // TODO Add support for big-endian when we have a test case.
19792 if (!VT.isInteger() || IsBigEndian)
19795 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
19797 unsigned Opcode = N0.getOpcode();
19798 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
19799 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
19800 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
19803 SDValue N00 = N0.getOperand(0);
19804 ArrayRef<int> Mask = SVN->getMask();
19805 unsigned NumElts = VT.getVectorNumElements();
19806 unsigned EltSizeInBits = VT.getScalarSizeInBits();
19807 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
19808 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
19810 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
19812 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
19814 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
19815 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
19816 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
19817 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
19818 for (unsigned i = 0; i != NumElts; ++i) {
19821 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
19828 // At the moment we just handle the case where we've truncated back to the
19829 // same size as before the extension.
19830 // TODO: handle more extension/truncation cases as cases arise.
19831 if (EltSizeInBits != ExtSrcSizeInBits)
19834 // We can remove *extend_vector_inreg only if the truncation happens at
19835 // the same scale as the extension.
19836 if (isTruncate(ExtScale))
19837 return DAG.getBitcast(VT, N00);
19842 // Combine shuffles of splat-shuffles of the form:
19843 // shuffle (shuffle V, undef, splat-mask), undef, M
19844 // If splat-mask contains undef elements, we need to be careful about
19845 // introducing undef's in the folded mask which are not the result of composing
19846 // the masks of the shuffles.
19847 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
19848 SelectionDAG &DAG) {
19849 if (!Shuf->getOperand(1).isUndef())
19851 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
19852 if (!Splat || !Splat->isSplat())
19855 ArrayRef<int> ShufMask = Shuf->getMask();
19856 ArrayRef<int> SplatMask = Splat->getMask();
19857 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
19859 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
19860 // every undef mask element in the splat-shuffle has a corresponding undef
19861 // element in the user-shuffle's mask or if the composition of mask elements
19862 // would result in undef.
19863 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
19864 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
19865 // In this case it is not legal to simplify to the splat-shuffle because we
19866 // may be exposing the users of the shuffle an undef element at index 1
19867 // which was not there before the combine.
19868 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
19869 // In this case the composition of masks yields SplatMask, so it's ok to
19870 // simplify to the splat-shuffle.
19871 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
19872 // In this case the composed mask includes all undef elements of SplatMask
19873 // and in addition sets element zero to undef. It is safe to simplify to
19874 // the splat-shuffle.
19875 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
19876 ArrayRef<int> SplatMask) {
19877 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
19878 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
19879 SplatMask[UserMask[i]] != -1)
19883 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
19884 return Shuf->getOperand(0);
19886 // Create a new shuffle with a mask that is composed of the two shuffles'
19888 SmallVector<int, 32> NewMask;
19889 for (int Idx : ShufMask)
19890 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
19892 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
19893 Splat->getOperand(0), Splat->getOperand(1),
19897 /// Combine shuffle of shuffle of the form:
19898 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
19899 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
19900 SelectionDAG &DAG) {
19901 if (!OuterShuf->getOperand(1).isUndef())
19903 auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
19904 if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
19907 ArrayRef<int> OuterMask = OuterShuf->getMask();
19908 ArrayRef<int> InnerMask = InnerShuf->getMask();
19909 unsigned NumElts = OuterMask.size();
19910 assert(NumElts == InnerMask.size() && "Mask length mismatch");
19911 SmallVector<int, 32> CombinedMask(NumElts, -1);
19912 int SplatIndex = -1;
19913 for (unsigned i = 0; i != NumElts; ++i) {
19914 // Undef lanes remain undef.
19915 int OuterMaskElt = OuterMask[i];
19916 if (OuterMaskElt == -1)
19919 // Peek through the shuffle masks to get the underlying source element.
19920 int InnerMaskElt = InnerMask[OuterMaskElt];
19921 if (InnerMaskElt == -1)
19924 // Initialize the splatted element.
19925 if (SplatIndex == -1)
19926 SplatIndex = InnerMaskElt;
19928 // Non-matching index - this is not a splat.
19929 if (SplatIndex != InnerMaskElt)
19932 CombinedMask[i] = InnerMaskElt;
19934 assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
19935 getSplatIndex(CombinedMask) != -1) &&
19936 "Expected a splat mask");
19938 // TODO: The transform may be a win even if the mask is not legal.
19939 EVT VT = OuterShuf->getValueType(0);
19940 assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
19941 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
19944 return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
19945 InnerShuf->getOperand(1), CombinedMask);
19948 /// If the shuffle mask is taking exactly one element from the first vector
19949 /// operand and passing through all other elements from the second vector
19950 /// operand, return the index of the mask element that is choosing an element
19951 /// from the first operand. Otherwise, return -1.
19952 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
19953 int MaskSize = Mask.size();
19954 int EltFromOp0 = -1;
19955 // TODO: This does not match if there are undef elements in the shuffle mask.
19956 // Should we ignore undefs in the shuffle mask instead? The trade-off is
19957 // removing an instruction (a shuffle), but losing the knowledge that some
19958 // vector lanes are not needed.
19959 for (int i = 0; i != MaskSize; ++i) {
19960 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
19961 // We're looking for a shuffle of exactly one element from operand 0.
19962 if (EltFromOp0 != -1)
19965 } else if (Mask[i] != i + MaskSize) {
19966 // Nothing from operand 1 can change lanes.
19973 /// If a shuffle inserts exactly one element from a source vector operand into
19974 /// another vector operand and we can access the specified element as a scalar,
19975 /// then we can eliminate the shuffle.
19976 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
19977 SelectionDAG &DAG) {
19978 // First, check if we are taking one element of a vector and shuffling that
19979 // element into another vector.
19980 ArrayRef<int> Mask = Shuf->getMask();
19981 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
19982 SDValue Op0 = Shuf->getOperand(0);
19983 SDValue Op1 = Shuf->getOperand(1);
19984 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
19985 if (ShufOp0Index == -1) {
19986 // Commute mask and check again.
19987 ShuffleVectorSDNode::commuteMask(CommutedMask);
19988 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
19989 if (ShufOp0Index == -1)
19991 // Commute operands to match the commuted shuffle mask.
19992 std::swap(Op0, Op1);
19993 Mask = CommutedMask;
19996 // The shuffle inserts exactly one element from operand 0 into operand 1.
19997 // Now see if we can access that element as a scalar via a real insert element
19999 // TODO: We can try harder to locate the element as a scalar. Examples: it
20000 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
20001 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
20002 "Shuffle mask value must be from operand 0");
20003 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
20006 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
20007 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
20010 // There's an existing insertelement with constant insertion index, so we
20011 // don't need to check the legality/profitability of a replacement operation
20012 // that differs at most in the constant value. The target should be able to
20013 // lower any of those in a similar way. If not, legalization will expand this
20014 // to a scalar-to-vector plus shuffle.
20016 // Note that the shuffle may move the scalar from the position that the insert
20017 // element used. Therefore, our new insert element occurs at the shuffle's
20018 // mask index value, not the insert's index value.
20019 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
20020 SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
20021 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
20022 Op1, Op0.getOperand(1), NewInsIndex);
20025 /// If we have a unary shuffle of a shuffle, see if it can be folded away
20026 /// completely. This has the potential to lose undef knowledge because the first
20027 /// shuffle may not have an undef mask element where the second one does. So
20028 /// only call this after doing simplifications based on demanded elements.
20029 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20030 // shuf (shuf0 X, Y, Mask0), undef, Mask
20031 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20032 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
20035 ArrayRef<int> Mask = Shuf->getMask();
20036 ArrayRef<int> Mask0 = Shuf0->getMask();
20037 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20038 // Ignore undef elements.
20041 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
20043 // Is the element of the shuffle operand chosen by this shuffle the same as
20044 // the element chosen by the shuffle operand itself?
20045 if (Mask0[Mask[i]] != Mask0[i])
20048 // Every element of this shuffle is identical to the result of the previous
20049 // shuffle, so we can replace this value.
20050 return Shuf->getOperand(0);
20053 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
20054 EVT VT = N->getValueType(0);
20055 unsigned NumElts = VT.getVectorNumElements();
20057 SDValue N0 = N->getOperand(0);
20058 SDValue N1 = N->getOperand(1);
20060 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
20062 // Canonicalize shuffle undef, undef -> undef
20063 if (N0.isUndef() && N1.isUndef())
20064 return DAG.getUNDEF(VT);
20066 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20068 // Canonicalize shuffle v, v -> v, undef
20070 SmallVector<int, 8> NewMask;
20071 for (unsigned i = 0; i != NumElts; ++i) {
20072 int Idx = SVN->getMaskElt(i);
20073 if (Idx >= (int)NumElts) Idx -= NumElts;
20074 NewMask.push_back(Idx);
20076 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
20079 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
20081 return DAG.getCommutedVectorShuffle(*SVN);
20083 // Remove references to rhs if it is undef
20084 if (N1.isUndef()) {
20085 bool Changed = false;
20086 SmallVector<int, 8> NewMask;
20087 for (unsigned i = 0; i != NumElts; ++i) {
20088 int Idx = SVN->getMaskElt(i);
20089 if (Idx >= (int)NumElts) {
20093 NewMask.push_back(Idx);
20096 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
20099 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
20102 // A shuffle of a single vector that is a splatted value can always be folded.
20103 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
20106 if (SDValue V = formSplatFromShuffles(SVN, DAG))
20109 // If it is a splat, check if the argument vector is another splat or a
20111 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
20112 int SplatIndex = SVN->getSplatIndex();
20113 if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
20114 TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
20115 // splat (vector_bo L, R), Index -->
20116 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
20117 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
20119 EVT EltVT = VT.getScalarType();
20120 SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
20121 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
20122 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
20123 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
20124 N0.getNode()->getFlags());
20125 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
20126 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
20127 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
20130 // If this is a bit convert that changes the element type of the vector but
20131 // not the number of vector elements, look through it. Be careful not to
20132 // look though conversions that change things like v4f32 to v2f64.
20133 SDNode *V = N0.getNode();
20134 if (V->getOpcode() == ISD::BITCAST) {
20135 SDValue ConvInput = V->getOperand(0);
20136 if (ConvInput.getValueType().isVector() &&
20137 ConvInput.getValueType().getVectorNumElements() == NumElts)
20138 V = ConvInput.getNode();
20141 if (V->getOpcode() == ISD::BUILD_VECTOR) {
20142 assert(V->getNumOperands() == NumElts &&
20143 "BUILD_VECTOR has wrong number of operands");
20145 bool AllSame = true;
20146 for (unsigned i = 0; i != NumElts; ++i) {
20147 if (!V->getOperand(i).isUndef()) {
20148 Base = V->getOperand(i);
20152 // Splat of <u, u, u, u>, return <u, u, u, u>
20153 if (!Base.getNode())
20155 for (unsigned i = 0; i != NumElts; ++i) {
20156 if (V->getOperand(i) != Base) {
20161 // Splat of <x, x, x, x>, return <x, x, x, x>
20165 // Canonicalize any other splat as a build_vector.
20166 SDValue Splatted = V->getOperand(SplatIndex);
20167 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
20168 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
20170 // We may have jumped through bitcasts, so the type of the
20171 // BUILD_VECTOR may not match the type of the shuffle.
20172 if (V->getValueType(0) != VT)
20173 NewBV = DAG.getBitcast(VT, NewBV);
20178 // Simplify source operands based on shuffle mask.
20179 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20180 return SDValue(N, 0);
20182 // This is intentionally placed after demanded elements simplification because
20183 // it could eliminate knowledge of undef elements created by this shuffle.
20184 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
20187 // Match shuffles that can be converted to any_vector_extend_in_reg.
20188 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
20191 // Combine "truncate_vector_in_reg" style shuffles.
20192 if (SDValue V = combineTruncationShuffle(SVN, DAG))
20195 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
20196 Level < AfterLegalizeVectorOps &&
20198 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
20199 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
20200 if (SDValue V = partitionShuffleOfConcats(N, DAG))
20204 // A shuffle of a concat of the same narrow vector can be reduced to use
20205 // only low-half elements of a concat with undef:
20206 // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
20207 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
20208 N0.getNumOperands() == 2 &&
20209 N0.getOperand(0) == N0.getOperand(1)) {
20210 int HalfNumElts = (int)NumElts / 2;
20211 SmallVector<int, 8> NewMask;
20212 for (unsigned i = 0; i != NumElts; ++i) {
20213 int Idx = SVN->getMaskElt(i);
20214 if (Idx >= HalfNumElts) {
20215 assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
20216 Idx -= HalfNumElts;
20218 NewMask.push_back(Idx);
20220 if (TLI.isShuffleMaskLegal(NewMask, VT)) {
20221 SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
20222 SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
20223 N0.getOperand(0), UndefVec);
20224 return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
20228 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20229 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20230 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
20231 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
20234 // If this shuffle only has a single input that is a bitcasted shuffle,
20235 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
20236 // back to their original types.
20237 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
20238 N1.isUndef() && Level < AfterLegalizeVectorOps &&
20239 TLI.isTypeLegal(VT)) {
20241 SDValue BC0 = peekThroughOneUseBitcasts(N0);
20242 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
20243 EVT SVT = VT.getScalarType();
20244 EVT InnerVT = BC0->getValueType(0);
20245 EVT InnerSVT = InnerVT.getScalarType();
20247 // Determine which shuffle works with the smaller scalar type.
20248 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
20249 EVT ScaleSVT = ScaleVT.getScalarType();
20251 if (TLI.isTypeLegal(ScaleVT) &&
20252 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
20253 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
20254 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
20255 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
20257 // Scale the shuffle masks to the smaller scalar type.
20258 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
20259 SmallVector<int, 8> InnerMask;
20260 SmallVector<int, 8> OuterMask;
20261 narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
20262 narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
20264 // Merge the shuffle masks.
20265 SmallVector<int, 8> NewMask;
20266 for (int M : OuterMask)
20267 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
20269 // Test for shuffle mask legality over both commutations.
20270 SDValue SV0 = BC0->getOperand(0);
20271 SDValue SV1 = BC0->getOperand(1);
20272 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
20274 std::swap(SV0, SV1);
20275 ShuffleVectorSDNode::commuteMask(NewMask);
20276 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
20280 SV0 = DAG.getBitcast(ScaleVT, SV0);
20281 SV1 = DAG.getBitcast(ScaleVT, SV1);
20282 return DAG.getBitcast(
20283 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
20289 // Canonicalize shuffles according to rules:
20290 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
20291 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
20292 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
20293 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
20294 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
20295 TLI.isTypeLegal(VT)) {
20296 // The incoming shuffle must be of the same type as the result of the
20297 // current shuffle.
20298 assert(N1->getOperand(0).getValueType() == VT &&
20299 "Shuffle types don't match");
20301 SDValue SV0 = N1->getOperand(0);
20302 SDValue SV1 = N1->getOperand(1);
20303 bool HasSameOp0 = N0 == SV0;
20304 bool IsSV1Undef = SV1.isUndef();
20305 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
20306 // Commute the operands of this shuffle so that next rule
20308 return DAG.getCommutedVectorShuffle(*SVN);
20311 // Try to fold according to rules:
20312 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
20313 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
20314 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
20315 // Don't try to fold shuffles with illegal type.
20316 // Only fold if this shuffle is the only user of the other shuffle.
20317 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
20318 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
20319 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
20321 // Don't try to fold splats; they're likely to simplify somehow, or they
20323 if (OtherSV->isSplat())
20326 // The incoming shuffle must be of the same type as the result of the
20327 // current shuffle.
20328 assert(OtherSV->getOperand(0).getValueType() == VT &&
20329 "Shuffle types don't match");
20332 SmallVector<int, 4> Mask;
20333 // Compute the combined shuffle mask for a shuffle with SV0 as the first
20334 // operand, and SV1 as the second operand.
20335 for (unsigned i = 0; i != NumElts; ++i) {
20336 int Idx = SVN->getMaskElt(i);
20338 // Propagate Undef.
20339 Mask.push_back(Idx);
20343 SDValue CurrentVec;
20344 if (Idx < (int)NumElts) {
20345 // This shuffle index refers to the inner shuffle N0. Lookup the inner
20346 // shuffle mask to identify which vector is actually referenced.
20347 Idx = OtherSV->getMaskElt(Idx);
20349 // Propagate Undef.
20350 Mask.push_back(Idx);
20354 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
20355 : OtherSV->getOperand(1);
20357 // This shuffle index references an element within N1.
20361 // Simple case where 'CurrentVec' is UNDEF.
20362 if (CurrentVec.isUndef()) {
20363 Mask.push_back(-1);
20367 // Canonicalize the shuffle index. We don't know yet if CurrentVec
20368 // will be the first or second operand of the combined shuffle.
20369 Idx = Idx % NumElts;
20370 if (!SV0.getNode() || SV0 == CurrentVec) {
20371 // Ok. CurrentVec is the left hand side.
20372 // Update the mask accordingly.
20374 Mask.push_back(Idx);
20378 // Bail out if we cannot convert the shuffle pair into a single shuffle.
20379 if (SV1.getNode() && SV1 != CurrentVec)
20382 // Ok. CurrentVec is the right hand side.
20383 // Update the mask accordingly.
20385 Mask.push_back(Idx + NumElts);
20388 // Check if all indices in Mask are Undef. In case, propagate Undef.
20389 bool isUndefMask = true;
20390 for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
20391 isUndefMask &= Mask[i] < 0;
20394 return DAG.getUNDEF(VT);
20396 if (!SV0.getNode())
20397 SV0 = DAG.getUNDEF(VT);
20398 if (!SV1.getNode())
20399 SV1 = DAG.getUNDEF(VT);
20401 // Avoid introducing shuffles with illegal mask.
20402 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
20403 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
20404 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
20405 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
20406 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
20407 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
20408 return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
20411 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
20417 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
20418 SDValue InVal = N->getOperand(0);
20419 EVT VT = N->getValueType(0);
20421 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
20422 // with a VECTOR_SHUFFLE and possible truncate.
20423 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20424 VT.isFixedLengthVector() &&
20425 InVal->getOperand(0).getValueType().isFixedLengthVector()) {
20426 SDValue InVec = InVal->getOperand(0);
20427 SDValue EltNo = InVal->getOperand(1);
20428 auto InVecT = InVec.getValueType();
20429 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
20430 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
20431 int Elt = C0->getZExtValue();
20433 // If we have an implict truncate do truncate here as long as it's legal.
20434 // if it's not legal, this should
20435 if (VT.getScalarType() != InVal.getValueType() &&
20436 InVal.getValueType().isScalarInteger() &&
20437 isTypeLegal(VT.getScalarType())) {
20439 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
20440 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
20442 if (VT.getScalarType() == InVecT.getScalarType() &&
20443 VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
20444 SDValue LegalShuffle =
20445 TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
20446 DAG.getUNDEF(InVecT), NewMask, DAG);
20447 if (LegalShuffle) {
20448 // If the initial vector is the correct size this shuffle is a
20451 return LegalShuffle;
20452 // If not we must truncate the vector.
20453 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
20454 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
20455 EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
20456 InVecT.getVectorElementType(),
20457 VT.getVectorNumElements());
20458 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
20459 LegalShuffle, ZeroIdx);
20469 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
20470 EVT VT = N->getValueType(0);
20471 SDValue N0 = N->getOperand(0);
20472 SDValue N1 = N->getOperand(1);
20473 SDValue N2 = N->getOperand(2);
20474 uint64_t InsIdx = N->getConstantOperandVal(2);
20476 // If inserting an UNDEF, just return the original vector.
20480 // If this is an insert of an extracted vector into an undef vector, we can
20481 // just use the input to the extract.
20482 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20483 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
20484 return N1.getOperand(0);
20486 // If we are inserting a bitcast value into an undef, with the same
20487 // number of elements, just use the bitcast input of the extract.
20488 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
20489 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
20490 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
20491 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20492 N1.getOperand(0).getOperand(1) == N2 &&
20493 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
20494 VT.getVectorNumElements() &&
20495 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
20496 VT.getSizeInBits()) {
20497 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
20500 // If both N1 and N2 are bitcast values on which insert_subvector
20501 // would makes sense, pull the bitcast through.
20502 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
20503 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
20504 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
20505 SDValue CN0 = N0.getOperand(0);
20506 SDValue CN1 = N1.getOperand(0);
20507 EVT CN0VT = CN0.getValueType();
20508 EVT CN1VT = CN1.getValueType();
20509 if (CN0VT.isVector() && CN1VT.isVector() &&
20510 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
20511 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
20512 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
20513 CN0.getValueType(), CN0, CN1, N2);
20514 return DAG.getBitcast(VT, NewINSERT);
20518 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
20519 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
20520 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
20521 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
20522 N0.getOperand(1).getValueType() == N1.getValueType() &&
20523 N0.getOperand(2) == N2)
20524 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
20527 // Eliminate an intermediate insert into an undef vector:
20528 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
20529 // insert_subvector undef, X, N2
20530 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
20531 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
20532 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
20533 N1.getOperand(1), N2);
20535 // Push subvector bitcasts to the output, adjusting the index as we go.
20536 // insert_subvector(bitcast(v), bitcast(s), c1)
20537 // -> bitcast(insert_subvector(v, s, c2))
20538 if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
20539 N1.getOpcode() == ISD::BITCAST) {
20540 SDValue N0Src = peekThroughBitcasts(N0);
20541 SDValue N1Src = peekThroughBitcasts(N1);
20542 EVT N0SrcSVT = N0Src.getValueType().getScalarType();
20543 EVT N1SrcSVT = N1Src.getValueType().getScalarType();
20544 if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
20545 N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
20549 LLVMContext &Ctx = *DAG.getContext();
20550 unsigned NumElts = VT.getVectorNumElements();
20551 unsigned EltSizeInBits = VT.getScalarSizeInBits();
20552 if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
20553 unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
20554 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
20555 NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
20556 } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
20557 unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
20558 if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
20559 NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
20560 NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
20563 if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
20564 SDValue Res = DAG.getBitcast(NewVT, N0Src);
20565 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
20566 return DAG.getBitcast(VT, Res);
20571 // Canonicalize insert_subvector dag nodes.
20573 // (insert_subvector (insert_subvector A, Idx0), Idx1)
20574 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
20575 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
20576 N1.getValueType() == N0.getOperand(1).getValueType()) {
20577 unsigned OtherIdx = N0.getConstantOperandVal(2);
20578 if (InsIdx < OtherIdx) {
20580 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
20581 N0.getOperand(0), N1, N2);
20582 AddToWorklist(NewOp.getNode());
20583 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
20584 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
20588 // If the input vector is a concatenation, and the insert replaces
20589 // one of the pieces, we can optimize into a single concat_vectors.
20590 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
20591 N0.getOperand(0).getValueType() == N1.getValueType()) {
20592 unsigned Factor = N1.getValueType().getVectorNumElements();
20593 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
20594 Ops[InsIdx / Factor] = N1;
20595 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20598 // Simplify source operands based on insertion.
20599 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20600 return SDValue(N, 0);
20605 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
20606 SDValue N0 = N->getOperand(0);
20608 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
20609 if (N0->getOpcode() == ISD::FP16_TO_FP)
20610 return N0->getOperand(0);
20615 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
20616 SDValue N0 = N->getOperand(0);
20618 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
20619 if (N0->getOpcode() == ISD::AND) {
20620 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
20621 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
20622 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
20630 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
20631 SDValue N0 = N->getOperand(0);
20632 EVT VT = N0.getValueType();
20633 unsigned Opcode = N->getOpcode();
20635 // VECREDUCE over 1-element vector is just an extract.
20636 if (VT.getVectorNumElements() == 1) {
20639 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
20640 DAG.getVectorIdxConstant(0, dl));
20641 if (Res.getValueType() != N->getValueType(0))
20642 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
20646 // On an boolean vector an and/or reduction is the same as a umin/umax
20647 // reduction. Convert them if the latter is legal while the former isn't.
20648 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
20649 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
20650 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
20651 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
20652 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
20653 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
20654 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
20660 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
20661 /// with the destination vector and a zero vector.
20662 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
20663 /// vector_shuffle V, Zero, <0, 4, 2, 4>
20664 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
20665 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
20667 EVT VT = N->getValueType(0);
20668 SDValue LHS = N->getOperand(0);
20669 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
20672 // Make sure we're not running after operation legalization where it
20673 // may have custom lowered the vector shuffles.
20674 if (LegalOperations)
20677 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
20680 EVT RVT = RHS.getValueType();
20681 unsigned NumElts = RHS.getNumOperands();
20683 // Attempt to create a valid clear mask, splitting the mask into
20684 // sub elements and checking to see if each is
20685 // all zeros or all ones - suitable for shuffle masking.
20686 auto BuildClearMask = [&](int Split) {
20687 int NumSubElts = NumElts * Split;
20688 int NumSubBits = RVT.getScalarSizeInBits() / Split;
20690 SmallVector<int, 8> Indices;
20691 for (int i = 0; i != NumSubElts; ++i) {
20692 int EltIdx = i / Split;
20693 int SubIdx = i % Split;
20694 SDValue Elt = RHS.getOperand(EltIdx);
20695 // X & undef --> 0 (not undef). So this lane must be converted to choose
20696 // from the zero constant vector (same as if the element had all 0-bits).
20697 if (Elt.isUndef()) {
20698 Indices.push_back(i + NumSubElts);
20703 if (isa<ConstantSDNode>(Elt))
20704 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
20705 else if (isa<ConstantFPSDNode>(Elt))
20706 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
20710 // Extract the sub element from the constant bit mask.
20711 if (DAG.getDataLayout().isBigEndian())
20712 Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
20714 Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
20716 if (Bits.isAllOnesValue())
20717 Indices.push_back(i);
20718 else if (Bits == 0)
20719 Indices.push_back(i + NumSubElts);
20724 // Let's see if the target supports this vector_shuffle.
20725 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
20726 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
20727 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
20730 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
20731 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
20732 DAG.getBitcast(ClearVT, LHS),
20736 // Determine maximum split level (byte level masking).
20738 if (RVT.getScalarSizeInBits() % 8 == 0)
20739 MaxSplit = RVT.getScalarSizeInBits() / 8;
20741 for (int Split = 1; Split <= MaxSplit; ++Split)
20742 if (RVT.getScalarSizeInBits() % Split == 0)
20743 if (SDValue S = BuildClearMask(Split))
20749 /// If a vector binop is performed on splat values, it may be profitable to
20750 /// extract, scalarize, and insert/splat.
20751 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
20752 SDValue N0 = N->getOperand(0);
20753 SDValue N1 = N->getOperand(1);
20754 unsigned Opcode = N->getOpcode();
20755 EVT VT = N->getValueType(0);
20756 EVT EltVT = VT.getVectorElementType();
20757 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20759 // TODO: Remove/replace the extract cost check? If the elements are available
20760 // as scalars, then there may be no extract cost. Should we ask if
20761 // inserting a scalar back into a vector is cheap instead?
20762 int Index0, Index1;
20763 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
20764 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
20765 if (!Src0 || !Src1 || Index0 != Index1 ||
20766 Src0.getValueType().getVectorElementType() != EltVT ||
20767 Src1.getValueType().getVectorElementType() != EltVT ||
20768 !TLI.isExtractVecEltCheap(VT, Index0) ||
20769 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
20773 SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
20774 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
20775 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
20776 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
20778 // If all lanes but 1 are undefined, no need to splat the scalar result.
20779 // TODO: Keep track of undefs and use that info in the general case.
20780 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
20781 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
20782 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
20783 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
20784 // build_vec ..undef, (bo X, Y), undef...
20785 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
20786 Ops[Index0] = ScalarBO;
20787 return DAG.getBuildVector(VT, DL, Ops);
20790 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
20791 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
20792 return DAG.getBuildVector(VT, DL, Ops);
20795 /// Visit a binary vector operation, like ADD.
20796 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
20797 assert(N->getValueType(0).isVector() &&
20798 "SimplifyVBinOp only works on vectors!");
20800 SDValue LHS = N->getOperand(0);
20801 SDValue RHS = N->getOperand(1);
20802 SDValue Ops[] = {LHS, RHS};
20803 EVT VT = N->getValueType(0);
20804 unsigned Opcode = N->getOpcode();
20805 SDNodeFlags Flags = N->getFlags();
20807 // See if we can constant fold the vector operation.
20808 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
20809 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
20812 // Move unary shuffles with identical masks after a vector binop:
20813 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
20814 // --> shuffle (VBinOp A, B), Undef, Mask
20815 // This does not require type legality checks because we are creating the
20816 // same types of operations that are in the original sequence. We do have to
20817 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
20818 // though. This code is adapted from the identical transform in instcombine.
20819 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
20820 Opcode != ISD::UREM && Opcode != ISD::SREM &&
20821 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
20822 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
20823 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
20824 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
20825 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
20826 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
20828 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
20829 RHS.getOperand(0), Flags);
20830 SDValue UndefV = LHS.getOperand(1);
20831 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
20834 // Try to sink a splat shuffle after a binop with a uniform constant.
20835 // This is limited to cases where neither the shuffle nor the constant have
20836 // undefined elements because that could be poison-unsafe or inhibit
20837 // demanded elements analysis. It is further limited to not change a splat
20838 // of an inserted scalar because that may be optimized better by
20839 // load-folding or other target-specific behaviors.
20840 if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
20841 Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
20842 Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
20843 // binop (splat X), (splat C) --> splat (binop X, C)
20845 SDValue X = Shuf0->getOperand(0);
20846 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
20847 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
20850 if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
20851 Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
20852 Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
20853 // binop (splat C), (splat X) --> splat (binop C, X)
20855 SDValue X = Shuf1->getOperand(0);
20856 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
20857 return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
20862 // The following pattern is likely to emerge with vector reduction ops. Moving
20863 // the binary operation ahead of insertion may allow using a narrower vector
20864 // instruction that has better performance than the wide version of the op:
20865 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
20866 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
20867 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
20868 LHS.getOperand(2) == RHS.getOperand(2) &&
20869 (LHS.hasOneUse() || RHS.hasOneUse())) {
20870 SDValue X = LHS.getOperand(1);
20871 SDValue Y = RHS.getOperand(1);
20872 SDValue Z = LHS.getOperand(2);
20873 EVT NarrowVT = X.getValueType();
20874 if (NarrowVT == Y.getValueType() &&
20875 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20876 // (binop undef, undef) may not return undef, so compute that result.
20879 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
20880 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
20881 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
20885 // Make sure all but the first op are undef or constant.
20886 auto ConcatWithConstantOrUndef = [](SDValue Concat) {
20887 return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
20888 std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
20889 [](const SDValue &Op) {
20890 return Op.isUndef() ||
20891 ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
20895 // The following pattern is likely to emerge with vector reduction ops. Moving
20896 // the binary operation ahead of the concat may allow using a narrower vector
20897 // instruction that has better performance than the wide version of the op:
20898 // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
20899 // concat (VBinOp X, Y), VecC
20900 if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
20901 (LHS.hasOneUse() || RHS.hasOneUse())) {
20902 EVT NarrowVT = LHS.getOperand(0).getValueType();
20903 if (NarrowVT == RHS.getOperand(0).getValueType() &&
20904 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
20906 unsigned NumOperands = LHS.getNumOperands();
20907 SmallVector<SDValue, 4> ConcatOps;
20908 for (unsigned i = 0; i != NumOperands; ++i) {
20909 // This constant fold for operands 1 and up.
20910 ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
20911 RHS.getOperand(i)));
20914 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20918 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
20924 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
20926 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
20928 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
20929 cast<CondCodeSDNode>(N0.getOperand(2))->get());
20931 // If we got a simplified select_cc node back from SimplifySelectCC, then
20932 // break it down into a new SETCC node, and a new SELECT node, and then return
20933 // the SELECT node, since we were called with a SELECT node.
20934 if (SCC.getNode()) {
20935 // Check to see if we got a select_cc back (to turn into setcc/select).
20936 // Otherwise, just return whatever node we got back, like fabs.
20937 if (SCC.getOpcode() == ISD::SELECT_CC) {
20938 const SDNodeFlags Flags = N0.getNode()->getFlags();
20939 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
20941 SCC.getOperand(0), SCC.getOperand(1),
20942 SCC.getOperand(4), Flags);
20943 AddToWorklist(SETCC.getNode());
20944 SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
20945 SCC.getOperand(2), SCC.getOperand(3));
20946 SelectNode->setFlags(Flags);
20955 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
20956 /// being selected between, see if we can simplify the select. Callers of this
20957 /// should assume that TheSelect is deleted if this returns true. As such, they
20958 /// should return the appropriate thing (e.g. the node) back to the top-level of
20959 /// the DAG combiner loop to avoid it being looked at.
20960 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
20962 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
20963 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
20964 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
20965 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
20966 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
20967 SDValue Sqrt = RHS;
20970 const ConstantFPSDNode *Zero = nullptr;
20972 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
20973 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
20974 CmpLHS = TheSelect->getOperand(0);
20975 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
20977 // SELECT or VSELECT
20978 SDValue Cmp = TheSelect->getOperand(0);
20979 if (Cmp.getOpcode() == ISD::SETCC) {
20980 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
20981 CmpLHS = Cmp.getOperand(0);
20982 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
20985 if (Zero && Zero->isZero() &&
20986 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
20987 CC == ISD::SETULT || CC == ISD::SETLT)) {
20988 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
20989 CombineTo(TheSelect, Sqrt);
20994 // Cannot simplify select with vector condition
20995 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
20997 // If this is a select from two identical things, try to pull the operation
20998 // through the select.
20999 if (LHS.getOpcode() != RHS.getOpcode() ||
21000 !LHS.hasOneUse() || !RHS.hasOneUse())
21003 // If this is a load and the token chain is identical, replace the select
21004 // of two loads with a load through a select of the address to load from.
21005 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
21006 // constants have been dropped into the constant pool.
21007 if (LHS.getOpcode() == ISD::LOAD) {
21008 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
21009 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
21011 // Token chains must be identical.
21012 if (LHS.getOperand(0) != RHS.getOperand(0) ||
21013 // Do not let this transformation reduce the number of volatile loads.
21014 // Be conservative for atomics for the moment
21015 // TODO: This does appear to be legal for unordered atomics (see D66309)
21016 !LLD->isSimple() || !RLD->isSimple() ||
21017 // FIXME: If either is a pre/post inc/dec load,
21018 // we'd need to split out the address adjustment.
21019 LLD->isIndexed() || RLD->isIndexed() ||
21020 // If this is an EXTLOAD, the VT's must match.
21021 LLD->getMemoryVT() != RLD->getMemoryVT() ||
21022 // If this is an EXTLOAD, the kind of extension must match.
21023 (LLD->getExtensionType() != RLD->getExtensionType() &&
21024 // The only exception is if one of the extensions is anyext.
21025 LLD->getExtensionType() != ISD::EXTLOAD &&
21026 RLD->getExtensionType() != ISD::EXTLOAD) ||
21027 // FIXME: this discards src value information. This is
21028 // over-conservative. It would be beneficial to be able to remember
21029 // both potential memory locations. Since we are discarding
21030 // src value info, don't do the transformation if the memory
21031 // locations are not in the default address space.
21032 LLD->getPointerInfo().getAddrSpace() != 0 ||
21033 RLD->getPointerInfo().getAddrSpace() != 0 ||
21034 // We can't produce a CMOV of a TargetFrameIndex since we won't
21035 // generate the address generation required.
21036 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21037 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21038 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
21039 LLD->getBasePtr().getValueType()))
21042 // The loads must not depend on one another.
21043 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
21046 // Check that the select condition doesn't reach either load. If so,
21047 // folding this will induce a cycle into the DAG. If not, this is safe to
21048 // xform, so create a select of the addresses.
21050 SmallPtrSet<const SDNode *, 32> Visited;
21051 SmallVector<const SDNode *, 16> Worklist;
21053 // Always fail if LLD and RLD are not independent. TheSelect is a
21054 // predecessor to all Nodes in question so we need not search past it.
21056 Visited.insert(TheSelect);
21057 Worklist.push_back(LLD);
21058 Worklist.push_back(RLD);
21060 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
21061 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
21065 if (TheSelect->getOpcode() == ISD::SELECT) {
21066 // We cannot do this optimization if any pair of {RLD, LLD} is a
21067 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
21068 // Loads, we only need to check if CondNode is a successor to one of the
21069 // loads. We can further avoid this if there's no use of their chain
21071 SDNode *CondNode = TheSelect->getOperand(0).getNode();
21072 Worklist.push_back(CondNode);
21074 if ((LLD->hasAnyUseOfValue(1) &&
21075 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21076 (RLD->hasAnyUseOfValue(1) &&
21077 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21080 Addr = DAG.getSelect(SDLoc(TheSelect),
21081 LLD->getBasePtr().getValueType(),
21082 TheSelect->getOperand(0), LLD->getBasePtr(),
21083 RLD->getBasePtr());
21084 } else { // Otherwise SELECT_CC
21085 // We cannot do this optimization if any pair of {RLD, LLD} is a
21086 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
21087 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
21088 // one of the loads. We can further avoid this if there's no use of their
21091 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
21092 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
21093 Worklist.push_back(CondLHS);
21094 Worklist.push_back(CondRHS);
21096 if ((LLD->hasAnyUseOfValue(1) &&
21097 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21098 (RLD->hasAnyUseOfValue(1) &&
21099 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21102 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
21103 LLD->getBasePtr().getValueType(),
21104 TheSelect->getOperand(0),
21105 TheSelect->getOperand(1),
21106 LLD->getBasePtr(), RLD->getBasePtr(),
21107 TheSelect->getOperand(4));
21111 // It is safe to replace the two loads if they have different alignments,
21112 // but the new load must be the minimum (most restrictive) alignment of the
21114 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
21115 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
21116 if (!RLD->isInvariant())
21117 MMOFlags &= ~MachineMemOperand::MOInvariant;
21118 if (!RLD->isDereferenceable())
21119 MMOFlags &= ~MachineMemOperand::MODereferenceable;
21120 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
21121 // FIXME: Discards pointer and AA info.
21122 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
21123 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
21126 // FIXME: Discards pointer and AA info.
21127 Load = DAG.getExtLoad(
21128 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
21129 : LLD->getExtensionType(),
21130 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
21131 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
21134 // Users of the select now use the result of the load.
21135 CombineTo(TheSelect, Load);
21137 // Users of the old loads now use the new load's chain. We know the
21138 // old-load value is dead now.
21139 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
21140 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
21147 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
21149 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
21150 SDValue N1, SDValue N2, SDValue N3,
21151 ISD::CondCode CC) {
21152 // If this is a select where the false operand is zero and the compare is a
21153 // check of the sign bit, see if we can perform the "gzip trick":
21154 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
21155 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
21156 EVT XType = N0.getValueType();
21157 EVT AType = N2.getValueType();
21158 if (!isNullConstant(N3) || !XType.bitsGE(AType))
21161 // If the comparison is testing for a positive value, we have to invert
21162 // the sign bit mask, so only do that transform if the target has a bitwise
21163 // 'and not' instruction (the invert is free).
21164 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
21165 // (X > -1) ? A : 0
21166 // (X > 0) ? X : 0 <-- This is canonical signed max.
21167 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
21169 } else if (CC == ISD::SETLT) {
21171 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
21172 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
21178 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
21180 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
21181 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21182 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
21183 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
21184 if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
21185 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21186 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
21187 AddToWorklist(Shift.getNode());
21189 if (XType.bitsGT(AType)) {
21190 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21191 AddToWorklist(Shift.getNode());
21194 if (CC == ISD::SETGT)
21195 Shift = DAG.getNOT(DL, Shift, AType);
21197 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21201 unsigned ShCt = XType.getSizeInBits() - 1;
21202 if (TLI.shouldAvoidTransformToShift(XType, ShCt))
21205 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21206 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
21207 AddToWorklist(Shift.getNode());
21209 if (XType.bitsGT(AType)) {
21210 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21211 AddToWorklist(Shift.getNode());
21214 if (CC == ISD::SETGT)
21215 Shift = DAG.getNOT(DL, Shift, AType);
21217 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21220 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
21221 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
21222 /// in it. This may be a win when the constant is not otherwise available
21223 /// because it replaces two constant pool loads with one.
21224 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
21225 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
21226 ISD::CondCode CC) {
21227 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
21230 // If we are before legalize types, we want the other legalization to happen
21231 // first (for example, to avoid messing with soft float).
21232 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
21233 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
21234 EVT VT = N2.getValueType();
21235 if (!TV || !FV || !TLI.isTypeLegal(VT))
21238 // If a constant can be materialized without loads, this does not make sense.
21239 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
21240 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
21241 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
21244 // If both constants have multiple uses, then we won't need to do an extra
21245 // load. The values are likely around in registers for other users.
21246 if (!TV->hasOneUse() && !FV->hasOneUse())
21249 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
21250 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
21251 Type *FPTy = Elts[0]->getType();
21252 const DataLayout &TD = DAG.getDataLayout();
21254 // Create a ConstantArray of the two constants.
21255 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
21256 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
21257 TD.getPrefTypeAlign(FPTy));
21258 Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
21260 // Get offsets to the 0 and 1 elements of the array, so we can select between
21262 SDValue Zero = DAG.getIntPtrConstant(0, DL);
21263 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
21264 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
21266 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
21267 AddToWorklist(Cond.getNode());
21268 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
21269 AddToWorklist(CstOffset.getNode());
21270 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
21271 AddToWorklist(CPIdx.getNode());
21272 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
21273 MachinePointerInfo::getConstantPool(
21274 DAG.getMachineFunction()), Alignment);
21277 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
21278 /// where 'cond' is the comparison specified by CC.
21279 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
21280 SDValue N2, SDValue N3, ISD::CondCode CC,
21281 bool NotExtCompare) {
21282 // (x ? y : y) -> y.
21283 if (N2 == N3) return N2;
21285 EVT CmpOpVT = N0.getValueType();
21286 EVT CmpResVT = getSetCCResultType(CmpOpVT);
21287 EVT VT = N2.getValueType();
21288 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
21289 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21290 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
21292 // Determine if the condition we're dealing with is constant.
21293 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
21294 AddToWorklist(SCC.getNode());
21295 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
21296 // fold select_cc true, x, y -> x
21297 // fold select_cc false, x, y -> y
21298 return !(SCCC->isNullValue()) ? N2 : N3;
21303 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
21306 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
21309 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
21310 // where y is has a single bit set.
21311 // A plaintext description would be, we can turn the SELECT_CC into an AND
21312 // when the condition can be materialized as an all-ones register. Any
21313 // single bit-test can be materialized as an all-ones register with
21314 // shift-left and shift-right-arith.
21315 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
21316 N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
21317 SDValue AndLHS = N0->getOperand(0);
21318 auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21319 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
21320 // Shift the tested bit over the sign bit.
21321 const APInt &AndMask = ConstAndRHS->getAPIntValue();
21322 unsigned ShCt = AndMask.getBitWidth() - 1;
21323 if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
21325 DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
21326 getShiftAmountTy(AndLHS.getValueType()));
21327 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
21329 // Now arithmetic right shift it all the way over, so the result is
21330 // either all-ones, or zero.
21332 DAG.getConstant(ShCt, SDLoc(Shl),
21333 getShiftAmountTy(Shl.getValueType()));
21334 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
21336 return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
21341 // fold select C, 16, 0 -> shl C, 4
21342 bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
21343 bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
21345 if ((Fold || Swap) &&
21346 TLI.getBooleanContents(CmpOpVT) ==
21347 TargetLowering::ZeroOrOneBooleanContent &&
21348 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
21351 CC = ISD::getSetCCInverse(CC, CmpOpVT);
21352 std::swap(N2C, N3C);
21355 // If the caller doesn't want us to simplify this into a zext of a compare,
21357 if (NotExtCompare && N2C->isOne())
21361 // zext (setcc n0, n1)
21363 SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
21364 if (VT.bitsLT(SCC.getValueType()))
21365 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
21367 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
21369 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
21370 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
21373 AddToWorklist(SCC.getNode());
21374 AddToWorklist(Temp.getNode());
21379 unsigned ShCt = N2C->getAPIntValue().logBase2();
21380 if (TLI.shouldAvoidTransformToShift(VT, ShCt))
21383 // shl setcc result by log2 n2c
21384 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
21385 DAG.getConstant(ShCt, SDLoc(Temp),
21386 getShiftAmountTy(Temp.getValueType())));
21389 // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
21390 // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
21391 // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
21392 // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
21393 // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
21394 // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
21395 // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
21396 // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
21397 if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
21398 SDValue ValueOnZero = N2;
21399 SDValue Count = N3;
21400 // If the condition is NE instead of E, swap the operands.
21401 if (CC == ISD::SETNE)
21402 std::swap(ValueOnZero, Count);
21403 // Check if the value on zero is a constant equal to the bits in the type.
21404 if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
21405 if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
21406 // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
21407 // legal, combine to just cttz.
21408 if ((Count.getOpcode() == ISD::CTTZ ||
21409 Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
21410 N0 == Count.getOperand(0) &&
21411 (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
21412 return DAG.getNode(ISD::CTTZ, DL, VT, N0);
21413 // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
21414 // legal, combine to just ctlz.
21415 if ((Count.getOpcode() == ISD::CTLZ ||
21416 Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
21417 N0 == Count.getOperand(0) &&
21418 (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
21419 return DAG.getNode(ISD::CTLZ, DL, VT, N0);
21427 /// This is a stub for TargetLowering::SimplifySetCC.
21428 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
21429 ISD::CondCode Cond, const SDLoc &DL,
21430 bool foldBooleans) {
21431 TargetLowering::DAGCombinerInfo
21432 DagCombineInfo(DAG, Level, false, this);
21433 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
21436 /// Given an ISD::SDIV node expressing a divide by constant, return
21437 /// a DAG expression to select that will generate the same value by multiplying
21438 /// by a magic number.
21439 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21440 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
21441 // when optimising for minimum size, we don't want to expand a div to a mul
21443 if (DAG.getMachineFunction().getFunction().hasMinSize())
21446 SmallVector<SDNode *, 8> Built;
21447 if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
21448 for (SDNode *N : Built)
21456 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
21457 /// DAG expression that will generate the same value by right shifting.
21458 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
21459 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
21463 // Avoid division by zero.
21464 if (C->isNullValue())
21467 SmallVector<SDNode *, 8> Built;
21468 if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
21469 for (SDNode *N : Built)
21477 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
21478 /// expression that will generate the same value by multiplying by a magic
21480 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21481 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
21482 // when optimising for minimum size, we don't want to expand a div to a mul
21484 if (DAG.getMachineFunction().getFunction().hasMinSize())
21487 SmallVector<SDNode *, 8> Built;
21488 if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
21489 for (SDNode *N : Built)
21497 /// Determines the LogBase2 value for a non-null input value using the
21498 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
21499 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
21500 EVT VT = V.getValueType();
21501 unsigned EltBits = VT.getScalarSizeInBits();
21502 SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
21503 SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
21504 SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
21508 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21509 /// For the reciprocal, we need to find the zero of the function:
21510 /// F(X) = A X - 1 [which has a zero at X = 1/A]
21512 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
21513 /// does not require additional intermediate precision]
21514 /// For the last iteration, put numerator N into it to gain more precision:
21515 /// Result = N X_i + X_i (N - N A X_i)
21516 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
21517 SDNodeFlags Flags) {
21521 // TODO: Handle half and/or extended types?
21522 EVT VT = Op.getValueType();
21523 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
21526 // If estimates are explicitly disabled for this function, we're done.
21527 MachineFunction &MF = DAG.getMachineFunction();
21528 int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
21529 if (Enabled == TLI.ReciprocalEstimate::Disabled)
21532 // Estimates may be explicitly enabled for this type with a custom number of
21533 // refinement steps.
21534 int Iterations = TLI.getDivRefinementSteps(VT, MF);
21535 if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
21536 AddToWorklist(Est.getNode());
21540 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
21542 // Newton iterations: Est = Est + Est (N - Arg * Est)
21543 // If this is the last iteration, also multiply by the numerator.
21544 for (int i = 0; i < Iterations; ++i) {
21545 SDValue MulEst = Est;
21547 if (i == Iterations - 1) {
21548 MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
21549 AddToWorklist(MulEst.getNode());
21552 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
21553 AddToWorklist(NewEst.getNode());
21555 NewEst = DAG.getNode(ISD::FSUB, DL, VT,
21556 (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
21557 AddToWorklist(NewEst.getNode());
21559 NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21560 AddToWorklist(NewEst.getNode());
21562 Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
21563 AddToWorklist(Est.getNode());
21566 // If no iterations are available, multiply with N.
21567 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
21568 AddToWorklist(Est.getNode());
21577 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21578 /// For the reciprocal sqrt, we need to find the zero of the function:
21579 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21581 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
21582 /// As a result, we precompute A/2 prior to the iteration loop.
21583 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
21584 unsigned Iterations,
21585 SDNodeFlags Flags, bool Reciprocal) {
21586 EVT VT = Arg.getValueType();
21588 SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
21590 // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
21591 // this entire sequence requires only one FP constant.
21592 SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
21593 HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
21595 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
21596 for (unsigned i = 0; i < Iterations; ++i) {
21597 SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
21598 NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
21599 NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
21600 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
21603 // If non-reciprocal square root is requested, multiply the result by Arg.
21605 Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
21610 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
21611 /// For the reciprocal sqrt, we need to find the zero of the function:
21612 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
21614 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
21615 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
21616 unsigned Iterations,
21617 SDNodeFlags Flags, bool Reciprocal) {
21618 EVT VT = Arg.getValueType();
21620 SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
21621 SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
21623 // This routine must enter the loop below to work correctly
21624 // when (Reciprocal == false).
21625 assert(Iterations > 0);
21627 // Newton iterations for reciprocal square root:
21628 // E = (E * -0.5) * ((A * E) * E + -3.0)
21629 for (unsigned i = 0; i < Iterations; ++i) {
21630 SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
21631 SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
21632 SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
21634 // When calculating a square root at the last iteration build:
21635 // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
21636 // (notice a common subexpression)
21638 if (Reciprocal || (i + 1) < Iterations) {
21639 // RSQRT: LHS = (E * -0.5)
21640 LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
21642 // SQRT: LHS = (A * E) * -0.5
21643 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
21646 Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
21652 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
21653 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
21654 /// Op can be zero.
21655 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
21660 // TODO: Handle half and/or extended types?
21661 EVT VT = Op.getValueType();
21662 if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
21665 // If estimates are explicitly disabled for this function, we're done.
21666 MachineFunction &MF = DAG.getMachineFunction();
21667 int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
21668 if (Enabled == TLI.ReciprocalEstimate::Disabled)
21671 // Estimates may be explicitly enabled for this type with a custom number of
21672 // refinement steps.
21673 int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
21675 bool UseOneConstNR = false;
21677 TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
21679 AddToWorklist(Est.getNode());
21682 Est = UseOneConstNR
21683 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
21684 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
21687 // The estimate is now completely wrong if the input was exactly 0.0 or
21688 // possibly a denormal. Force the answer to 0.0 for those cases.
21690 EVT CCVT = getSetCCResultType(VT);
21691 ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
21692 DenormalMode DenormMode = DAG.getDenormalMode(VT);
21693 if (DenormMode.Input == DenormalMode::IEEE) {
21694 // This is specifically a check for the handling of denormal inputs,
21697 // fabs(X) < SmallestNormal ? 0.0 : Est
21698 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
21699 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
21700 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
21701 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
21702 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
21703 SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
21704 Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
21706 // X == 0.0 ? 0.0 : Est
21707 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
21708 SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
21709 Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
21719 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21720 return buildSqrtEstimateImpl(Op, Flags, true);
21723 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
21724 return buildSqrtEstimateImpl(Op, Flags, false);
21727 /// Return true if there is any possibility that the two addresses overlap.
21728 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
21730 struct MemUseCharacteristics {
21735 Optional<int64_t> NumBytes;
21736 MachineMemOperand *MMO;
21739 auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
21740 if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
21741 int64_t Offset = 0;
21742 if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
21743 Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
21744 ? C->getSExtValue()
21745 : (LSN->getAddressingMode() == ISD::PRE_DEC)
21746 ? -1 * C->getSExtValue()
21749 MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
21750 return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
21751 Offset /*base offset*/,
21752 Optional<int64_t>(Size),
21753 LSN->getMemOperand()};
21755 if (const auto *LN = cast<LifetimeSDNode>(N))
21756 return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
21757 (LN->hasOffset()) ? LN->getOffset() : 0,
21758 (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
21759 : Optional<int64_t>(),
21760 (MachineMemOperand *)nullptr};
21762 return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
21763 (int64_t)0 /*offset*/,
21764 Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
21767 MemUseCharacteristics MUC0 = getCharacteristics(Op0),
21768 MUC1 = getCharacteristics(Op1);
21770 // If they are to the same address, then they must be aliases.
21771 if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
21772 MUC0.Offset == MUC1.Offset)
21775 // If they are both volatile then they cannot be reordered.
21776 if (MUC0.IsVolatile && MUC1.IsVolatile)
21779 // Be conservative about atomics for the moment
21780 // TODO: This is way overconservative for unordered atomics (see D66309)
21781 if (MUC0.IsAtomic && MUC1.IsAtomic)
21784 if (MUC0.MMO && MUC1.MMO) {
21785 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21786 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21790 // Try to prove that there is aliasing, or that there is no aliasing. Either
21791 // way, we can return now. If nothing can be proved, proceed with more tests.
21793 if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
21797 // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
21798 // either are not known.
21799 if (!MUC0.MMO || !MUC1.MMO)
21802 // If one operation reads from invariant memory, and the other may store, they
21803 // cannot alias. These should really be checking the equivalent of mayWrite,
21804 // but it only matters for memory nodes other than load /store.
21805 if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
21806 (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
21809 // If we know required SrcValue1 and SrcValue2 have relatively large
21810 // alignment compared to the size and offset of the access, we may be able
21811 // to prove they do not alias. This check is conservative for now to catch
21812 // cases created by splitting vector types, it only works when the offsets are
21813 // multiples of the size of the data.
21814 int64_t SrcValOffset0 = MUC0.MMO->getOffset();
21815 int64_t SrcValOffset1 = MUC1.MMO->getOffset();
21816 Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
21817 Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
21818 auto &Size0 = MUC0.NumBytes;
21819 auto &Size1 = MUC1.NumBytes;
21820 if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
21821 Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
21822 OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
21823 SrcValOffset1 % *Size1 == 0) {
21824 int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
21825 int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
21827 // There is no overlap between these relatively aligned accesses of
21828 // similar size. Return no alias.
21829 if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
21833 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
21835 : DAG.getSubtarget().useAA();
21837 if (CombinerAAOnlyFunc.getNumOccurrences() &&
21838 CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
21842 if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
21843 Size0.hasValue() && Size1.hasValue()) {
21844 // Use alias analysis information.
21845 int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
21846 int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
21847 int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
21848 AliasResult AAResult = AA->alias(
21849 MemoryLocation(MUC0.MMO->getValue(), Overlap0,
21850 UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
21851 MemoryLocation(MUC1.MMO->getValue(), Overlap1,
21852 UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
21853 if (AAResult == NoAlias)
21857 // Otherwise we have to assume they alias.
21861 /// Walk up chain skipping non-aliasing memory nodes,
21862 /// looking for aliasing nodes and adding them to the Aliases vector.
21863 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
21864 SmallVectorImpl<SDValue> &Aliases) {
21865 SmallVector<SDValue, 8> Chains; // List of chains to visit.
21866 SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
21868 // Get alias information for node.
21869 // TODO: relax aliasing for unordered atomics (see D66309)
21870 const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
21873 Chains.push_back(OriginalChain);
21874 unsigned Depth = 0;
21876 // Attempt to improve chain by a single step
21877 std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
21878 switch (C.getOpcode()) {
21879 case ISD::EntryToken:
21880 // No need to mark EntryToken.
21885 // Get alias information for C.
21886 // TODO: Relax aliasing for unordered atomics (see D66309)
21887 bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
21888 cast<LSBaseSDNode>(C.getNode())->isSimple();
21889 if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
21890 // Look further up the chain.
21891 C = C.getOperand(0);
21894 // Alias, so stop here.
21898 case ISD::CopyFromReg:
21899 // Always forward past past CopyFromReg.
21900 C = C.getOperand(0);
21903 case ISD::LIFETIME_START:
21904 case ISD::LIFETIME_END: {
21905 // We can forward past any lifetime start/end that can be proven not to
21906 // alias the memory access.
21907 if (!isAlias(N, C.getNode())) {
21908 // Look further up the chain.
21909 C = C.getOperand(0);
21919 // Look at each chain and determine if it is an alias. If so, add it to the
21920 // aliases list. If not, then continue up the chain looking for the next
21922 while (!Chains.empty()) {
21923 SDValue Chain = Chains.pop_back_val();
21925 // Don't bother if we've seen Chain before.
21926 if (!Visited.insert(Chain.getNode()).second)
21929 // For TokenFactor nodes, look at each operand and only continue up the
21930 // chain until we reach the depth limit.
21932 // FIXME: The depth check could be made to return the last non-aliasing
21933 // chain we found before we hit a tokenfactor rather than the original
21935 if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
21937 Aliases.push_back(OriginalChain);
21941 if (Chain.getOpcode() == ISD::TokenFactor) {
21942 // We have to check each of the operands of the token factor for "small"
21943 // token factors, so we queue them up. Adding the operands to the queue
21944 // (stack) in reverse order maintains the original order and increases the
21945 // likelihood that getNode will find a matching token factor (CSE.)
21946 if (Chain.getNumOperands() > 16) {
21947 Aliases.push_back(Chain);
21950 for (unsigned n = Chain.getNumOperands(); n;)
21951 Chains.push_back(Chain.getOperand(--n));
21956 if (ImproveChain(Chain)) {
21957 // Updated Chain Found, Consider new chain if one exists.
21958 if (Chain.getNode())
21959 Chains.push_back(Chain);
21963 // No Improved Chain Possible, treat as Alias.
21964 Aliases.push_back(Chain);
21968 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
21969 /// (aliasing node.)
21970 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
21971 if (OptLevel == CodeGenOpt::None)
21974 // Ops for replacing token factor.
21975 SmallVector<SDValue, 8> Aliases;
21977 // Accumulate all the aliases to this node.
21978 GatherAllAliases(N, OldChain, Aliases);
21980 // If no operands then chain to entry token.
21981 if (Aliases.size() == 0)
21982 return DAG.getEntryNode();
21984 // If a single operand then chain to it. We don't need to revisit it.
21985 if (Aliases.size() == 1)
21988 // Construct a custom tailored token factor.
21989 return DAG.getTokenFactor(SDLoc(N), Aliases);
21993 // TODO: Replace with with std::monostate when we move to C++17.
21994 struct UnitT { } Unit;
21995 bool operator==(const UnitT &, const UnitT &) { return true; }
21996 bool operator!=(const UnitT &, const UnitT &) { return false; }
21999 // This function tries to collect a bunch of potentially interesting
22000 // nodes to improve the chains of, all at once. This might seem
22001 // redundant, as this function gets called when visiting every store
22002 // node, so why not let the work be done on each store as it's visited?
22004 // I believe this is mainly important because mergeConsecutiveStores
22005 // is unable to deal with merging stores of different sizes, so unless
22006 // we improve the chains of all the potential candidates up-front
22007 // before running mergeConsecutiveStores, it might only see some of
22008 // the nodes that will eventually be candidates, and then not be able
22009 // to go from a partially-merged state to the desired final
22010 // fully-merged state.
22012 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
22013 SmallVector<StoreSDNode *, 8> ChainedStores;
22014 StoreSDNode *STChain = St;
22015 // Intervals records which offsets from BaseIndex have been covered. In
22016 // the common case, every store writes to the immediately previous address
22017 // space and thus merged with the previous interval at insertion time.
22020 llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
22024 // This holds the base pointer, index, and the offset in bytes from the base
22026 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22028 // We must have a base and an offset.
22029 if (!BasePtr.getBase().getNode())
22032 // Do not handle stores to undef base pointers.
22033 if (BasePtr.getBase().isUndef())
22036 // BaseIndexOffset assumes that offsets are fixed-size, which
22037 // is not valid for scalable vectors where the offsets are
22038 // scaled by `vscale`, so bail out early.
22039 if (St->getMemoryVT().isScalableVector())
22042 // Add ST's interval.
22043 Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
22045 while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
22046 // If the chain has more than one use, then we can't reorder the mem ops.
22047 if (!SDValue(Chain, 0)->hasOneUse())
22049 // TODO: Relax for unordered atomics (see D66309)
22050 if (!Chain->isSimple() || Chain->isIndexed())
22053 // Find the base pointer and offset for this memory node.
22054 const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
22055 // Check that the base pointer is the same as the original one.
22057 if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
22059 int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
22060 // Make sure we don't overlap with other intervals by checking the ones to
22061 // the left or right before inserting.
22062 auto I = Intervals.find(Offset);
22063 // If there's a next interval, we should end before it.
22064 if (I != Intervals.end() && I.start() < (Offset + Length))
22066 // If there's a previous interval, we should start after it.
22067 if (I != Intervals.begin() && (--I).stop() <= Offset)
22069 Intervals.insert(Offset, Offset + Length, Unit);
22071 ChainedStores.push_back(Chain);
22075 // If we didn't find a chained store, exit.
22076 if (ChainedStores.size() == 0)
22079 // Improve all chained stores (St and ChainedStores members) starting from
22080 // where the store chain ended and return single TokenFactor.
22081 SDValue NewChain = STChain->getChain();
22082 SmallVector<SDValue, 8> TFOps;
22083 for (unsigned I = ChainedStores.size(); I;) {
22084 StoreSDNode *S = ChainedStores[--I];
22085 SDValue BetterChain = FindBetterChain(S, NewChain);
22086 S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
22087 S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
22088 TFOps.push_back(SDValue(S, 0));
22089 ChainedStores[I] = S;
22092 // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
22093 SDValue BetterChain = FindBetterChain(St, NewChain);
22095 if (St->isTruncatingStore())
22096 NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
22097 St->getBasePtr(), St->getMemoryVT(),
22098 St->getMemOperand());
22100 NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
22101 St->getBasePtr(), St->getMemOperand());
22103 TFOps.push_back(NewST);
22105 // If we improved every element of TFOps, then we've lost the dependence on
22106 // NewChain to successors of St and we need to add it back to TFOps. Do so at
22107 // the beginning to keep relative order consistent with FindBetterChains.
22108 auto hasImprovedChain = [&](SDValue ST) -> bool {
22109 return ST->getOperand(0) != NewChain;
22111 bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
22113 TFOps.insert(TFOps.begin(), NewChain);
22115 SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
22118 // Add TF and its operands to the worklist.
22119 AddToWorklist(TF.getNode());
22120 for (const SDValue &Op : TF->ops())
22121 AddToWorklist(Op.getNode());
22122 AddToWorklist(STChain);
22126 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
22127 if (OptLevel == CodeGenOpt::None)
22130 const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22132 // We must have a base and an offset.
22133 if (!BasePtr.getBase().getNode())
22136 // Do not handle stores to undef base pointers.
22137 if (BasePtr.getBase().isUndef())
22140 // Directly improve a chain of disjoint stores starting at St.
22141 if (parallelizeChainedStores(St))
22144 // Improve St's Chain..
22145 SDValue BetterChain = FindBetterChain(St, St->getChain());
22146 if (St->getChain() != BetterChain) {
22147 replaceStoreChain(St, BetterChain);
22153 /// This is the entry point for the file.
22154 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
22155 CodeGenOpt::Level OptLevel) {
22156 /// This is the main entry point to this class.
22157 DAGCombiner(*this, AA, OptLevel).Run(Level);