//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
#include <tuple>

#define DEBUG_TYPE "gi-combiner"

using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));

CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
                               MachineIRBuilder &B, GISelKnownBits *KB,
                               MachineDominatorTree *MDT,
                               const LegalizerInfo *LI)
    : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
      MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
      TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
  (void)this->KB;
}

const TargetLowering &CombinerHelper::getTargetLowering() const {
  return *Builder.getMF().getSubtarget().getTargetLowering();
}

/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 0
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return I;
}

/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
  auto &MRI = *MIB.getMRI();
  LLT Ty = MRI.getType(V);
  auto Ctlz = MIB.buildCTLZ(Ty, V);
  auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
  return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
}
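
// Illustrative sketch (not from the original source; register names are
// hypothetical): for a 32-bit value %v, buildLogBase2 emits
//   %ctlz:_(s32) = G_CTLZ %v
//   %c31:_(s32)  = G_CONSTANT i32 31
//   %log2:_(s32) = G_SUB %c31, %ctlz
// so a power-of-two input such as 8 (ctlz = 28) yields 31 - 28 = 3.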

/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
/// E.g. Given a 4-byte type x, x[0] -> byte 3
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  return ByteWidth - I - 1;
}

/// Given a map from byte offsets in memory to indices in a load/store,
/// determine if that map corresponds to a little or big endian byte pattern.
///
/// \param MemOffset2Idx maps memory offsets to address offsets.
/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
///
/// \returns true if the map corresponds to a big endian byte pattern, false
/// if it corresponds to a little endian byte pattern, and None otherwise.
///
/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
/// are as follows:
///
/// AddrOffset   Little endian    Big endian
/// 0            x[0]             x[3]
/// 1            x[1]             x[2]
/// 2            x[2]             x[1]
/// 3            x[3]             x[0]
static Optional<bool>
isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
            int64_t LowestIdx) {
  // Need at least two byte positions to decide on endianness.
  unsigned Width = MemOffset2Idx.size();
  if (Width < 2)
    return None;
  bool BigEndian = true, LittleEndian = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
    auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
    if (MemOffsetAndIdx == MemOffset2Idx.end())
      return None;
    const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
    assert(Idx >= 0 && "Expected non-negative byte offset?");
    LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
    BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
    if (!BigEndian && !LittleEndian)
      return None;
  }

  assert((BigEndian != LittleEndian) &&
         "Pattern cannot be both big and little endian!");
  return BigEndian;
}

bool CombinerHelper::isLegalOrBeforeLegalizer(
    const LegalityQuery &Query) const {
  return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
}

void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
                                    Register ToReg) const {
  Observer.changingAllUsesOfReg(MRI, FromReg);

  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}

void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
                                      MachineOperand &FromRegOp,
                                      Register ToReg) const {
  assert(FromRegOp.getParent() && "Expected an operand in an MI");
  Observer.changingInstr(*FromRegOp.getParent());

  FromRegOp.setReg(ToReg);

  Observer.changedInstr(*FromRegOp.getParent());
}

void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
                                       unsigned ToOpcode) const {
  Observer.changingInstr(FromMI);

  FromMI.setDesc(Builder.getTII().get(ToOpcode));

  Observer.changedInstr(FromMI);
}

const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
  return RBI->getRegBank(Reg, MRI, *TRI);
}

void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
  if (RegBank)
    MRI.setRegBank(Reg, *RegBank);
}

bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}

void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}

bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;

  // Walk over all the operands of concat vectors and check if they are
  // build_vector themselves or undef.
  // Then collect their operands in Ops.
  for (const MachineOperand &MO : MI.uses()) {
    Register Reg = MO.getReg();
    MachineInstr *Def = MRI.getVRegDef(Reg);
    assert(Def && "Operand not defined");
    switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
    case TargetOpcode::G_IMPLICIT_DEF: {
      LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
      assert(MRI.getType(Undef->getOperand(0).getReg()) ==
                 OpType.getScalarType() &&
             "All undefs should have the same type");
      // Break the undef vector in as many scalar elements as needed
      // for the flattening.
      for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
           EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}

void CombinerHelper::applyCombineConcatVectors(
    MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}

bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;

  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  //       we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
  unsigned NumConcat = DstNumElts / SrcNumElts;
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}

void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
  Register DstReg = MI.getOperand(0).getReg();
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMerge(NewDstReg, Ops);

  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}

namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
                                  const LLT TyForCandidate,
                                  unsigned OpcodeForCandidate,
                                  MachineInstr *MIForCandidate) {
  if (!CurrentUse.Ty.isValid()) {
    if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
        CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }

  // We permit the extend to hoist through basic blocks but this is only
  // sensible if the target has extending loads. If you end up lowering back
  // into a load and extend during the legalizer then the end result is
  // hoisting the extend up to the load.

  // Prefer defined extensions to undefined extensions as these are more
  // likely to reduce the number of instructions.
  if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
  else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
           OpcodeForCandidate != TargetOpcode::G_ANYEXT)
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};

  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive.
  if (CurrentUse.Ty == TyForCandidate) {
    if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }

  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}
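
// Illustrative walk-through (hypothetical types, not from the original
// source): starting from an invalid CurrentUse whose extend opcode is
// G_ANYEXT, a candidate (s32, G_ZEXT) is accepted; a later (s32, G_SEXT)
// candidate replaces it since sign extensions win at equal size, and an
// (s64, G_SEXT) candidate replaces that again because the larger type is
// preferred.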

/// Find a suitable place to insert some instructions and insert them. This
/// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHI's is to duplicate the
/// instructions for each predecessor. However, while that's ok for G_TRUNC
/// on most targets since it generally requires no code, other targets/cases may
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
    MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
    std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();

  MachineBasicBlock *InsertBB = UseMI.getParent();

  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }

  // If the block is the same block as the def then we want to insert just after
  // the def instead of at the start of the block.
  if (InsertBB == DefMI.getParent()) {
    MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }

  // Otherwise we want the start of the BB
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace

bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or just
  // for performance.
  GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
  if (!LoadMI)
    return false;

  Register LoadReg = LoadMI->getDstReg();

  LLT LoadValueTy = MRI.getType(LoadReg);
  if (!LoadValueTy.isScalar())
    return false;

  // Most architectures are going to legalize <s8 loads into at least a 1 byte
  // load, and the MMOs can only describe memory accesses in multiples of bytes.
  // If we try to perform extload combining on those, we can end up with
  // %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;

  // For non power-of-2 types, they will very likely be legalized into multiple
  // loads. Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;

  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
  unsigned PreferredOpcode =
      isa<GLoad>(&MI)
          ? TargetOpcode::G_ANYEXT
          : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  Preferred = {LLT(), PreferredOpcode, nullptr};
  for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
    if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
        UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
        (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      const auto &MMO = LoadMI->getMMO();
      // For atomics, only form anyextending loads.
      if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
        continue;
      // Check for legality.
      if (LI) {
        LegalityQuery::MemDesc MMDesc(MMO);
        LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
        LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
        if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
                .Action != LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(Preferred,
                                     MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
  assert(Preferred.Ty != LoadValueTy && "Extending to same type?");

  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}

void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
                                                PreferredTuple &Preferred) {
  // Rewrite the load to the chosen extending load.
  Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();

  // Inserter to insert a truncate back to the original type at a given point
  // with some basic CSE to limit truncate duplication to one per BB.
  DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
  auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
                           MachineBasicBlock::iterator InsertBefore,
                           MachineOperand &UseMO) {
    MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
    if (PreviouslyEmitted) {
      Observer.changingInstr(*UseMO.getParent());
      UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }

    Builder.setInsertPt(*InsertIntoBB, InsertBefore);
    Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
    MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
    EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
                               ? TargetOpcode::G_SEXTLOAD
                               : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
                                     ? TargetOpcode::G_ZEXTLOAD
                                     : TargetOpcode::G_LOAD));

  // Rewrite all the uses to fix up the types.
  auto &LoadValue = MI.getOperand(0);
  SmallVector<MachineOperand *, 4> Uses;
  for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
    Uses.push_back(&UseMO);

  for (auto *UseMO : Uses) {
    MachineInstr *UseMI = UseMO->getParent();

    // If the extend is compatible with the preferred extend then we should fix
    // up the type and extend so that it uses the preferred use.
    if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
        UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
      Register UseDstReg = UseMI->getOperand(0).getReg();
      MachineOperand &UseSrcMO = UseMI->getOperand(1);
      const LLT UseDstTy = MRI.getType(UseDstReg);
      if (UseDstReg != ChosenDstReg) {
        if (Preferred.Ty == UseDstTy) {
          // If the use has the same type as the preferred use, then merge
          // the vregs and erase the extend. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ANYEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    ... = ... %2(s32)
          replaceRegWith(MRI, UseDstReg, ChosenDstReg);
          Observer.erasingInstr(*UseMO->getParent());
          UseMO->getParent()->eraseFromParent();
        } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
          // If the preferred size is smaller, then keep the extend but extend
          // from the result of the extending load. For example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s32) = G_SEXT %1(s8)
          //    %3:_(s64) = G_ANYEXT %1(s8)
          //    ... = ... %3(s64)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
          //    %3:_(s64) = G_ANYEXT %2:_(s32)
          //    ... = ... %3(s64)
          replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the old extend.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }

    // The use isn't an extend. Truncate back to the type we originally loaded.
    // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}

bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
                                                 BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  // If we have the following code:
  //  %mask = G_CONSTANT 255
  //  %ld   = G_LOAD %ptr, (load s16)
  //  %and  = G_AND %ld, %mask
  //
  // Try to fold it into
  //   %ld = G_ZEXTLOAD %ptr, (load s8)

  Register Dst = MI.getOperand(0).getReg();
  if (MRI.getType(Dst).isVector())
    return false;

  auto MaybeMask =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeMask)
    return false;

  APInt MaskVal = MaybeMask->Value;

  if (!MaskVal.isMask())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
  if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
      !LoadMI->isSimple())
    return false;

  Register LoadReg = LoadMI->getDstReg();
  LLT LoadTy = MRI.getType(LoadReg);
  Register PtrReg = LoadMI->getPointerReg();
  uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
  unsigned MaskSizeBits = MaskVal.countTrailingOnes();

  // The mask may not be larger than the in-memory type, as it might cover sign
  // extended bits
  if (MaskSizeBits > LoadSizeBits)
    return false;

  // If the mask covers the whole destination register, there's nothing to
  // fold
  if (MaskSizeBits >= LoadTy.getSizeInBits())
    return false;

  // Most targets cannot deal with loads of size < 8 and need to re-legalize to
  // at least byte loads. Avoid creating such loads here
  if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadMI->getMMO();
  LegalityQuery::MemDesc MemDesc(MMO);
  MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
  if (!isLegalOrBeforeLegalizer(
          {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    B.setInstrAndDebugLoc(*LoadMI);
    auto &MF = B.getMF();
    auto PtrInfo = MMO.getPointerInfo();
    auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
    B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
  };
  return true;
}

bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
                                   const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return true;
  const MachineBasicBlock &MBB = *DefMI.getParent();
  auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
    return &MI == &DefMI || &MI == &UseMI;
  });
  if (DefOrUse == MBB.end())
    llvm_unreachable("Block must contain both DefMI and UseMI!");
  return &*DefOrUse == &DefMI;
}

bool CombinerHelper::dominates(const MachineInstr &DefMI,
                               const MachineInstr &UseMI) {
  assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
         "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}

bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register SrcReg = MI.getOperand(1).getReg();
  Register LoadUser = SrcReg;

  if (MRI.getType(SrcReg).isVector())
    return false;

  Register TruncSrc;
  if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
    LoadUser = TruncSrc;

  uint64_t SizeInBits = MI.getOperand(2).getImm();
  // If the source is a G_SEXTLOAD from the same bit width, then we don't
  // need any extend at all, just a truncate.
  if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
    // If truncating more than the original extended value, abort.
    auto LoadSizeBits = LoadMI->getMemSizeInBits();
    if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
      return false;
    if (LoadSizeBits == SizeInBits)
      return true;
  }
  return false;
}

void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Builder.setInstrAndDebugLoc(MI);
  Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
}
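
// Illustrative sketch of the match/apply pair above (hypothetical register
// names):
//   %ld:_(s32)  = G_SEXTLOAD %ptr (load (s8))
//   %ext:_(s32) = G_SEXT_INREG %ld, 8
// becomes a plain COPY of %ld into %ext's destination, because the load
// already sign-extended from the same 8-bit width.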

bool CombinerHelper::matchSextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);

  // Only supports scalars for now.
  if (MRI.getType(MI.getOperand(0).getReg()).isVector())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
  if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
      !LoadDef->isSimple())
    return false;

  // If the sign extend extends from a narrower width than the load's width,
  // then we can narrow the load width when we combine to a G_SEXTLOAD.
  // Avoid widening the load at all.
  unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
                                  LoadDef->getMemSizeInBits());

  // Don't generate G_SEXTLOADs with a < 1 byte width.
  if (NewSizeBits < 8)
    return false;
  // Don't bother creating a non-power-2 sextload, it will likely be broken up
  // anyway for most targets.
  if (!isPowerOf2_32(NewSizeBits))
    return false;

  const MachineMemOperand &MMO = LoadDef->getMMO();
  LegalityQuery::MemDesc MMDesc(MMO);
  MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
                                 {MRI.getType(LoadDef->getDstReg()),
                                  MRI.getType(LoadDef->getPointerReg())},
                                 {MMDesc}}))
    return false;

  MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
  return true;
}

void CombinerHelper::applySextInRegOfLoad(
    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register LoadReg;
  unsigned ScalarSizeBits;
  std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
  GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));

  // If we have the following:
  // %ld = G_LOAD %ptr, (load 2)
  // %ext = G_SEXT_INREG %ld, 8
  //    ==>
  // %ld = G_SEXTLOAD %ptr (load 1)

  auto &MMO = LoadDef->getMMO();
  Builder.setInstrAndDebugLoc(*LoadDef);
  auto &MF = Builder.getMF();
  auto PtrInfo = MMO.getPointerInfo();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
  Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
                         LoadDef->getPointerReg(), *NewMMO);
  MI.eraseFromParent();
}

bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
                                            Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Base = MI.getOperand(1).getReg();
  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;

  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
  // FIXME: The following use traversal needs a bail out for pathological cases.
  for (auto &Use : MRI.use_nodbg_instructions(Base)) {
    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
      continue;

    Offset = Use.getOperand(2).getReg();
    if (!ForceLegalIndexing &&
        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
                        << Use);
      continue;
    }

    // Make sure the offset calculation is before the potentially indexed op.
    // FIXME: we really care about dependency here. The offset calculation might
    // be movable.
    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
      continue;
    }

    // FIXME: check whether all uses of Base are load/store with foldable
    // addressing modes. If so, using the normal addr-modes is better than
    // forming an indexed one.

    bool MemOpDominatesAddrUses = true;
    for (auto &PtrAddUse :
         MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
      if (!dominates(MI, PtrAddUse)) {
        MemOpDominatesAddrUses = false;
        break;
      }
    }

    if (!MemOpDominatesAddrUses) {
      LLVM_DEBUG(
          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
      continue;
    }

    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
    Addr = Use.getOperand(0).getReg();
    return true;
  }

  return false;
}
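
// Illustrative sketch of the post-indexing shape searched for above
// (hypothetical register names):
//   %base:_(p0) = ...
//   G_STORE %val(s32), %base(p0)        ; candidate mem-op
//   %addr:_(p0) = G_PTR_ADD %base, %off
// On success Addr/Base/Offset are set so the caller can fold the store and
// the pointer update into a single G_INDEXED_STORE defining the new %addr.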

bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
                                           Register &Base, Register &Offset) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();

#ifndef NDEBUG
  unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
#endif

  Addr = MI.getOperand(1).getReg();
  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
    return false;

  Base = AddrDef->getOperand(1).getReg();
  Offset = AddrDef->getOperand(2).getReg();

  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);

  if (!ForceLegalIndexing &&
      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
    return false;
  }

  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
    return false;
  }

  if (MI.getOpcode() == TargetOpcode::G_STORE) {
    // Would require a copy.
    if (Base == MI.getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
      return false;
    }

    // We're expecting one use of Addr in MI, but it could also be the
    // value stored, which isn't actually dominated by the instruction.
    if (MI.getOperand(0).getReg() == Addr) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
      return false;
    }
  }

  // FIXME: check whether all uses of the base pointer are constant PtrAdds.
  // That might allow us to end base's liveness here by adjusting the constant.

  for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
    if (!dominates(MI, UseMI)) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
      return false;
    }
  }

  return true;
}

bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  IndexedLoadStoreMatchInfo MatchInfo;
  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
    applyCombineIndexedLoadStore(MI, MatchInfo);
    return true;
  }
  return false;
}

bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
    return false;

  // For now, no targets actually support these opcodes so don't waste time
  // running these unless we're forced to for testing.
  if (!ForceLegalIndexing)
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset))
    return false;

  return true;
}

void CombinerHelper::applyCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
  MachineIRBuilder MIRBuilder(MI);
  unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode;
  switch (Opcode) {
  case TargetOpcode::G_LOAD:
    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
    break;
  case TargetOpcode::G_SEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
    break;
  case TargetOpcode::G_STORE:
    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
  default:
    llvm_unreachable("Unknown load/store opcode");
  }

  auto MIB = MIRBuilder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
  MIB.addUse(MatchInfo.Offset);
  MIB.addImm(MatchInfo.IsPre);
  MI.eraseFromParent();
  AddrDef.eraseFromParent();

  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}

bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) {
  unsigned Opcode = MI.getOpcode();
  bool IsDiv, IsSigned;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV: {
    IsDiv = true;
    IsSigned = Opcode == TargetOpcode::G_SDIV;
    break;
  }
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    IsDiv = false;
    IsSigned = Opcode == TargetOpcode::G_SREM;
    break;
  }
  }

  Register Src1 = MI.getOperand(1).getReg();
  unsigned DivOpcode, RemOpcode, DivremOpcode;
  if (IsSigned) {
    DivOpcode = TargetOpcode::G_SDIV;
    RemOpcode = TargetOpcode::G_SREM;
    DivremOpcode = TargetOpcode::G_SDIVREM;
  } else {
    DivOpcode = TargetOpcode::G_UDIV;
    RemOpcode = TargetOpcode::G_UREM;
    DivremOpcode = TargetOpcode::G_UDIVREM;
  }

  if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
    return false;

  // Combine:
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_

  // Combine:
  //   %rem:_ = G_[SU]REM %src1:_, %src2:_
  //   %div:_ = G_[SU]DIV %src1:_, %src2:_
  // into:
  //   %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_

  for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
    if (MI.getParent() == UseMI.getParent() &&
        ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
         (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
        matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
      OtherMI = &UseMI;
      return true;
    }
  }

  return false;
}

void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
                                        MachineInstr *&OtherMI) {
  unsigned Opcode = MI.getOpcode();
  assert(OtherMI && "OtherMI shouldn't be empty.");

  Register DestDivReg, DestRemReg;
  if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
    DestDivReg = MI.getOperand(0).getReg();
    DestRemReg = OtherMI->getOperand(0).getReg();
  } else {
    DestDivReg = OtherMI->getOperand(0).getReg();
    DestRemReg = MI.getOperand(0).getReg();
  }

  bool IsSigned =
      Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;

  // Check which instruction is first in the block so we don't break def-use
  // deps by "moving" the instruction incorrectly.
  if (dominates(MI, *OtherMI))
    Builder.setInstrAndDebugLoc(MI);
  else
    Builder.setInstrAndDebugLoc(*OtherMI);

  Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
                              : TargetOpcode::G_UDIVREM,
                     {DestDivReg, DestRemReg},
                     {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
  MI.eraseFromParent();
  OtherMI->eraseFromParent();
}

bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
                                                   MachineInstr *&BrCond) {
  assert(MI.getOpcode() == TargetOpcode::G_BR);

  // Try to match the following:
  // bb1:
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  // ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with conditional branch to bb3 and otherwise
  // fallthrough to bb2. This is generally better for branch predictors.

  MachineBasicBlock *MBB = MI.getParent();
  MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
  assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");

  BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Check that the next block is the conditional branch target. Also make sure
  // that it isn't the same as the G_BR's target (otherwise, this will loop.)
  MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
  return BrCondTarget != MI.getOperand(0).getMBB() &&
         MBB->isLayoutSuccessor(BrCondTarget);
}

void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
                                                   MachineInstr *&BrCond) {
  MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
  Builder.setInstrAndDebugLoc(*BrCond);
  LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
  // FIXME: Does int/fp matter for this? If so, we might need to restrict
  // this to i1 only since we might not know for sure what kind of
  // compare generated the condition value.
  auto True = Builder.buildConstant(
      Ty, getICmpTrueVal(getTargetLowering(), false, false));
  auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);

  auto *FallthroughBB = BrCond->getOperand(1).getMBB();
  Observer.changingInstr(MI);
  MI.getOperand(0).setMBB(FallthroughBB);
  Observer.changedInstr(MI);

  // Change the conditional branch to use the inverted condition and
  // new target block.
  Observer.changingInstr(*BrCond);
  BrCond->getOperand(0).setReg(Xor.getReg(0));
  BrCond->getOperand(1).setMBB(BrTarget);
  Observer.changedInstr(*BrCond);
}

static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}

bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemcpyInline(MI) ==
         LegalizerHelper::LegalizeResult::Legalized;
}

bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
  MachineIRBuilder HelperBuilder(MI);
  GISelObserverWrapper DummyObserver;
  LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
  return Helper.lowerMemCpyFamily(MI, MaxLen) ==
         LegalizerHelper::LegalizeResult::Legalized;
}

static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
                                             const Register Op,
                                             const MachineRegisterInfo &MRI) {
  const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
  if (!MaybeCst)
    return None;

  APFloat V = MaybeCst->getValueAPF();
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_FNEG: {
    V.changeSign();
    return V;
  }
  case TargetOpcode::G_FABS: {
    V.clearSign();
    return V;
  }
  case TargetOpcode::G_FPTRUNC:
    break;
  case TargetOpcode::G_FSQRT: {
    bool Unused;
    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
    V = APFloat(sqrt(V.convertToDouble()));
    break;
  }
  case TargetOpcode::G_FLOG2: {
    bool Unused;
    V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
    V = APFloat(log2(V.convertToDouble()));
    break;
  }
  }
  // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
  // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
  // and `G_FLOG2` reach here.
  bool Unused;
  V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
  return V;
}
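
// Illustrative sketch (hypothetical values): given an f32 G_FCONSTANT of 4.0
// as Op, the helper above folds G_FNEG to -4.0, G_FABS to 4.0, and G_FSQRT
// by converting to double, computing sqrt(4.0) = 2.0, and converting the
// result back to the semantics implied by DstTy.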

bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
                                                     Optional<APFloat> &Cst) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
  return Cst.hasValue();
}

void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
                                                     Optional<APFloat> &Cst) {
  assert(Cst.hasValue() && "Optional is unexpectedly empty!");
  Builder.setInstrAndDebugLoc(MI);
  MachineFunction &MF = Builder.getMF();
  auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
  Register DstReg = MI.getOperand(0).getReg();
  Builder.buildFConstant(DstReg, *FPVal);
  MI.eraseFromParent();
}

bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  // We're trying to match the following pattern:
  //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
  // -->
  //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)

  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Add2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Add2Def = MRI.getVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;

  Register Base = Add2Def->getOperand(1).getReg();
  Register Imm2 = Add2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Check if the new combined immediate forms an illegal addressing mode.
  // Do not combine if it was legal before but would get illegal.
  // To do so, we need to find a load/store user of the pointer to get
  // the access type.
  Type *AccessTy = nullptr;
  auto &MF = *MI.getMF();
  for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
    if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
      AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
                               MF.getFunction().getContext());
      break;
    }
  }
  TargetLoweringBase::AddrMode AMNew;
  APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
  AMNew.BaseOffs = CombinedImm.getSExtValue();
  if (AccessTy) {
    AMNew.HasBaseReg = true;
    TargetLoweringBase::AddrMode AMOld;
    AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
    AMOld.HasBaseReg = true;
    unsigned AS = MRI.getType(Add2).getAddressSpace();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
        !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
      return false;
  }

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm = AMNew.BaseOffs;
  MatchInfo.Base = Base;
  MatchInfo.Bank = getRegBank(Imm2);
  return true;
}

void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
                                           PtrAddChain &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  MachineIRBuilder MIB(MI);
  LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
  auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
  setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Base);
  MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
}

bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
  //   %t1 = SHIFT %base, G_CONSTANT imm1
  //   %root = SHIFT %t1, G_CONSTANT imm2
  // -->
  //   %root = SHIFT %base, G_CONSTANT (imm1 + imm2)

  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Register Shl2 = MI.getOperand(1).getReg();
  Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
  if (Shl2Def->getOpcode() != Opcode)
    return false;

  Register Base = Shl2Def->getOperand(1).getReg();
  Register Imm2 = Shl2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm =
      (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
  MatchInfo.Reg = Base;

  // There is no simple replacement for a saturating unsigned left shift that
  // exceeds the scalar size.
  if (Opcode == TargetOpcode::G_USHLSAT &&
      MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
    return false;

  return true;
}

void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
                                          RegisterImmPair &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
          Opcode == TargetOpcode::G_USHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");

  Builder.setInstrAndDebugLoc(MI);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
  auto Imm = MatchInfo.Imm;

  if (Imm >= ScalarSizeInBits) {
    // Any logical shift that exceeds scalar size will produce zero.
    if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
      Builder.buildConstant(MI.getOperand(0), 0);
      MI.eraseFromParent();
      return;
    }
    // Arithmetic shift and saturating signed left shift have no effect beyond
    // scalar size.
    Imm = ScalarSizeInBits - 1;
  }

  LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
  Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(MatchInfo.Reg);
  MI.getOperand(2).setReg(NewImm);
  Observer.changedInstr(MI);
}
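
// Illustrative sketch of the out-of-range case handled above (hypothetical
// values): for s32 operands,
//   %t:_(s32) = G_LSHR %x, %c20
//   %r:_(s32) = G_LSHR %t, %c20
// folds to a combined shift of 40 >= 32, so the apply emits
//   %r:_(s32) = G_CONSTANT i32 0
// rather than an out-of-range logical shift; arithmetic shifts are clamped to
// 31 instead.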

bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  // We're trying to match the following pattern with any of
  // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
  // with any of G_AND/G_OR/G_XOR logic instructions.
  //   %t1 = SHIFT %X, G_CONSTANT C0
  //   %t2 = LOGIC %t1, %Y
  //   %root = SHIFT %t2, G_CONSTANT C1
  // -->
  //   %t3 = SHIFT %X, G_CONSTANT (C0+C1)
  //   %t4 = SHIFT %Y, G_CONSTANT C1
  //   %root = LOGIC %t3, %t4
  unsigned ShiftOpcode = MI.getOpcode();
  assert((ShiftOpcode == TargetOpcode::G_SHL ||
          ShiftOpcode == TargetOpcode::G_ASHR ||
          ShiftOpcode == TargetOpcode::G_LSHR ||
          ShiftOpcode == TargetOpcode::G_USHLSAT ||
          ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  // Match a one-use bitwise logic op.
  Register LogicDest = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LogicDest))
    return false;

  MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
  unsigned LogicOpcode = LogicMI->getOpcode();
  if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
      LogicOpcode != TargetOpcode::G_XOR)
    return false;

  // Find a matching one-use shift by constant.
  const Register C1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
  if (!MaybeImmVal)
    return false;

  const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();

  auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
    // Shift should match previous one and should be a one-use.
    if (MI->getOpcode() != ShiftOpcode ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      return false;

    // Must be a constant.
    auto MaybeImmVal =
        getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
    if (!MaybeImmVal)
      return false;

    ShiftVal = MaybeImmVal->Value.getSExtValue();
    return true;
  };

  // Logic ops are commutative, so check each operand for a match.
  Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
  MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
  Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
  MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
  uint64_t C0Val;

  if (matchFirstShift(LogicMIOp1, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg2;
    MatchInfo.Shift2 = LogicMIOp1;
  } else if (matchFirstShift(LogicMIOp2, C0Val)) {
    MatchInfo.LogicNonShiftReg = LogicMIReg1;
    MatchInfo.Shift2 = LogicMIOp2;
  } else
    return false;

  MatchInfo.ValSum = C0Val + C1Val;

  // The fold is not valid if the sum of the shift values exceeds bitwidth.
  if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
    return false;

  MatchInfo.Logic = LogicMI;
  return true;
}

void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
                                              ShiftOfShiftedLogic &MatchInfo) {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
          Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
          Opcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");

  LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
  LLT DestType = MRI.getType(MI.getOperand(0).getReg());
  Builder.setInstrAndDebugLoc(MI);

  Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);

  Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
  Register Shift1 =
      Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);

  Register Shift2Const = MI.getOperand(2).getReg();
  Register Shift2 = Builder
                        .buildInstr(Opcode, {DestType},
                                    {MatchInfo.LogicNonShiftReg, Shift2Const})
                        .getReg(0);

  Register Dest = MI.getOperand(0).getReg();
  Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});

  // These were one use so it's safe to remove them.
  MatchInfo.Shift2->eraseFromParent();
  MatchInfo.Logic->eraseFromParent();

  MI.eraseFromParent();
}

bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value.exactLogBase2();
  return (static_cast<int32_t>(ShiftVal) != -1);
}

void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
                                          unsigned &ShiftVal) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  MachineIRBuilder MIB(MI);
  LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
  auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
  Observer.changingInstr(MI);
  MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
  MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
}
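
// Illustrative sketch (hypothetical register names): with the combine above,
//   %c:_(s32) = G_CONSTANT i32 8
//   %r:_(s32) = G_MUL %x, %c
// is rewritten in place to
//   %c3:_(s32) = G_CONSTANT i32 3
//   %r:_(s32)  = G_SHL %x, %c3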

// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
                                             RegisterImmPair &MatchData) {
  assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);

  Register LHS = MI.getOperand(1).getReg();

  Register ExtSrc;
  if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
      !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
    return false;

  // TODO: Should handle vector splat.
  Register RHS = MI.getOperand(2).getReg();
  auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!MaybeShiftAmtVal)
    return false;

  if (LI) {
    LLT SrcTy = MRI.getType(ExtSrc);

    // We only really care about the legality with the shifted value. We can
    // pick any type for the constant shift amount, so ask the target what to
    // use. Otherwise we would have to guess and hope it is reported as legal.
    LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
    if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
      return false;
  }

  int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
  MatchData.Reg = ExtSrc;
  MatchData.Imm = ShiftAmt;

  unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
  return MinLeadingZeros >= ShiftAmt;
}

void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
                                             const RegisterImmPair &MatchData) {
  Register ExtSrcReg = MatchData.Reg;
  int64_t ShiftAmtVal = MatchData.Imm;

  LLT ExtSrcTy = MRI.getType(ExtSrcReg);
  Builder.setInstrAndDebugLoc(MI);
  auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
  auto NarrowShift =
      Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
  Builder.buildZExt(MI.getOperand(0), NarrowShift);
  MI.eraseFromParent();
}
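
// Illustrative sketch (hypothetical register names): when KnownBits proves
// the shift cannot overflow the narrow source, the combine above turns
//   %wide:_(s64) = G_ZEXT %small(s32)
//   %r:_(s64)    = G_SHL %wide, %c
// into
//   %narrow:_(s32) = G_SHL %small, %c
//   %r:_(s64)      = G_ZEXT %narrow(s32)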

bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
                                              Register &MatchInfo) {
  GMerge &Merge = cast<GMerge>(MI);
  SmallVector<Register, 16> MergedValues;
  for (unsigned I = 0; I < Merge.getNumSources(); ++I)
    MergedValues.emplace_back(Merge.getSourceReg(I));

  auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
  if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
    return false;

  for (unsigned I = 0; I < MergedValues.size(); ++I)
    if (MergedValues[I] != Unmerge->getReg(I))
      return false;

  MatchInfo = Unmerge->getSourceReg();
  return true;
}
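
// Illustrative sketch (hypothetical register names): the match above detects
//   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %x(s64)
//   %y:_(s64) = G_MERGE_VALUES %a(s32), %b(s32)
// and reports %x as MatchInfo so that %y can simply be replaced by %x.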

static Register peekThroughBitcast(Register Reg,
                                   const MachineRegisterInfo &MRI) {
  while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
    ;

  return Reg;
}

bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  auto &Unmerge = cast<GUnmerge>(MI);
  Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);

  auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI);
  if (!SrcInstr)
    return false;

  // Check the source type of the merge.
  LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
  LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
  bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
  if (SrcMergeTy != Dst0Ty && !SameSize)
    return false;
  // They are the same now (modulo a bitcast).
  // We can collect all the src registers.
  for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
    Operands.push_back(SrcInstr->getSourceReg(Idx));
  return true;
}

void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
         "Expected an unmerge");
  assert((MI.getNumOperands() - 1 == Operands.size()) &&
         "Not enough operands to replace all defs");
  unsigned NumElems = MI.getNumOperands() - 1;

  LLT SrcTy = MRI.getType(Operands[0]);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  bool CanReuseInputDirectly = DstTy == SrcTy;
  Builder.setInstrAndDebugLoc(MI);
  for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
    Register DstReg = MI.getOperand(Idx).getReg();
    Register SrcReg = Operands[Idx];
    if (CanReuseInputDirectly)
      replaceRegWith(MRI, DstReg, SrcReg);
    else
      Builder.buildCast(DstReg, SrcReg);
  }
  MI.eraseFromParent();
}
1710 bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
1711 SmallVectorImpl<APInt> &Csts) {
1712 unsigned SrcIdx = MI.getNumOperands() - 1;
1713 Register SrcReg = MI.getOperand(SrcIdx).getReg();
1714 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
1715 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
1716 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
1718 // Break down the big constant in smaller ones.
1719 const MachineOperand &CstVal = SrcInstr->getOperand(1);
1720 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
1721 ? CstVal.getCImm()->getValue()
1722 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
1724 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
1725 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
1726 // Unmerge a constant.
1727 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
1728 Csts.emplace_back(Val.trunc(ShiftAmt));
1729 Val = Val.lshr(ShiftAmt);
1735 void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
1736 SmallVectorImpl<APInt> &Csts) {
1737 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1738 "Expected an unmerge");
1739 assert((MI.getNumOperands() - 1 == Csts.size()) &&
1740 "Not enough operands to replace all defs");
1741 unsigned NumElems = MI.getNumOperands() - 1;
1742 Builder.setInstrAndDebugLoc(MI);
1743 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
1744 Register DstReg = MI.getOperand(Idx).getReg();
1745 Builder.buildConstant(DstReg, Csts[Idx]);
1748 MI.eraseFromParent();
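// Schematic example (placeholder values): an unmerge of a G_CONSTANT becomes
// one constant per definition, low bits first:
//   %c:_(s64) = G_CONSTANT i64 0x0000000200000001
//   %d0:_(s32), %d1:_(s32) = G_UNMERGE_VALUES %c:_(s64)
// becomes
//   %d0:_(s32) = G_CONSTANT i32 1
//   %d1:_(s32) = G_CONSTANT i32 2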
1751 bool CombinerHelper::matchCombineUnmergeUndef(
1752 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
1753 unsigned SrcIdx = MI.getNumOperands() - 1;
1754 Register SrcReg = MI.getOperand(SrcIdx).getReg();
1755 MatchInfo = [&MI](MachineIRBuilder &B) {
1756 unsigned NumElems = MI.getNumOperands() - 1;
1757 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
1758 Register DstReg = MI.getOperand(Idx).getReg();
1759 B.buildUndef(DstReg);
1762 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
1765 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
1766 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1767 "Expected an unmerge");
1768 // Check that all the lanes are dead except the first one.
1769 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
1770 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
1776 void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
1777 Builder.setInstrAndDebugLoc(MI);
1778 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
1779 // Truncating a vector is going to truncate every single lane,
1780 // whereas we want the full lowbits.
1781 // Do the operation on a scalar instead.
1782 LLT SrcTy = MRI.getType(SrcReg);
1783 if (SrcTy.isVector())
1784     SrcReg =
1785         Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
1787 Register Dst0Reg = MI.getOperand(0).getReg();
1788 LLT Dst0Ty = MRI.getType(Dst0Reg);
1789 if (Dst0Ty.isVector()) {
1790 auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
1791 Builder.buildCast(Dst0Reg, MIB);
1792   } else
1793     Builder.buildTrunc(Dst0Reg, SrcReg);
1794 MI.eraseFromParent();
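// Schematic example (placeholder names): when only the first definition of the
// unmerge is used, the whole instruction reduces to a truncate of the source:
//   %d0:_(s32), %d1:_(s32) = G_UNMERGE_VALUES %x:_(s64)   ; %d1 has no uses
// becomes
//   %d0:_(s32) = G_TRUNC %x:_(s64)
// (with casts to/from scalar types when vectors are involved, as handled above).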
1797 bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
1798 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1799 "Expected an unmerge");
1800 Register Dst0Reg = MI.getOperand(0).getReg();
1801 LLT Dst0Ty = MRI.getType(Dst0Reg);
1802 // G_ZEXT on vector applies to each lane, so it will
1803 // affect all destinations. Therefore we won't be able
1804 // to simplify the unmerge to just the first definition.
1805 if (Dst0Ty.isVector())
1807 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
1808 LLT SrcTy = MRI.getType(SrcReg);
1809 if (SrcTy.isVector())
1812 Register ZExtSrcReg;
1813 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
1816 // Finally we can replace the first definition with
1817 // a zext of the source if the definition is big enough to hold
1818 // all of ZExtSrc bits.
1819 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
1820 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
1823 void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
1824 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1825 "Expected an unmerge");
1827 Register Dst0Reg = MI.getOperand(0).getReg();
1829 MachineInstr *ZExtInstr =
1830 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
1831 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
1832 "Expecting a G_ZEXT");
1834 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
1835 LLT Dst0Ty = MRI.getType(Dst0Reg);
1836 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
1838 Builder.setInstrAndDebugLoc(MI);
1840 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
1841 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
1843 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
1844 "ZExt src doesn't fit in destination");
1845 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
1849 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
1851 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
1852 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
1854 MI.eraseFromParent();
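// Schematic example (placeholder names): unmerging a G_ZEXT whose source fits
// in the first definition keeps only that zext; the remaining defs are zero:
//   %z:_(s64) = G_ZEXT %x:_(s16)
//   %d0:_(s32), %d1:_(s32) = G_UNMERGE_VALUES %z:_(s64)
// becomes
//   %d0:_(s32) = G_ZEXT %x:_(s16)
//   %d1:_(s32) = G_CONSTANT i32 0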
1857 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
1858 unsigned TargetShiftSize,
1859 unsigned &ShiftVal) {
1860 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
1861 MI.getOpcode() == TargetOpcode::G_LSHR ||
1862 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
1864 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1865 if (Ty.isVector()) // TODO:
1868 // Don't narrow further than the requested size.
1869 unsigned Size = Ty.getSizeInBits();
1870 if (Size <= TargetShiftSize)
1874 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1878 ShiftVal = MaybeImmVal->Value.getSExtValue();
1879 return ShiftVal >= Size / 2 && ShiftVal < Size;
1882 void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
1883 const unsigned &ShiftVal) {
1884 Register DstReg = MI.getOperand(0).getReg();
1885 Register SrcReg = MI.getOperand(1).getReg();
1886 LLT Ty = MRI.getType(SrcReg);
1887 unsigned Size = Ty.getSizeInBits();
1888 unsigned HalfSize = Size / 2;
1889 assert(ShiftVal >= HalfSize);
1891 LLT HalfTy = LLT::scalar(HalfSize);
1893 Builder.setInstr(MI);
1894 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
1895 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
1897 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
1898 Register Narrowed = Unmerge.getReg(1);
1900 // dst = G_LSHR s64:x, C for C >= 32
1902 // lo, hi = G_UNMERGE_VALUES x
1903 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
1905 if (NarrowShiftAmt != 0) {
1906 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
1907 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1910 auto Zero = Builder.buildConstant(HalfTy, 0);
1911 Builder.buildMerge(DstReg, { Narrowed, Zero });
1912 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
1913 Register Narrowed = Unmerge.getReg(0);
1914 // dst = G_SHL s64:x, C for C >= 32
1916 // lo, hi = G_UNMERGE_VALUES x
1917 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
1918 if (NarrowShiftAmt != 0) {
1919 Narrowed = Builder.buildShl(HalfTy, Narrowed,
1920 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1923 auto Zero = Builder.buildConstant(HalfTy, 0);
1924 Builder.buildMerge(DstReg, { Zero, Narrowed });
1925   } else {
1926     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1927 auto Hi = Builder.buildAShr(
1928 HalfTy, Unmerge.getReg(1),
1929 Builder.buildConstant(HalfTy, HalfSize - 1));
1931 if (ShiftVal == HalfSize) {
1932 // (G_ASHR i64:x, 32) ->
1933 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
1934 Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
1935 } else if (ShiftVal == Size - 1) {
1936 // Don't need a second shift.
1937 // (G_ASHR i64:x, 63) ->
1938 // %narrowed = (G_ASHR hi_32(x), 31)
1939 // G_MERGE_VALUES %narrowed, %narrowed
1940 Builder.buildMerge(DstReg, { Hi, Hi });
1942 auto Lo = Builder.buildAShr(
1943 HalfTy, Unmerge.getReg(1),
1944 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
1946 // (G_ASHR i64:x, C) ->, for C >= 32
1947 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
1948 Builder.buildMerge(DstReg, { Lo, Hi });
1952 MI.eraseFromParent();
1955 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
1956 unsigned TargetShiftAmount) {
1957   unsigned ShiftAmt;
1958   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
1959 applyCombineShiftToUnmerge(MI, ShiftAmt);
1966 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1967 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1968 Register DstReg = MI.getOperand(0).getReg();
1969 LLT DstTy = MRI.getType(DstReg);
1970 Register SrcReg = MI.getOperand(1).getReg();
1971 return mi_match(SrcReg, MRI,
1972 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
1975 void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1976 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1977 Register DstReg = MI.getOperand(0).getReg();
1978 Builder.setInstr(MI);
1979 Builder.buildCopy(DstReg, Reg);
1980 MI.eraseFromParent();
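// Schematic example (placeholder names): an inttoptr of a ptrtoint of the same
// pointer type is just the original pointer:
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
//   %q:_(p0) = G_INTTOPTR %i:_(s64)
// becomes a copy of %p into %q.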
1983 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1984 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1985 Register SrcReg = MI.getOperand(1).getReg();
1986 return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
1989 void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1990 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1991 Register DstReg = MI.getOperand(0).getReg();
1992 Builder.setInstr(MI);
1993 Builder.buildZExtOrTrunc(DstReg, Reg);
1994 MI.eraseFromParent();
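// Schematic example (placeholder names): a ptrtoint of an inttoptr reuses the
// original integer, zero-extended or truncated to the destination width:
//   %p:_(p0) = G_INTTOPTR %x:_(s32)
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
// becomes
//   %i:_(s64) = G_ZEXT %x:_(s32)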
1997 bool CombinerHelper::matchCombineAddP2IToPtrAdd(
1998 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
1999 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2000 Register LHS = MI.getOperand(1).getReg();
2001 Register RHS = MI.getOperand(2).getReg();
2002 LLT IntTy = MRI.getType(LHS);
2004 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2006 PtrReg.second = false;
2007 for (Register SrcReg : {LHS, RHS}) {
2008 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2009 // Don't handle cases where the integer is implicitly converted to the
2011 LLT PtrTy = MRI.getType(PtrReg.first);
2012 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2016 PtrReg.second = true;
2022 void CombinerHelper::applyCombineAddP2IToPtrAdd(
2023 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2024 Register Dst = MI.getOperand(0).getReg();
2025 Register LHS = MI.getOperand(1).getReg();
2026 Register RHS = MI.getOperand(2).getReg();
2028 const bool DoCommute = PtrReg.second;
2030 std::swap(LHS, RHS);
2033 LLT PtrTy = MRI.getType(LHS);
2035 Builder.setInstrAndDebugLoc(MI);
2036 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2037 Builder.buildPtrToInt(Dst, PtrAdd);
2038 MI.eraseFromParent();
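// Schematic example (placeholder names): an integer add of a ptrtoint becomes
// pointer arithmetic followed by a single ptrtoint (operands are commuted
// first when the pointer was on the RHS):
//   %i:_(s64) = G_PTRTOINT %p:_(p0)
//   %sum:_(s64) = G_ADD %i, %off
// becomes
//   %pa:_(p0) = G_PTR_ADD %p, %off
//   %sum:_(s64) = G_PTRTOINT %pa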
2041 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2042                                                   APInt &NewCst) {
2043   auto &PtrAdd = cast<GPtrAdd>(MI);
2044 Register LHS = PtrAdd.getBaseReg();
2045 Register RHS = PtrAdd.getOffsetReg();
2046 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2048 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2050 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2051 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2052 // G_INTTOPTR uses zero-extension
2053 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2054 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2062 void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2063                                                   APInt &NewCst) {
2064   auto &PtrAdd = cast<GPtrAdd>(MI);
2065 Register Dst = PtrAdd.getReg(0);
2067 Builder.setInstrAndDebugLoc(MI);
2068 Builder.buildConstant(Dst, NewCst);
2069 PtrAdd.eraseFromParent();
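// Schematic example (placeholder values): a G_PTR_ADD of an inttoptr'd
// constant and a constant offset folds to a single constant pointer value:
//   %p:_(p0) = G_INTTOPTR %c1
//   %q:_(p0) = G_PTR_ADD %p, %c2
// where %q becomes a constant equal to zext(%c1) + sext(%c2), as computed in
// the match above.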
2072 bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2073 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2074 Register DstReg = MI.getOperand(0).getReg();
2075 Register SrcReg = MI.getOperand(1).getReg();
2076 LLT DstTy = MRI.getType(DstReg);
2077 return mi_match(SrcReg, MRI,
2078 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2081 bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2082 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2083 Register DstReg = MI.getOperand(0).getReg();
2084 Register SrcReg = MI.getOperand(1).getReg();
2085 LLT DstTy = MRI.getType(DstReg);
2086 if (mi_match(SrcReg, MRI,
2087 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2088 unsigned DstSize = DstTy.getScalarSizeInBits();
2089 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2090 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2095 bool CombinerHelper::matchCombineExtOfExt(
2096 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2097 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2098 MI.getOpcode() == TargetOpcode::G_SEXT ||
2099 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2100 "Expected a G_[ASZ]EXT");
2101 Register SrcReg = MI.getOperand(1).getReg();
2102 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2103 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2104 unsigned Opc = MI.getOpcode();
2105 unsigned SrcOpc = SrcMI->getOpcode();
2106 if (Opc == SrcOpc ||
2107 (Opc == TargetOpcode::G_ANYEXT &&
2108 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2109 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2110 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2116 void CombinerHelper::applyCombineExtOfExt(
2117 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2118 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2119 MI.getOpcode() == TargetOpcode::G_SEXT ||
2120 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2121 "Expected a G_[ASZ]EXT");
2123 Register Reg = std::get<0>(MatchInfo);
2124 unsigned SrcExtOp = std::get<1>(MatchInfo);
2126 // Combine exts with the same opcode.
2127 if (MI.getOpcode() == SrcExtOp) {
2128 Observer.changingInstr(MI);
2129 MI.getOperand(1).setReg(Reg);
2130 Observer.changedInstr(MI);
2135 // - anyext([sz]ext x) to [sz]ext x
2136 // - sext(zext x) to zext x
2137 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2138 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2139 SrcExtOp == TargetOpcode::G_ZEXT)) {
2140 Register DstReg = MI.getOperand(0).getReg();
2141 Builder.setInstrAndDebugLoc(MI);
2142 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2143 MI.eraseFromParent();
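// Schematic examples (placeholder names) of the ext-of-ext folds applied above:
//   %a:_(s32) = G_ZEXT %x:_(s8); %b:_(s64) = G_ZEXT %a   -> %b = G_ZEXT %x
//   %a:_(s32) = G_SEXT %x:_(s8); %b:_(s64) = G_ANYEXT %a -> %b = G_SEXT %x
//   %a:_(s32) = G_ZEXT %x:_(s8); %b:_(s64) = G_SEXT %a   -> %b = G_ZEXT %x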
2147 void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
2148 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2149 Register DstReg = MI.getOperand(0).getReg();
2150 Register SrcReg = MI.getOperand(1).getReg();
2151 LLT DstTy = MRI.getType(DstReg);
2153 Builder.setInstrAndDebugLoc(MI);
2154 Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
2156 MI.eraseFromParent();
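// Schematic example (placeholder names): multiplying by -1 is rewritten as a
// subtraction from zero:
//   %neg1:_(s32) = G_CONSTANT i32 -1
//   %d:_(s32) = G_MUL %x, %neg1
// becomes
//   %zero:_(s32) = G_CONSTANT i32 0
//   %d:_(s32) = G_SUB %zero, %x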
2159 bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
2160 assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
2161 Register SrcReg = MI.getOperand(1).getReg();
2162 return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
2165 bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
2166 assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
2167 Src = MI.getOperand(1).getReg();
2168   Register AbsSrc;
2169   return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
2172 bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
2173 BuildFnTy &MatchInfo) {
2174 assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
2175 Register Src = MI.getOperand(1).getReg();
2176   Register NegSrc;
2178   if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
2179     return false;
2181 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2182 Observer.changingInstr(MI);
2183 MI.getOperand(1).setReg(NegSrc);
2184 Observer.changedInstr(MI);
2189 bool CombinerHelper::matchCombineTruncOfExt(
2190 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2191 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2192 Register SrcReg = MI.getOperand(1).getReg();
2193 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2194 unsigned SrcOpc = SrcMI->getOpcode();
2195 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2196 SrcOpc == TargetOpcode::G_ZEXT) {
2197 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2203 void CombinerHelper::applyCombineTruncOfExt(
2204 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2205 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2206 Register SrcReg = MatchInfo.first;
2207 unsigned SrcExtOp = MatchInfo.second;
2208 Register DstReg = MI.getOperand(0).getReg();
2209 LLT SrcTy = MRI.getType(SrcReg);
2210 LLT DstTy = MRI.getType(DstReg);
2211 if (SrcTy == DstTy) {
2212 MI.eraseFromParent();
2213 replaceRegWith(MRI, DstReg, SrcReg);
2216 Builder.setInstrAndDebugLoc(MI);
2217 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2218 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2220 Builder.buildTrunc(DstReg, SrcReg);
2221 MI.eraseFromParent();
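// Schematic examples (placeholder names) for trunc(ext x), depending on the
// relative sizes handled above:
//   %e:_(s64) = G_SEXT %x:_(s32); %t:_(s32) = G_TRUNC %e -> %t replaced by %x
//   %e:_(s64) = G_SEXT %x:_(s16); %t:_(s32) = G_TRUNC %e -> %t = G_SEXT %x
//   %e:_(s64) = G_SEXT %x:_(s32); %t:_(s16) = G_TRUNC %e -> %t = G_TRUNC %x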
2224 bool CombinerHelper::matchCombineTruncOfShl(
2225 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2226 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2227 Register DstReg = MI.getOperand(0).getReg();
2228 Register SrcReg = MI.getOperand(1).getReg();
2229 LLT DstTy = MRI.getType(DstReg);
2233 if (MRI.hasOneNonDBGUse(SrcReg) &&
2234 mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
2235 isLegalOrBeforeLegalizer(
2236 {TargetOpcode::G_SHL,
2237 {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
2238 KnownBits Known = KB->getKnownBits(ShiftAmt);
2239 unsigned Size = DstTy.getSizeInBits();
2240 if (Known.countMaxActiveBits() <= Log2_32(Size)) {
2241 MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
2248 void CombinerHelper::applyCombineTruncOfShl(
2249 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2250 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2251 Register DstReg = MI.getOperand(0).getReg();
2252 Register SrcReg = MI.getOperand(1).getReg();
2253 LLT DstTy = MRI.getType(DstReg);
2254 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2256 Register ShiftSrc = MatchInfo.first;
2257 Register ShiftAmt = MatchInfo.second;
2258 Builder.setInstrAndDebugLoc(MI);
2259 auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
2260 Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
2261 MI.eraseFromParent();
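// Schematic example (placeholder names): the truncate is pushed through the
// shift when the shift amount is known to fit in the narrow type:
//   %s:_(s64) = G_SHL %x:_(s64), %amt
//   %t:_(s32) = G_TRUNC %s
// becomes
//   %xt:_(s32) = G_TRUNC %x
//   %t:_(s32) = G_SHL %xt, %amt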
2264 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2265 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2266 return MO.isReg() &&
2267 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2271 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2272 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2273 return !MO.isReg() ||
2274 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2278 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2279 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2280 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2281 return all_of(Mask, [](int Elt) { return Elt < 0; });
2284 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2285 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2286 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2290 bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2291 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2292 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2296 bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2297 GSelect &SelMI = cast<GSelect>(MI);
2298   auto Cst =
2299       isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2302 OpIdx = Cst->isZero() ? 3 : 2;
2306 bool CombinerHelper::eraseInst(MachineInstr &MI) {
2307 MI.eraseFromParent();
2311 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2312 const MachineOperand &MOP2) {
2313 if (!MOP1.isReg() || !MOP2.isReg())
2315 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2318 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2321 MachineInstr *I1 = InstAndDef1->MI;
2322 MachineInstr *I2 = InstAndDef2->MI;
2324 // Handle a case like this:
2326 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2328 // Even though %0 and %1 are produced by the same instruction they are not
2329 // the same values.
2330 if (I1 == I2)
2331   return MOP1.getReg() == MOP2.getReg();
2333 // If we have an instruction which loads or stores, we can't guarantee that
2336 // For example, we may have
2338 // %x1 = G_LOAD %addr (load N from @somewhere)
2342 // %x2 = G_LOAD %addr (load N from @somewhere)
2344 // %or = G_OR %x1, %x2
2346 // It's possible that @foo will modify whatever lives at the address we're
2347 // loading from. To be safe, let's just assume that all loads and stores
2348 // are different (unless we have something which is guaranteed to not
2350 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
2353 // Check for physical registers on the instructions first to avoid cases
2356 // %a = COPY $physreg
2358 // SOMETHING implicit-def $physreg
2360 // %b = COPY $physreg
2362 // These copies are not equivalent.
2363 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2364       return MO.isReg() && MO.getReg().isPhysical();
2365     })) {
2366 // Check if we have a case like this:
2368 // %a = COPY $physreg
2371 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2372 // From that, we know that they must have the same value, since they must
2373 // have come from the same COPY.
2374 return I1->isIdenticalTo(*I2);
2377 // We don't have any physical registers, so we don't necessarily need the
2380 // On the off-chance that there's some target instruction feeding into the
2381 // instruction, let's use produceSameValue instead of isIdenticalTo.
2382 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2383 // Handle instructions with multiple defs that produce same values. Values
2384 // are same for operands with same index.
2385 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2386 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2387 // I1 and I2 are different instructions but produce same values,
2388 // %1 and %6 are same, %1 and %7 are not the same value.
2389 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
2390 I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
2395 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2398 auto *MI = MRI.getVRegDef(MOP.getReg());
2399 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2400 return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
2401 MaybeCst->getSExtValue() == C;
2404 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2406 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2407 Register OldReg = MI.getOperand(0).getReg();
2408 Register Replacement = MI.getOperand(OpIdx).getReg();
2409 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2410 MI.eraseFromParent();
2411 replaceRegWith(MRI, OldReg, Replacement);
2415 bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2416 Register Replacement) {
2417 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2418 Register OldReg = MI.getOperand(0).getReg();
2419 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2420 MI.eraseFromParent();
2421 replaceRegWith(MRI, OldReg, Replacement);
2425 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2426 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2427 // Match (cond ? x : x)
2428 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2429 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2433 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2434 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2435 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2439 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2440 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2441 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2445 bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2446 MachineOperand &MO = MI.getOperand(OpIdx);
2447 return MO.isReg() &&
2448 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2451 bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2453 MachineOperand &MO = MI.getOperand(OpIdx);
2454 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2457 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2458 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2459 Builder.setInstr(MI);
2460 Builder.buildFConstant(MI.getOperand(0), C);
2461 MI.eraseFromParent();
2465 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2466 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2467 Builder.setInstr(MI);
2468 Builder.buildConstant(MI.getOperand(0), C);
2469 MI.eraseFromParent();
2473 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2474 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2475 Builder.setInstr(MI);
2476 Builder.buildConstant(MI.getOperand(0), C);
2477 MI.eraseFromParent();
2481 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2482 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2483 Builder.setInstr(MI);
2484 Builder.buildUndef(MI.getOperand(0));
2485 MI.eraseFromParent();
2489 bool CombinerHelper::matchSimplifyAddToSub(
2490 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2491 Register LHS = MI.getOperand(1).getReg();
2492 Register RHS = MI.getOperand(2).getReg();
2493 Register &NewLHS = std::get<0>(MatchInfo);
2494 Register &NewRHS = std::get<1>(MatchInfo);
2496 // Helper lambda to check for opportunities for
2497 // ((0-A) + B) -> B - A
2498 // (A + (0-B)) -> A - B
2499 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2500 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2502 NewLHS = MaybeNewLHS;
2506 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2509 bool CombinerHelper::matchCombineInsertVecElts(
2510 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2511 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2513 Register DstReg = MI.getOperand(0).getReg();
2514 LLT DstTy = MRI.getType(DstReg);
2515 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2516 unsigned NumElts = DstTy.getNumElements();
2517 // If this MI is part of a sequence of insert_vec_elts, then
2518 // don't do the combine in the middle of the sequence.
2519 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2520 TargetOpcode::G_INSERT_VECTOR_ELT)
2522 MachineInstr *CurrInst = &MI;
2523 MachineInstr *TmpInst;
2526 MatchInfo.resize(NumElts);
2527 while (mi_match(
2528     CurrInst->getOperand(0).getReg(), MRI,
2529 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2530 if (IntImm >= NumElts)
2532 if (!MatchInfo[IntImm])
2533 MatchInfo[IntImm] = TmpReg;
2537 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2539 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2540 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2541 if (!MatchInfo[I - 1].isValid())
2542 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2546 // If we didn't end in a G_IMPLICIT_DEF, bail out.
2547 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
2550 void CombinerHelper::applyCombineInsertVecElts(
2551 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2552 Builder.setInstr(MI);
2554 auto GetUndef = [&]() {
2557 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2558 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2561 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2563 MatchInfo[I] = GetUndef();
2565 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2566 MI.eraseFromParent();
2569 void CombinerHelper::applySimplifyAddToSub(
2570 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2571 Builder.setInstr(MI);
2572 Register SubLHS, SubRHS;
2573 std::tie(SubLHS, SubRHS) = MatchInfo;
2574 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2575 MI.eraseFromParent();
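// Schematic example (placeholder names) of the add-of-negation fold:
//   %neg:_(s32) = G_SUB %zero, %a
//   %d:_(s32) = G_ADD %neg, %b
// becomes
//   %d:_(s32) = G_SUB %b, %a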
2578 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
2579 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2580 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2582 // Creates the new hand + logic instruction (but does not insert them.)
2584 // On success, MatchInfo is populated with the new instructions. These are
2585 // inserted in applyHoistLogicOpWithSameOpcodeHands.
2586 unsigned LogicOpcode = MI.getOpcode();
2587 assert(LogicOpcode == TargetOpcode::G_AND ||
2588 LogicOpcode == TargetOpcode::G_OR ||
2589 LogicOpcode == TargetOpcode::G_XOR);
2590 MachineIRBuilder MIB(MI);
2591 Register Dst = MI.getOperand(0).getReg();
2592 Register LHSReg = MI.getOperand(1).getReg();
2593 Register RHSReg = MI.getOperand(2).getReg();
2595 // Don't recompute anything.
2596 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2599 // Make sure we have (hand x, ...), (hand y, ...)
2600 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2601 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2602 if (!LeftHandInst || !RightHandInst)
2604 unsigned HandOpcode = LeftHandInst->getOpcode();
2605 if (HandOpcode != RightHandInst->getOpcode())
2607 if (!LeftHandInst->getOperand(1).isReg() ||
2608 !RightHandInst->getOperand(1).isReg())
2611 // Make sure the types match up, and if we're doing this post-legalization,
2612 // we end up with legal types.
2613 Register X = LeftHandInst->getOperand(1).getReg();
2614 Register Y = RightHandInst->getOperand(1).getReg();
2615 LLT XTy = MRI.getType(X);
2616 LLT YTy = MRI.getType(Y);
2619 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
2622 // Optional extra source register.
2623 Register ExtraHandOpSrcReg;
2624 switch (HandOpcode) {
2627 case TargetOpcode::G_ANYEXT:
2628 case TargetOpcode::G_SEXT:
2629 case TargetOpcode::G_ZEXT: {
2630 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
2633 case TargetOpcode::G_AND:
2634 case TargetOpcode::G_ASHR:
2635 case TargetOpcode::G_LSHR:
2636 case TargetOpcode::G_SHL: {
2637 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
2638 MachineOperand &ZOp = LeftHandInst->getOperand(2);
2639 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
2641 ExtraHandOpSrcReg = ZOp.getReg();
2646 // Record the steps to build the new instructions.
2648 // Steps to build (logic x, y)
2649 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
2650 OperandBuildSteps LogicBuildSteps = {
2651 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
2652 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
2653 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
2654 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
2656 // Steps to build hand (logic x, y), ...z
2657 OperandBuildSteps HandBuildSteps = {
2658 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
2659 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
2660 if (ExtraHandOpSrcReg.isValid())
2661 HandBuildSteps.push_back(
2662 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
2663 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
2665 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
2669 void CombinerHelper::applyBuildInstructionSteps(
2670 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2671 assert(MatchInfo.InstrsToBuild.size() &&
2672 "Expected at least one instr to build?");
2673 Builder.setInstr(MI);
2674 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
2675 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
2676 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
2677 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
2678     for (auto &OperandFn : InstrToBuild.OperandFns)
2679       OperandFn(Instr);
2680   }
2681 MI.eraseFromParent();
2684 bool CombinerHelper::matchAshrShlToSextInreg(
2685 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
2686 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2687 int64_t ShlCst, AshrCst;
2689 // FIXME: detect splat constant vectors.
2690 if (!mi_match(MI.getOperand(0).getReg(), MRI,
2691 m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
2693 if (ShlCst != AshrCst)
2695 if (!isLegalOrBeforeLegalizer(
2696 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
2698 MatchInfo = std::make_tuple(Src, ShlCst);
2702 void CombinerHelper::applyAshShlToSextInreg(
2703 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
2704 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2707 std::tie(Src, ShiftAmt) = MatchInfo;
2708 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
2709 Builder.setInstrAndDebugLoc(MI);
2710 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
2711 MI.eraseFromParent();
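// Schematic example (placeholder values): a shift-left/arithmetic-shift-right
// pair by the same constant is a sign extension of the low bits:
//   %s:_(s32) = G_SHL %x, 24
//   %d:_(s32) = G_ASHR %s, 24
// becomes
//   %d:_(s32) = G_SEXT_INREG %x, 8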
2714 /// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
2715 bool CombinerHelper::matchOverlappingAnd(
2716 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2717 assert(MI.getOpcode() == TargetOpcode::G_AND);
2719 Register Dst = MI.getOperand(0).getReg();
2720 LLT Ty = MRI.getType(Dst);
2722   Register R;
2723   int64_t C1;
2724   int64_t C2;
2725   if (!mi_match(
2726           Dst, MRI,
2727           m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
2728     return false;
2730 MatchInfo = [=](MachineIRBuilder &B) {
2731     if (C1 & C2) {
2732       B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
2733       return;
2734     }
2735 auto Zero = B.buildConstant(Ty, 0);
2736 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
2741 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
2742 Register &Replacement) {
2745 // %y:_(sN) = G_SOMETHING
2746 // %x:_(sN) = G_SOMETHING
2747 // %res:_(sN) = G_AND %x, %y
2749 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
2751 // Patterns like this can appear as a result of legalization. E.g.
2753 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
2754 // %one:_(s32) = G_CONSTANT i32 1
2755 // %and:_(s32) = G_AND %cmp, %one
2757 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
2758 assert(MI.getOpcode() == TargetOpcode::G_AND);
2762 Register AndDst = MI.getOperand(0).getReg();
2763 LLT DstTy = MRI.getType(AndDst);
2765 // FIXME: This should be removed once GISelKnownBits supports vectors.
2766 if (DstTy.isVector())
2769 Register LHS = MI.getOperand(1).getReg();
2770 Register RHS = MI.getOperand(2).getReg();
2771 KnownBits LHSBits = KB->getKnownBits(LHS);
2772 KnownBits RHSBits = KB->getKnownBits(RHS);
2774 // Check that x & Mask == x.
2775 // x & 1 == x, always
2776 // x & 0 == x, only if x is also 0
2777 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
2779 // Check if we can replace AndDst with the LHS of the G_AND
2780 if (canReplaceReg(AndDst, LHS, MRI) &&
2781 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
2786 // Check if we can replace AndDst with the RHS of the G_AND
2787 if (canReplaceReg(AndDst, RHS, MRI) &&
2788 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
2796 bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
2799 // %y:_(sN) = G_SOMETHING
2800 // %x:_(sN) = G_SOMETHING
2801 // %res:_(sN) = G_OR %x, %y
2803 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
2804 assert(MI.getOpcode() == TargetOpcode::G_OR);
2808 Register OrDst = MI.getOperand(0).getReg();
2809 LLT DstTy = MRI.getType(OrDst);
2811 // FIXME: This should be removed once GISelKnownBits supports vectors.
2812 if (DstTy.isVector())
2815 Register LHS = MI.getOperand(1).getReg();
2816 Register RHS = MI.getOperand(2).getReg();
2817 KnownBits LHSBits = KB->getKnownBits(LHS);
2818 KnownBits RHSBits = KB->getKnownBits(RHS);
2820 // Check that x | Mask == x.
2821 // x | 0 == x, always
2822 // x | 1 == x, only if x is also 1
2823 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
2825 // Check if we can replace OrDst with the LHS of the G_OR
2826 if (canReplaceReg(OrDst, LHS, MRI) &&
2827 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
2832 // Check if we can replace OrDst with the RHS of the G_OR
2833 if (canReplaceReg(OrDst, RHS, MRI) &&
2834 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
2842 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
2843 // If the input is already sign extended, just drop the extension.
2844 Register Src = MI.getOperand(1).getReg();
2845 unsigned ExtBits = MI.getOperand(2).getImm();
2846 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
2847 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
2850 static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
2851 int64_t Cst, bool IsVector, bool IsFP) {
2852 // For i1, Cst will always be -1 regardless of boolean contents.
2853 return (ScalarSizeBits == 1 && Cst == -1) ||
2854 isConstTrueVal(TLI, Cst, IsVector, IsFP);
2857 bool CombinerHelper::matchNotCmp(MachineInstr &MI,
2858 SmallVectorImpl<Register> &RegsToNegate) {
2859 assert(MI.getOpcode() == TargetOpcode::G_XOR);
2860 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2861 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
2864 // We match xor(src, true) here.
2865 if (!mi_match(MI.getOperand(0).getReg(), MRI,
2866 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
2869 if (!MRI.hasOneNonDBGUse(XorSrc))
2872 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
2873 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
2874 // list of tree nodes to visit.
2875 RegsToNegate.push_back(XorSrc);
2876 // Remember whether the comparisons are all integer or all floating point.
2879 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
2880 Register Reg = RegsToNegate[I];
2881 if (!MRI.hasOneNonDBGUse(Reg))
2883 MachineInstr *Def = MRI.getVRegDef(Reg);
2884 switch (Def->getOpcode()) {
2886 // Don't match if the tree contains anything other than ANDs, ORs and
2889 case TargetOpcode::G_ICMP:
2893 // When we apply the combine we will invert the predicate.
2895 case TargetOpcode::G_FCMP:
2899 // When we apply the combine we will invert the predicate.
2901 case TargetOpcode::G_AND:
2902 case TargetOpcode::G_OR:
2903 // Implement De Morgan's laws:
2904 // ~(x & y) -> ~x | ~y
2905 // ~(x | y) -> ~x & ~y
2906 // When we apply the combine we will change the opcode and recursively
2907 // negate the operands.
2908 RegsToNegate.push_back(Def->getOperand(1).getReg());
2909 RegsToNegate.push_back(Def->getOperand(2).getReg());
2914 // Now we know whether the comparisons are integer or floating point, check
2915 // the constant in the xor.
2917 if (Ty.isVector()) {
2918 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
2919 auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
2922 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
2925 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
2927 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
2934 void CombinerHelper::applyNotCmp(MachineInstr &MI,
2935 SmallVectorImpl<Register> &RegsToNegate) {
2936 for (Register Reg : RegsToNegate) {
2937 MachineInstr *Def = MRI.getVRegDef(Reg);
2938 Observer.changingInstr(*Def);
2939 // For each comparison, invert the opcode. For each AND and OR, change the
2941 switch (Def->getOpcode()) {
2943 llvm_unreachable("Unexpected opcode");
2944 case TargetOpcode::G_ICMP:
2945 case TargetOpcode::G_FCMP: {
2946 MachineOperand &PredOp = Def->getOperand(1);
2947 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
2948 (CmpInst::Predicate)PredOp.getPredicate());
2949 PredOp.setPredicate(NewP);
2952 case TargetOpcode::G_AND:
2953 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
2955 case TargetOpcode::G_OR:
2956 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
2959 Observer.changedInstr(*Def);
2962 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
2963 MI.eraseFromParent();
2966 bool CombinerHelper::matchXorOfAndWithSameReg(
2967 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2968 // Match (xor (and x, y), y) (or any of its commuted cases)
2969 assert(MI.getOpcode() == TargetOpcode::G_XOR);
2970 Register &X = MatchInfo.first;
2971 Register &Y = MatchInfo.second;
2972 Register AndReg = MI.getOperand(1).getReg();
2973 Register SharedReg = MI.getOperand(2).getReg();
2975 // Find a G_AND on either side of the G_XOR.
2978 // (xor (and x, y), SharedReg)
2979 // (xor SharedReg, (and x, y))
2980 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
2981 std::swap(AndReg, SharedReg);
2982 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
2986 // Only do this if we'll eliminate the G_AND.
2987 if (!MRI.hasOneNonDBGUse(AndReg))
2990 // We can combine if SharedReg is the same as either the LHS or RHS of the
2991   // G_AND.
2992   if (Y != SharedReg)
2993     std::swap(X, Y);
2994   return Y == SharedReg;
2997 void CombinerHelper::applyXorOfAndWithSameReg(
2998 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2999 // Fold (xor (and x, y), y) -> (and (not x), y)
3000 Builder.setInstrAndDebugLoc(MI);
3002 std::tie(X, Y) = MatchInfo;
3003 auto Not = Builder.buildNot(MRI.getType(X), X);
3004 Observer.changingInstr(MI);
3005 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3006 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3007 MI.getOperand(2).setReg(Y);
3008 Observer.changedInstr(MI);
3011 bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3012 auto &PtrAdd = cast<GPtrAdd>(MI);
3013 Register DstReg = PtrAdd.getReg(0);
3014 LLT Ty = MRI.getType(DstReg);
3015 const DataLayout &DL = Builder.getMF().getDataLayout();
3017 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3020 if (Ty.isPointer()) {
3021 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3022 return ConstVal && *ConstVal == 0;
3025 assert(Ty.isVector() && "Expecting a vector type");
3026 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3027 return isBuildVectorAllZeros(*VecMI, MRI);
3030 void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3031 auto &PtrAdd = cast<GPtrAdd>(MI);
3032 Builder.setInstrAndDebugLoc(PtrAdd);
3033 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3034 PtrAdd.eraseFromParent();
3037 /// The second source operand is known to be a power of 2.
3038 void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3039 Register DstReg = MI.getOperand(0).getReg();
3040 Register Src0 = MI.getOperand(1).getReg();
3041 Register Pow2Src1 = MI.getOperand(2).getReg();
3042 LLT Ty = MRI.getType(DstReg);
3043 Builder.setInstrAndDebugLoc(MI);
3045 // Fold (urem x, pow2) -> (and x, pow2-1)
3046 auto NegOne = Builder.buildConstant(Ty, -1);
3047 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3048 Builder.buildAnd(DstReg, Src0, Add);
3049 MI.eraseFromParent();
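// Schematic example (placeholder names): with a power-of-two divisor the
// remainder is a simple mask:
//   %d:_(s32) = G_UREM %x, %pow2      ; %pow2 known to be a power of two
// becomes
//   %m1:_(s32) = G_ADD %pow2, -1
//   %d:_(s32) = G_AND %x, %m1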
3052 Optional<SmallVector<Register, 8>>
3053 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3054 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3055 // We want to detect if Root is part of a tree which represents a bunch
3056 // of loads being merged into a larger load. We'll try to recognize patterns
3057 // like, for example:
3076 // Each "Reg" may have been produced by a load + some arithmetic. This
3077 // function will save each of them.
3078 SmallVector<Register, 8> RegsToVisit;
3079 SmallVector<const MachineInstr *, 7> Ors = {Root};
3081 // In the "worst" case, we're dealing with a load for each byte. So, there
3082 // are at most #bytes - 1 ORs.
3083 const unsigned MaxIter =
3084 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3085 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3088 const MachineInstr *Curr = Ors.pop_back_val();
3089 Register OrLHS = Curr->getOperand(1).getReg();
3090 Register OrRHS = Curr->getOperand(2).getReg();
3092 // In the combine, we want to eliminate the entire tree.
3093 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3096 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3097 // something that may be a load + arithmetic.
3098 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3101 RegsToVisit.push_back(OrLHS);
3102 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3105 RegsToVisit.push_back(OrRHS);
3108 // We're going to try and merge each register into a wider power-of-2 type,
3109 // so we ought to have an even number of registers.
3110 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3115 /// Helper function for findLoadOffsetsForLoadOrCombine.
3117 /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3118 /// and then moving that value into a specific byte offset.
3122 /// \returns The load instruction and the byte offset it is moved into.
3123 static Optional<std::pair<GZExtLoad *, int64_t>>
3124 matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3125 const MachineRegisterInfo &MRI) {
3126 assert(MRI.hasOneNonDBGUse(Reg) &&
3127 "Expected Reg to only have one non-debug use?");
3128   Register MaybeLoad;
3129   int64_t Shift;
3130   if (!mi_match(Reg, MRI,
3131                 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3132     Shift = 0;
3133     MaybeLoad = Reg;
3134   }
3136 if (Shift % MemSizeInBits != 0)
3139 // TODO: Handle other types of loads.
3140 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3144 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3147 return std::make_pair(Load, Shift / MemSizeInBits);
3150 Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3151 CombinerHelper::findLoadOffsetsForLoadOrCombine(
3152 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3153 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3155 // Each load found for the pattern. There should be one for each RegsToVisit.
3156 SmallSetVector<const MachineInstr *, 8> Loads;
3158 // The lowest index used in any load. (The lowest "i" for each x[i].)
3159 int64_t LowestIdx = INT64_MAX;
3161 // The load which uses the lowest index.
3162 GZExtLoad *LowestIdxLoad = nullptr;
3164 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3165 SmallSet<int64_t, 8> SeenIdx;
3167 // Ensure each load is in the same MBB.
3168 // TODO: Support multiple MachineBasicBlocks.
3169 MachineBasicBlock *MBB = nullptr;
3170 const MachineMemOperand *MMO = nullptr;
3172 // Earliest instruction-order load in the pattern.
3173 GZExtLoad *EarliestLoad = nullptr;
3175 // Latest instruction-order load in the pattern.
3176 GZExtLoad *LatestLoad = nullptr;
3178 // Base pointer which every load should share.
3181 // We want to find a load for each register. Each load should have some
3182 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3183 // track of the load which uses the lowest index. Later, we will check if we
3184 // can use its pointer in the final, combined load.
3185 for (auto Reg : RegsToVisit) {
3186 // Find the load, and find the position that it will end up in (e.g. a
3188 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3193 std::tie(Load, DstPos) = *LoadAndPos;
3195 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3196 // it is difficult to check for stores/calls/etc between loads.
3197 MachineBasicBlock *LoadMBB = Load->getParent();
3203 // Make sure that the MachineMemOperands of every seen load are compatible.
3204 auto &LoadMMO = Load->getMMO();
3207 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3210 // Find out what the base pointer and index for the load is.
3213 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3214 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3215 LoadPtr = Load->getOperand(1).getReg();
3219 // Don't combine things like a[i], a[i] -> a bigger load.
3220 if (!SeenIdx.insert(Idx).second)
3223 // Every load must share the same base pointer; don't combine things like:
3225 // a[i], b[i + 1] -> a bigger load.
3226 if (!BasePtr.isValid())
3228 if (BasePtr != LoadPtr)
3231 if (Idx < LowestIdx) {
3233 LowestIdxLoad = Load;
3236 // Keep track of the byte offset that this load ends up at. If we have seen
3237 // the byte offset, then stop here. We do not want to combine:
3239 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3240 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3244 // Keep track of the position of the earliest/latest loads in the pattern.
3245 // We will check that there are no load fold barriers between them later
3248 // FIXME: Is there a better way to check for load fold barriers?
3249 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3250 EarliestLoad = Load;
3251 if (!LatestLoad || dominates(*LatestLoad, *Load))
3255 // We found a load for each register. Let's check if each load satisfies the
3257 assert(Loads.size() == RegsToVisit.size() &&
3258 "Expected to find a load for each register?");
3259 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3260 LatestLoad && "Expected at least two loads?");
3262 // Check if there are any stores, calls, etc. between any of the loads. If
3263 // there are, then we can't safely perform the combine.
3265 // MaxIter is chosen based off the (worst case) number of iterations it
3266 // typically takes to succeed in the LLVM test suite plus some padding.
3268 // FIXME: Is there a better way to check for load fold barriers?
3269 const unsigned MaxIter = 20;
3270   unsigned Iter = 0;
3271   for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3272 LatestLoad->getIterator())) {
3273 if (Loads.count(&MI))
3275 if (MI.isLoadFoldBarrier())
3277 if (Iter++ == MaxIter)
3281 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3284 bool CombinerHelper::matchLoadOrCombine(
3285 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3286 assert(MI.getOpcode() == TargetOpcode::G_OR);
3287 MachineFunction &MF = *MI.getMF();
3288 // Assuming a little-endian target, transform:
3290 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3292 // s32 val = *((i32)a)
3295 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3297 // s32 val = BSWAP(*((s32)a))
3298 Register Dst = MI.getOperand(0).getReg();
3299 LLT Ty = MRI.getType(Dst);
3303 // We need to combine at least two loads into this type. Since the smallest
3304 // possible load is into a byte, we need at least a 16-bit wide type.
3305 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3306 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3309 // Match a collection of non-OR instructions in the pattern.
3310 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3314 // We have a collection of non-OR instructions. Figure out how wide each of
3315 // the small loads should be based off of the number of potential loads we
3317 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3318 if (NarrowMemSizeInBits % 8 != 0)
3321 // Check if each register feeding into each OR is a load from the same
3322 // base pointer + some arithmetic.
3324 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3326 // Also verify that each of these ends up putting a[i] into the same memory
3327 // offset as a load into a wide type would.
3328 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3329 GZExtLoad *LowestIdxLoad, *LatestLoad;
3331 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3332 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3335 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3337 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3338 // we found before, check if this corresponds to a big or little endian byte
3339 // pattern. If it does, then we can represent it using a load + possibly a
3341 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3342 Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3343 if (!IsBigEndian.hasValue())
3345 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3346 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3349 // Make sure that the load from the lowest index produces offset 0 in the
3352 // This ensures that we won't combine something like this:
3354 // load x[i] -> byte 2
3355 // load x[i+1] -> byte 0 ---> wide_load x[i]
3356 // load x[i+2] -> byte 1
3357 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3358 const unsigned ZeroByteOffset =
3360 ? bigEndianByteAt(NumLoadsInTy, 0)
3361 : littleEndianByteAt(NumLoadsInTy, 0);
3362 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3363 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3364 ZeroOffsetIdx->second != LowestIdx)
3367 // We will reuse the pointer from the load which ends up at byte offset 0. It
3368 // may not use index 0.
3369 Register Ptr = LowestIdxLoad->getPointerReg();
3370 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3371 LegalityQuery::MemDesc MMDesc(MMO);
3372 MMDesc.MemoryTy = Ty;
3373 if (!isLegalOrBeforeLegalizer(
3374 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3376 auto PtrInfo = MMO.getPointerInfo();
3377 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3379 // Load must be allowed and fast on the target.
3380 LLVMContext &C = MF.getFunction().getContext();
3381 auto &DL = MF.getDataLayout();
3382   bool Fast = false;
3383   if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3384       !Fast)
3385     return false;
3387 MatchInfo = [=](MachineIRBuilder &MIB) {
3388 MIB.setInstrAndDebugLoc(*LatestLoad);
3389 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3390 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3391     if (NeedsBSwap)
3392       MIB.buildBSwap(Dst, LoadDst);
3397 /// Check if the store \p Store is a truncstore that can be merged. That is,
3398 /// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
3399 /// Register then it does not need to match and SrcVal is set to the source
3401 /// On match, returns the start byte offset of the \p SrcVal that is being
3403 static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
3404 MachineRegisterInfo &MRI) {
3406 if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
3409 // The shift amount must be a constant multiple of the narrow type.
3410 // It is translated to the offset address in the wide source value "y".
3412 // x = G_LSHR y, ShiftAmtC
3415 Register FoundSrcVal;
3417 if (!mi_match(TruncVal, MRI,
3418 m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
3419 m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
3420 if (!SrcVal.isValid() || TruncVal == SrcVal) {
3421 if (!SrcVal.isValid())
3422       SrcVal = TruncVal;
3423     return 0; // If it's the lowest index store.
3428 unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
3429   if (ShiftAmt % NarrowBits != 0)
3430     return None;
3431 const unsigned Offset = ShiftAmt / NarrowBits;
3433 if (SrcVal.isValid() && FoundSrcVal != SrcVal)
3436 if (!SrcVal.isValid())
3437 SrcVal = FoundSrcVal;
3438   else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
3439     return None;
3440   return Offset;
3441 }
3443 /// Match a pattern where a wide type scalar value is stored by several narrow
3444 /// stores. Fold it into a single store or a BSWAP and a store if the targets
3447 /// Assuming little endian target:
3450 /// p[0] = (val >> 0) & 0xFF;
3451 /// p[1] = (val >> 8) & 0xFF;
3452 /// p[2] = (val >> 16) & 0xFF;
3453 /// p[3] = (val >> 24) & 0xFF;
3455 /// *((i32)p) = val;
3459 /// p[0] = (val >> 24) & 0xFF;
3460 /// p[1] = (val >> 16) & 0xFF;
3461 /// p[2] = (val >> 8) & 0xFF;
3462 /// p[3] = (val >> 0) & 0xFF;
3464 /// *((i32)p) = BSWAP(val);
3465 bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
3466 MergeTruncStoresInfo &MatchInfo) {
3467 auto &StoreMI = cast<GStore>(MI);
3468 LLT MemTy = StoreMI.getMMO().getMemoryType();
3470 // We only handle merging simple stores of 1-4 bytes.
3471 if (!MemTy.isScalar())
3473 switch (MemTy.getSizeInBits()) {
3481 if (!StoreMI.isSimple())
3484 // We do a simple search for mergeable stores prior to this one.
3485 // Any potential alias hazard along the way terminates the search.
3486 SmallVector<GStore *> FoundStores;
3488 // We're looking for:
3489 // 1) a (store(trunc(...)))
3490 // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
3491 // the partial value stored.
3492 // 3) where the offsets form either a little or big-endian sequence.
3494 auto &LastStore = StoreMI;
3496 // The single base pointer that all stores must use.
3499 if (!mi_match(LastStore.getPointerReg(), MRI,
3500 m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
3501 BaseReg = LastStore.getPointerReg();
3505 GStore *LowestIdxStore = &LastStore;
3506 int64_t LowestIdxOffset = LastOffset;
3508 Register WideSrcVal;
3509 auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
3510 if (!LowestShiftAmt)
3511 return false; // Didn't match a trunc.
3512 assert(WideSrcVal.isValid());
3514 LLT WideStoreTy = MRI.getType(WideSrcVal);
3515 // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
3516 if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
3518 const unsigned NumStoresRequired =
3519 WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
3521 SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
3522 OffsetMap[*LowestShiftAmt] = LastOffset;
3523 FoundStores.emplace_back(&LastStore);
3525 // Search upwards through the block for more stores to merge.
3526 // We use a search threshold of 10 instructions here because the combiner
3527 // works top-down within a block, and we don't want to search an unbounded
3528 // number of predecessor instructions trying to find matching stores.
3529 // If we moved this optimization into a separate pass then we could probably
3530 // use a more efficient search without having a hard-coded threshold.
3531 const int MaxInstsToCheck = 10;
3532 int NumInstsChecked = 0;
3533 for (auto II = ++LastStore.getReverseIterator();
3534 II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
3538 if ((NewStore = dyn_cast<GStore>(&*II))) {
3539 if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
3541 } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
3544 continue; // This is a safe instruction we can look past.
3547 Register NewBaseReg;
3549 // Check we're storing to the same base + some offset.
3550 if (!mi_match(NewStore->getPointerReg(), MRI,
3551 m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
3552 NewBaseReg = NewStore->getPointerReg();
3555 if (BaseReg != NewBaseReg)
3558 auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
3559 if (!ShiftByteOffset)
3561 if (MemOffset < LowestIdxOffset) {
3562 LowestIdxOffset = MemOffset;
3563 LowestIdxStore = NewStore;
3566 // Map the offset in the store and the offset in the combined value, and
3567 // early return if it has been set before.
3568 if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
3569 OffsetMap[*ShiftByteOffset] != INT64_MAX)
3571 OffsetMap[*ShiftByteOffset] = MemOffset;
3573 FoundStores.emplace_back(NewStore);
3574 // Reset counter since we've found a matching inst.
3575 NumInstsChecked = 0;
3576 if (FoundStores.size() == NumStoresRequired)
3580 if (FoundStores.size() != NumStoresRequired) {
3584 const auto &DL = LastStore.getMF()->getDataLayout();
3585 auto &C = LastStore.getMF()->getFunction().getContext();
3586 // Check that a store of the wide type is both allowed and fast on the target
3588 bool Allowed = getTargetLowering().allowsMemoryAccess(
3589 C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
3590 if (!Allowed || !Fast)
3593 // Check if the pieces of the value are going to the expected places in memory
3594 // to merge the stores.
3595 unsigned NarrowBits = MemTy.getScalarSizeInBits();
3596 auto checkOffsets = [&](bool MatchLittleEndian) {
3597 if (MatchLittleEndian) {
3598 for (unsigned i = 0; i != NumStoresRequired; ++i)
3599 if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
3601 } else { // MatchBigEndian by reversing loop counter.
3602 for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
3604 if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
3610 // Check if the offsets line up for the native data layout of this target.
3611 bool NeedBswap = false;
3612 bool NeedRotate = false;
3613 if (!checkOffsets(DL.isLittleEndian())) {
3614 // Special-case: check if byte offsets line up for the opposite endian.
3615 if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
3617 else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
3624 !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
3627 !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
3630 MatchInfo.NeedBSwap = NeedBswap;
3631 MatchInfo.NeedRotate = NeedRotate;
3632 MatchInfo.LowestIdxStore = LowestIdxStore;
3633 MatchInfo.WideSrcVal = WideSrcVal;
3634 MatchInfo.FoundStores = std::move(FoundStores);
3638 void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
3639 MergeTruncStoresInfo &MatchInfo) {
3641 Builder.setInstrAndDebugLoc(MI);
3642 Register WideSrcVal = MatchInfo.WideSrcVal;
3643 LLT WideStoreTy = MRI.getType(WideSrcVal);
3645 if (MatchInfo.NeedBSwap) {
3646 WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
3647 } else if (MatchInfo.NeedRotate) {
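// A rotate by half the bit-width swaps the two halves of the value, which
// covers the two-store case where the halves were stored in the opposite
// endian order.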
3648 assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
3649 "Unexpected type for rotate");
3651 Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
3653 Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
3656 Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
3657 MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
3658 MatchInfo.LowestIdxStore->getMMO().getAlign());
3660 // Erase the old stores.
3661 for (auto *ST : MatchInfo.FoundStores)
3662 ST->eraseFromParent();
3665 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3666 MachineInstr *&ExtMI) {
3667 assert(MI.getOpcode() == TargetOpcode::G_PHI);
3669 Register DstReg = MI.getOperand(0).getReg();
3671 // TODO: Extending a vector may be expensive, don't do this until heuristics
3673 if (MRI.getType(DstReg).isVector())
3676 // Try to match a phi, whose only use is an extend.
3677 if (!MRI.hasOneNonDBGUse(DstReg))
3679 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3680 switch (ExtMI->getOpcode()) {
3681 case TargetOpcode::G_ANYEXT:
3682 return true; // G_ANYEXT is usually free.
3683 case TargetOpcode::G_ZEXT:
3684 case TargetOpcode::G_SEXT:
3690 // If the target is likely to fold this extend away, don't propagate.
3691 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
3694 // We don't want to propagate the extends unless there's a good chance that
3695 // they'll be optimized in some way.
3696 // Collect the unique incoming values.
3697 SmallPtrSet<MachineInstr *, 4> InSrcs;
3698 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
3699 auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
3700 switch (DefMI->getOpcode()) {
3701 case TargetOpcode::G_LOAD:
3702 case TargetOpcode::G_TRUNC:
3703 case TargetOpcode::G_SEXT:
3704 case TargetOpcode::G_ZEXT:
3705 case TargetOpcode::G_ANYEXT:
3706 case TargetOpcode::G_CONSTANT:
3707 InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
3708 // Don't try to propagate if there are too many places to create new
3709 // extends, chances are it'll increase code size.
3710 if (InSrcs.size() > 2)
3720 void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3721 MachineInstr *&ExtMI) {
3722 assert(MI.getOpcode() == TargetOpcode::G_PHI);
3723 Register DstReg = ExtMI->getOperand(0).getReg();
3724 LLT ExtTy = MRI.getType(DstReg);
3726 // Propagate the extension into each incoming register's defining block.
3727 // Use a SetVector here because PHIs can have duplicate edges, and we want
3728 // deterministic iteration order.
3729 SmallSetVector<MachineInstr *, 8> SrcMIs;
3730 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
3731 for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
3732 auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
3733 if (!SrcMIs.insert(SrcMI))
3736 // Build an extend after each src inst.
3737 auto *MBB = SrcMI->getParent();
3738 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3739 if (InsertPt != MBB->end() && InsertPt->isPHI())
3740 InsertPt = MBB->getFirstNonPHI();
3742 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3743 Builder.setDebugLoc(MI.getDebugLoc());
3744 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
3745 SrcMI->getOperand(0).getReg());
3746 OldToNewSrcMap[SrcMI] = NewExt;
3749 // Create a new phi with the extended inputs.
3750 Builder.setInstrAndDebugLoc(MI);
3751 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3752 NewPhi.addDef(DstReg);
3753 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
3755 NewPhi.addMBB(MO.getMBB());
3758 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3759 NewPhi.addUse(NewSrc->getOperand(0).getReg());
3761 Builder.insertInstr(NewPhi);
3762 ExtMI->eraseFromParent();
3765 bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
3767 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
3768 // If we have a constant index, look for a G_BUILD_VECTOR source
3769 // and find the source register that the index maps to.
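// e.g. (G_EXTRACT_VECTOR_ELT (G_BUILD_VECTOR %a, %b, %c, %d), 2) -> %c.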
3770 Register SrcVec = MI.getOperand(1).getReg();
3771 LLT SrcTy = MRI.getType(SrcVec);
3772 if (!isLegalOrBeforeLegalizer(
3773 {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
3776 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3777 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
3780 unsigned VecIdx = Cst->Value.getZExtValue();
3781 MachineInstr *BuildVecMI =
3782 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
3784 BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
3787 LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
3788 if (!isLegalOrBeforeLegalizer(
3789 {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
3793 EVT Ty(getMVTForLLT(SrcTy));
3794 if (!MRI.hasOneNonDBGUse(SrcVec) &&
3795 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
3798 Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
3802 void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
3804 // Check the type of the register, since it may have come from a
3805 // G_BUILD_VECTOR_TRUNC.
3806 LLT ScalarTy = MRI.getType(Reg);
3807 Register DstReg = MI.getOperand(0).getReg();
3808 LLT DstTy = MRI.getType(DstReg);
3810 Builder.setInstrAndDebugLoc(MI);
3811 if (ScalarTy != DstTy) {
3812 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
3813 Builder.buildTrunc(DstReg, Reg);
3814 MI.eraseFromParent();
3817 replaceSingleDefInstWithReg(MI, Reg);
3820 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
3822 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3823 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3824 // This combine tries to find build_vector's which have every source element
3825 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
3826 // masked load scalarization are run late in the pipeline. There's already
3827 // a combine for a similar pattern starting from the extract, but that
3828 // doesn't attempt to do it if there are multiple uses of the build_vector,
3829 // which in this case is true. Starting the combine from the build_vector
3830 // feels more natural than trying to find sibling nodes of extracts.
3832 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
3833 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
3834 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
3835 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
3836 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
3838 // replace ext{1,2,3,4} with %s{1,2,3,4}
3840 Register DstReg = MI.getOperand(0).getReg();
3841 LLT DstTy = MRI.getType(DstReg);
3842 unsigned NumElts = DstTy.getNumElements();
3844 SmallBitVector ExtractedElts(NumElts);
3845 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
3846 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
3848 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
3851 unsigned Idx = Cst.getValue().getZExtValue();
3853 return false; // Out of range.
3854 ExtractedElts.set(Idx);
3855 SrcDstPairs.emplace_back(
3856 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
3858 // Match if every element was extracted.
3859 return ExtractedElts.all();
3862 void CombinerHelper::applyExtractAllEltsFromBuildVector(
3864 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3865 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3866 for (auto &Pair : SrcDstPairs) {
3867 auto *ExtMI = Pair.second;
3868 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
3869 ExtMI->eraseFromParent();
3871 MI.eraseFromParent();
3874 void CombinerHelper::applyBuildFn(
3875 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3876 Builder.setInstrAndDebugLoc(MI);
3878 MI.eraseFromParent();
3881 void CombinerHelper::applyBuildFnNoErase(
3882 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3883 Builder.setInstrAndDebugLoc(MI);
3887 bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
3888 BuildFnTy &MatchInfo) {
3889 assert(MI.getOpcode() == TargetOpcode::G_OR);
3891 Register Dst = MI.getOperand(0).getReg();
3892 LLT Ty = MRI.getType(Dst);
3893 unsigned BitWidth = Ty.getScalarSizeInBits();
3895 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
3896 unsigned FshOpc = 0;
3898 // Match (or (shl ...), (lshr ...)).
3899 if (!mi_match(Dst, MRI,
3900 // m_GOr() handles the commuted version as well.
3901 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
3902 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
3905 // Given constants C0 and C1 such that C0 + C1 is bit-width:
3906 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
3907 // TODO: Match constant splat.
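// e.g. with 32-bit operands: (or (shl x, 8), (lshr y, 24)) -> (fshr x, y, 24).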
3908 int64_t CstShlAmt, CstLShrAmt;
3909 if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) &&
3910 mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) &&
3911 CstShlAmt + CstLShrAmt == BitWidth) {
3912 FshOpc = TargetOpcode::G_FSHR;
3915 } else if (mi_match(LShrAmt, MRI,
3916 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
3918 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
3919 FshOpc = TargetOpcode::G_FSHL;
3921 } else if (mi_match(ShlAmt, MRI,
3922 m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
3924 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
3925 FshOpc = TargetOpcode::G_FSHR;
3931 LLT AmtTy = MRI.getType(Amt);
3932 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
3935 MatchInfo = [=](MachineIRBuilder &B) {
3936 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
3941 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
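/// This is possible when both value operands are the same register, since
/// (fshl x, x, amt) is (rotl x, amt) and (fshr x, x, amt) is (rotr x, amt).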
3942 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
3943 unsigned Opc = MI.getOpcode();
3944 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
3945 Register X = MI.getOperand(1).getReg();
3946 Register Y = MI.getOperand(2).getReg();
3949 unsigned RotateOpc =
3950 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
3951 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
3954 void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
3955 unsigned Opc = MI.getOpcode();
3956 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
3957 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
3958 Observer.changingInstr(MI);
3959 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
3960 : TargetOpcode::G_ROTR));
3961 MI.RemoveOperand(2);
3962 Observer.changedInstr(MI);
3965 // Fold (rot x, c) -> (rot x, c % BitSize)
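// e.g. for an s32 rotate, (G_ROTL x, 37) becomes (G_ROTL x, 37 % 32) = (G_ROTL x, 5).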
3966 bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
3967 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
3968 MI.getOpcode() == TargetOpcode::G_ROTR);
3970 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
3971 Register AmtReg = MI.getOperand(2).getReg();
3972 bool OutOfRange = false;
3973 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
3974 if (auto *CI = dyn_cast<ConstantInt>(C))
3975 OutOfRange |= CI->getValue().uge(Bitsize);
3978 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
3981 void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
3982 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
3983 MI.getOpcode() == TargetOpcode::G_ROTR);
3985 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
3986 Builder.setInstrAndDebugLoc(MI);
3987 Register Amt = MI.getOperand(2).getReg();
3988 LLT AmtTy = MRI.getType(Amt);
3989 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
3990 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
3991 Observer.changingInstr(MI);
3992 MI.getOperand(2).setReg(Amt);
3993 Observer.changedInstr(MI);
3996 bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
3997 int64_t &MatchInfo) {
3998 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
3999 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4000 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4001 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4002 Optional<bool> KnownVal;
4005 llvm_unreachable("Unexpected G_ICMP predicate?");
4006 case CmpInst::ICMP_EQ:
4007 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4009 case CmpInst::ICMP_NE:
4010 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4012 case CmpInst::ICMP_SGE:
4013 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4015 case CmpInst::ICMP_SGT:
4016 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4018 case CmpInst::ICMP_SLE:
4019 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4021 case CmpInst::ICMP_SLT:
4022 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4024 case CmpInst::ICMP_UGE:
4025 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4027 case CmpInst::ICMP_UGT:
4028 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4030 case CmpInst::ICMP_ULE:
4031 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4033 case CmpInst::ICMP_ULT:
4034 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4041 ? getICmpTrueVal(getTargetLowering(),
4043 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4049 bool CombinerHelper::matchICmpToLHSKnownBits(
4050 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4051 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4054 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4055 // %cmp = G_ICMP ne %x, 0
4059 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4060 // %cmp = G_ICMP eq %x, 1
4062 // We can replace %cmp with %x assuming true is 1 on the target.
4063 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4064 if (!CmpInst::isEquality(Pred))
4066 Register Dst = MI.getOperand(0).getReg();
4067 LLT DstTy = MRI.getType(Dst);
4068 if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
4069 /* IsFP = */ false) != 1)
4071 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4072 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4074 Register LHS = MI.getOperand(2).getReg();
4075 auto KnownLHS = KB->getKnownBits(LHS);
4076 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4078 // Make sure replacing Dst with the LHS is a legal operation.
4079 LLT LHSTy = MRI.getType(LHS);
4080 unsigned LHSSize = LHSTy.getSizeInBits();
4081 unsigned DstSize = DstTy.getSizeInBits();
4082 unsigned Op = TargetOpcode::COPY;
4083 if (DstSize != LHSSize)
4084 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4085 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4087 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4091 // Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
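// e.g. (and (or x, 0xFF00), 0x00FF) -> (and x, 0x00FF), since 0xFF00 & 0x00FF == 0.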
4092 bool CombinerHelper::matchAndOrDisjointMask(
4093 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4094 assert(MI.getOpcode() == TargetOpcode::G_AND);
4096 // Ignore vector types to simplify matching the two constants.
4097 // TODO: do this for vectors and scalars via a demanded bits analysis.
4098 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4105 if (!mi_match(MI, MRI,
4106 m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
4109 // Check if MaskOr could turn on any bits in Src.
4110 if (MaskAnd & MaskOr)
4113 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4114 Observer.changingInstr(MI);
4115 MI.getOperand(1).setReg(Src);
4116 Observer.changedInstr(MI);
4121 /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4122 bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4123 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4124 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4125 Register Dst = MI.getOperand(0).getReg();
4126 Register Src = MI.getOperand(1).getReg();
4127 LLT Ty = MRI.getType(Src);
4128 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4129 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4131 int64_t Width = MI.getOperand(2).getImm();
4136 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4137 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4139 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4142 MatchInfo = [=](MachineIRBuilder &B) {
4143 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4144 auto Cst2 = B.buildConstant(ExtractTy, Width);
4145 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4150 /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4151 bool CombinerHelper::matchBitfieldExtractFromAnd(
4152 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4153 assert(MI.getOpcode() == TargetOpcode::G_AND);
4154 Register Dst = MI.getOperand(0).getReg();
4155 LLT Ty = MRI.getType(Dst);
4156 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4157 if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
4158 TargetOpcode::G_UBFX, Ty, ExtractTy))
4161 int64_t AndImm, LSBImm;
4163 const unsigned Size = Ty.getScalarSizeInBits();
4164 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4165 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4169 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
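// e.g. 0x0000FFFF is such a mask (0xFFFF & 0x10000 == 0), while 0x00FF00FF is not.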
4170 auto MaybeMask = static_cast<uint64_t>(AndImm);
4171 if (MaybeMask & (MaybeMask + 1))
4174 // LSB must fit within the register.
4175 if (static_cast<uint64_t>(LSBImm) >= Size)
4178 uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
4179 MatchInfo = [=](MachineIRBuilder &B) {
4180 auto WidthCst = B.buildConstant(ExtractTy, Width);
4181 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4182 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4187 bool CombinerHelper::matchBitfieldExtractFromShr(
4188 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4189 const unsigned Opcode = MI.getOpcode();
4190 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4192 const Register Dst = MI.getOperand(0).getReg();
4194 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4195 ? TargetOpcode::G_SBFX
4196 : TargetOpcode::G_UBFX;
4198 // Check if the type we would use for the extract is legal
4199 LLT Ty = MRI.getType(Dst);
4200 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4201 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4207 const unsigned Size = Ty.getScalarSizeInBits();
4209 // Try to match shr (shl x, c1), c2
4210 if (!mi_match(Dst, MRI,
4212 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4216 // Make sure that the shift sizes can fit a bitfield extract
4217 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4220 // Skip this combine if the G_SEXT_INREG combine could handle it
4221 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4224 // Calculate start position and width of the extract
4225 const int64_t Pos = ShrAmt - ShlAmt;
4226 const int64_t Width = Size - ShrAmt;
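// Illustrative example on s32: (lshr (shl x, 4), 8) reads bits [4, 27] of x,
// giving Pos = 4 and Width = 24.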
4228 MatchInfo = [=](MachineIRBuilder &B) {
4229 auto WidthCst = B.buildConstant(ExtractTy, Width);
4230 auto PosCst = B.buildConstant(ExtractTy, Pos);
4231 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4236 bool CombinerHelper::matchBitfieldExtractFromShrAnd(
4237 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4238 const unsigned Opcode = MI.getOpcode();
4239 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4241 const Register Dst = MI.getOperand(0).getReg();
4242 LLT Ty = MRI.getType(Dst);
4243 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4244 if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
4245 TargetOpcode::G_UBFX, Ty, ExtractTy))
4248 // Try to match shr (and x, c1), c2
4252 if (!mi_match(Dst, MRI,
4254 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4258 const unsigned Size = Ty.getScalarSizeInBits();
4259 if (ShrAmt < 0 || ShrAmt >= Size)
4262 // Check that ubfx can do the extraction, with no holes in the mask.
4263 uint64_t UMask = SMask;
4264 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4265 UMask &= maskTrailingOnes<uint64_t>(Size);
4266 if (!isMask_64(UMask))
4269 // Calculate start position and width of the extract.
4270 const int64_t Pos = ShrAmt;
4271 const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
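// Illustrative example on s32: (lshr (and x, 0x0FF0), 4) becomes (G_UBFX x, Pos=4, Width=8).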
4273 // It's preferable to keep the shift, rather than form G_SBFX.
4274 // TODO: remove the G_AND via demanded bits analysis.
4275 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4278 MatchInfo = [=](MachineIRBuilder &B) {
4279 auto WidthCst = B.buildConstant(ExtractTy, Width);
4280 auto PosCst = B.buildConstant(ExtractTy, Pos);
4281 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4286 bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4287 MachineInstr &PtrAdd) {
4288 assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
4290 Register Src1Reg = PtrAdd.getOperand(1).getReg();
4291 MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
4295 Register Src2Reg = PtrAdd.getOperand(2).getReg();
4297 if (MRI.hasOneNonDBGUse(Src1Reg))
4300 auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
4303 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4307 const APInt &C1APIntVal = *C1;
4308 const APInt &C2APIntVal = *C2;
4309 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4311 for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
4312 // This combine may end up running before ptrtoint/inttoptr combines
4313 // manage to eliminate redundant conversions, so try to look through them.
4314 MachineInstr *ConvUseMI = &UseMI;
4315 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4316 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4317 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4318 Register DefReg = ConvUseMI->getOperand(0).getReg();
4319 if (!MRI.hasOneNonDBGUse(DefReg))
4321 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4322 ConvUseOpc = ConvUseMI->getOpcode();
4324 auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
4325 ConvUseOpc == TargetOpcode::G_STORE;
4328 // Is x[offset2] already not a legal addressing mode? If so then
4329 // reassociating the constants breaks nothing (we test offset2 because
4330 // that's the one we hope to fold into the load or store).
4331 TargetLoweringBase::AddrMode AM;
4332 AM.HasBaseReg = true;
4333 AM.BaseOffs = C2APIntVal.getSExtValue();
4335 MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
4337 getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
4338 PtrAdd.getMF()->getFunction().getContext());
4339 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4340 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4344 // Would x[offset1+offset2] still be a legal addressing mode?
4345 AM.BaseOffs = CombinedValue;
4346 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4354 bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
4356 BuildFnTy &MatchInfo) {
4357 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4358 Register Src1Reg = MI.getOperand(1).getReg();
4359 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4361 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4365 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4366 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4369 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4370 Observer.changingInstr(MI);
4371 MI.getOperand(1).setReg(NewBase.getReg(0));
4372 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4373 Observer.changedInstr(MI);
4375 return !reassociationCanBreakAddressingModePattern(MI);
4378 bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
4381 BuildFnTy &MatchInfo) {
4382 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
4383 // if and only if (G_PTR_ADD X, C) has one use.
4385 Optional<ValueAndVReg> LHSCstOff;
4386 if (!mi_match(MI.getBaseReg(), MRI,
4387 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4390 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4391 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4392 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4393 // before its def. Sink LHSPtrAdd to just before the outer PTR_ADD to ensure this
4395 LHSPtrAdd->moveBefore(&MI);
4396 Register RHSReg = MI.getOffsetReg();
4397 Observer.changingInstr(MI);
4398 MI.getOperand(2).setReg(LHSCstOff->VReg);
4399 Observer.changedInstr(MI);
4400 Observer.changingInstr(*LHSPtrAdd);
4401 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4402 Observer.changedInstr(*LHSPtrAdd);
4404 return !reassociationCanBreakAddressingModePattern(MI);
4407 bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
4410 BuildFnTy &MatchInfo) {
4411 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4412 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4416 Register Src2Reg = MI.getOperand(2).getReg();
4417 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4418 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4419 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4422 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4426 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4427 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4428 Observer.changingInstr(MI);
4429 MI.getOperand(1).setReg(LHSSrc1);
4430 MI.getOperand(2).setReg(NewCst.getReg(0));
4431 Observer.changedInstr(MI);
4433 return !reassociationCanBreakAddressingModePattern(MI);
4436 bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
4437 BuildFnTy &MatchInfo) {
4438 auto &PtrAdd = cast<GPtrAdd>(MI);
4439 // We're trying to match a few pointer computation patterns here for
4440 // re-association opportunities.
4441 // 1) Isolating a constant operand to be on the RHS, e.g.:
4442 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4444 // 2) Folding two constants in each sub-tree as long as such folding
4445 // doesn't break a legal addressing mode.
4446 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4448 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4449 // (G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD X, Y), C)
4450 // iff (G_PTR_ADD X, C) has one use.
4451 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4452 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4454 // Try to match example 2.
4455 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4458 // Try to match example 3.
4459 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4462 // Try to match example 1.
4463 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4469 bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
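// Fold a binary op whose operands are both integer constants,
// e.g. (G_ADD 3, 4) folds to the constant 7.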
4470 Register Op1 = MI.getOperand(1).getReg();
4471 Register Op2 = MI.getOperand(2).getReg();
4472 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4475 MatchInfo = *MaybeCst;
4479 bool CombinerHelper::matchNarrowBinopFeedingAnd(
4480 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4481 // Look for a binop feeding into an AND with a mask:
4483 // %add = G_ADD %lhs, %rhs
4484 // %and = G_AND %add, 000...11111111
4486 // Check if it's possible to perform the binop at a narrower width and zext
4487 // back to the original width like so:
4489 // %narrow_lhs = G_TRUNC %lhs
4490 // %narrow_rhs = G_TRUNC %rhs
4491 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4492 // %new_add = G_ZEXT %narrow_add
4493 // %and = G_AND %new_add, 000...11111111
4495 // This can allow later combines to eliminate the G_AND if it turns out
4496 // that the mask is irrelevant.
4497 assert(MI.getOpcode() == TargetOpcode::G_AND);
4498 Register Dst = MI.getOperand(0).getReg();
4499 Register AndLHS = MI.getOperand(1).getReg();
4500 Register AndRHS = MI.getOperand(2).getReg();
4501 LLT WideTy = MRI.getType(Dst);
4503 // If the potential binop has more than one use, then it's possible that one
4504 // of those uses will need its full width.
4505 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4508 // Check if the LHS feeding the AND is impacted by the high bits that we're
4511 // e.g. for 64-bit x, y:
4513 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4514 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4517 unsigned LHSOpc = LHSInst->getOpcode();
4521 case TargetOpcode::G_ADD:
4522 case TargetOpcode::G_SUB:
4523 case TargetOpcode::G_MUL:
4524 case TargetOpcode::G_AND:
4525 case TargetOpcode::G_OR:
4526 case TargetOpcode::G_XOR:
4530 // Find the mask on the RHS.
4531 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4534 auto Mask = Cst->Value;
4538 // No point in combining if there's nothing to truncate.
4539 unsigned NarrowWidth = Mask.countTrailingOnes();
4540 if (NarrowWidth == WideTy.getSizeInBits())
4542 LLT NarrowTy = LLT::scalar(NarrowWidth);
4544 // Check if adding the zext + truncates could be harmful.
4545 auto &MF = *MI.getMF();
4546 const auto &TLI = getTargetLowering();
4547 LLVMContext &Ctx = MF.getFunction().getContext();
4548 auto &DL = MF.getDataLayout();
4549 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
4550 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
4552 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
4553 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
4555 Register BinOpLHS = LHSInst->getOperand(1).getReg();
4556 Register BinOpRHS = LHSInst->getOperand(2).getReg();
4557 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4558 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
4559 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
4561 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
4562 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
4563 Observer.changingInstr(MI);
4564 MI.getOperand(1).setReg(Ext.getReg(0));
4565 Observer.changedInstr(MI);
4570 bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
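// Transform (G_UMULO x, 2) -> (G_UADDO x, x) and (G_SMULO x, 2) -> (G_SADDO x, x).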
4571 unsigned Opc = MI.getOpcode();
4572 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
4574 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
4577 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4578 Observer.changingInstr(MI);
4579 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
4580 : TargetOpcode::G_SADDO;
4581 MI.setDesc(Builder.getTII().get(NewOpc));
4582 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
4583 Observer.changedInstr(MI);
4588 MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
4589 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
4590 auto &UDiv = cast<GenericMachineInstr>(MI);
4591 Register Dst = UDiv.getReg(0);
4592 Register LHS = UDiv.getReg(1);
4593 Register RHS = UDiv.getReg(2);
4594 LLT Ty = MRI.getType(Dst);
4595 LLT ScalarTy = Ty.getScalarType();
4596 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
4597 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4598 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
4599 auto &MIB = Builder;
4600 MIB.setInstrAndDebugLoc(MI);
4602 bool UseNPQ = false;
4603 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
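// For each divisor element, compute a magic multiplier plus optional pre/post
// shifts (the classic Granlund & Montgomery / Hacker's Delight scheme); the
// NPQ factors drive the extra add-and-shift fixup used when the magic constant
// needs one more bit of precision than a plain multiply-high-and-shift allows.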
4605 auto BuildUDIVPattern = [&](const Constant *C) {
4606 auto *CI = cast<ConstantInt>(C);
4607 const APInt &Divisor = CI->getValue();
4608 UnsignedDivisonByConstantInfo magics =
4609 UnsignedDivisonByConstantInfo::get(Divisor);
4610 unsigned PreShift = 0, PostShift = 0;
4612 // If the divisor is even, we can avoid using the expensive fixup by
4613 // shifting the divided value upfront.
4614 if (magics.IsAdd != 0 && !Divisor[0]) {
4615 PreShift = Divisor.countTrailingZeros();
4616 // Get magic number for the shifted divisor.
4618 UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
4619 assert(magics.IsAdd == 0 && "Should use cheap fixup now");
4622 APInt Magic = magics.Magic;
4625 if (magics.IsAdd == 0 || Divisor.isOneValue()) {
4626 assert(magics.ShiftAmount < Divisor.getBitWidth() &&
4627 "We shouldn't generate an undefined shift!");
4628 PostShift = magics.ShiftAmount;
4631 PostShift = magics.ShiftAmount - 1;
4635 PreShifts.push_back(
4636 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
4637 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
4638 NPQFactors.push_back(
4639 MIB.buildConstant(ScalarTy,
4640 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
4641 : APInt::getZero(EltBits))
4643 PostShifts.push_back(
4644 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
4649 // Collect the shifts/magic values from each element.
4650 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
4652 assert(Matched && "Expected unary predicate match to succeed");
4654 Register PreShift, PostShift, MagicFactor, NPQFactor;
4655 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
4657 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
4658 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
4659 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
4660 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
4662 assert(MRI.getType(RHS).isScalar() &&
4663 "Non-build_vector operation should have been a scalar");
4664 PreShift = PreShifts[0];
4665 MagicFactor = MagicFactors[0];
4666 PostShift = PostShifts[0];
4670 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
4672 // Multiply the numerator (operand 0) by the magic value.
4673 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
4676 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
4678 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
4679 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
4681 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
4683 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
4685 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
4688 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
4689 auto One = MIB.buildConstant(Ty, 1);
4690 auto IsOne = MIB.buildICmp(
4691 CmpInst::Predicate::ICMP_EQ,
4692 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
4693 return MIB.buildSelect(Ty, IsOne, LHS, Q);
4696 bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
4697 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
4698 Register Dst = MI.getOperand(0).getReg();
4699 Register RHS = MI.getOperand(2).getReg();
4700 LLT DstTy = MRI.getType(Dst);
4701 auto *RHSDef = MRI.getVRegDef(RHS);
4702 if (!isConstantOrConstantVector(*RHSDef, MRI))
4705 auto &MF = *MI.getMF();
4706 AttributeList Attr = MF.getFunction().getAttributes();
4707 const auto &TLI = getTargetLowering();
4708 LLVMContext &Ctx = MF.getFunction().getContext();
4709 auto &DL = MF.getDataLayout();
4710 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
4713 // Don't do this for minsize because the instruction sequence is usually
4715 if (MF.getFunction().hasMinSize())
4718 // Don't do this if the types are not going to be legal.
4720 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
4722 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
4724 if (!isLegalOrBeforeLegalizer(
4725 {TargetOpcode::G_ICMP,
4726 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
4731 auto CheckEltValue = [&](const Constant *C) {
4732 if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
4733 return !CI->isZero();
4736 return matchUnaryPredicate(MRI, RHS, CheckEltValue);
4739 void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
4740 auto *NewMI = buildUDivUsingMul(MI);
4741 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
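// Fold G_UMULH by a power of two into a right shift:
// umulh(x, 1 << c) == x >> (bitwidth - c), e.g. on s32, (G_UMULH x, 8) -> (G_LSHR x, 29).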
4744 bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
4745 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
4746 Register RHS = MI.getOperand(2).getReg();
4747 Register Dst = MI.getOperand(0).getReg();
4748 LLT Ty = MRI.getType(Dst);
4749 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4750 auto MatchPow2ExceptOne = [&](const Constant *C) {
4751 if (auto *CI = dyn_cast<ConstantInt>(C))
4752 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
4755 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
4757 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
4760 void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
4761 Register LHS = MI.getOperand(1).getReg();
4762 Register RHS = MI.getOperand(2).getReg();
4763 Register Dst = MI.getOperand(0).getReg();
4764 LLT Ty = MRI.getType(Dst);
4765 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4766 unsigned NumEltBits = Ty.getScalarSizeInBits();
4768 Builder.setInstrAndDebugLoc(MI);
4769 auto LogBase2 = buildLogBase2(RHS, Builder);
4771 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
4772 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
4773 Builder.buildLShr(Dst, LHS, Trunc);
4774 MI.eraseFromParent();
4777 bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
4778 BuildFnTy &MatchInfo) {
4779 unsigned Opc = MI.getOpcode();
4780 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
4781 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
4782 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
4784 Register Dst = MI.getOperand(0).getReg();
4785 Register X = MI.getOperand(1).getReg();
4786 Register Y = MI.getOperand(2).getReg();
4787 LLT Type = MRI.getType(Dst);
4789 // fold (fadd x, fneg(y)) -> (fsub x, y)
4790 // fold (fadd fneg(y), x) -> (fsub x, y)
4791 // G_FADD is commutative, so both cases are checked by m_GFAdd.
4792 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
4793 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
4794 Opc = TargetOpcode::G_FSUB;
4796 /// fold (fsub x, fneg(y)) -> (fadd x, y)
4797 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
4798 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
4799 Opc = TargetOpcode::G_FADD;
4801 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
4802 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
4803 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
4804 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
4805 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
4806 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
4807 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
4808 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
4813 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4814 Observer.changingInstr(MI);
4815 MI.setDesc(B.getTII().get(Opc));
4816 MI.getOperand(1).setReg(X);
4817 MI.getOperand(2).setReg(Y);
4818 Observer.changedInstr(MI);
4823 /// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
4824 /// due to global flags or MachineInstr flags.
4825 static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
4826 if (MI.getOpcode() != TargetOpcode::G_FMUL)
4828 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
4831 static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
4832 const MachineRegisterInfo &MRI) {
4833 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
4834 MRI.use_instr_nodbg_end()) >
4835 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
4836 MRI.use_instr_nodbg_end());
4839 bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
4840 bool &AllowFusionGlobally,
4841 bool &HasFMAD, bool &Aggressive,
4842 bool CanReassociate) {
4844 auto *MF = MI.getMF();
4845 const auto &TLI = *MF->getSubtarget().getTargetLowering();
4846 const TargetOptions &Options = MF->getTarget().Options;
4847 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
4849 if (CanReassociate &&
4850 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
4853 // Floating-point multiply-add with intermediate rounding.
4854 HasFMAD = (LI && TLI.isFMADLegal(MI, DstType));
4855 // Floating-point multiply-add without intermediate rounding.
4856 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
4857 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
4858 // No valid opcode, do not combine.
4859 if (!HasFMAD && !HasFMA)
4862 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
4863 Options.UnsafeFPMath || HasFMAD;
4864 // If the addition is not contractable, do not combine.
4865 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
4868 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
4872 bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
4873 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4874 assert(MI.getOpcode() == TargetOpcode::G_FADD);
4876 bool AllowFusionGlobally, HasFMAD, Aggressive;
4877 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
4880 Register Op1 = MI.getOperand(1).getReg();
4881 Register Op2 = MI.getOperand(2).getReg();
4882 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
4883 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
4884 unsigned PreferredFusedOpcode =
4885 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
4887 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
4888 // prefer to fold the multiply with fewer uses.
4889 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
4890 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
4891 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
4892 std::swap(LHS, RHS);
4895 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
4896 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
4897 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
4898 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4899 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
4900 {LHS.MI->getOperand(1).getReg(),
4901 LHS.MI->getOperand(2).getReg(), RHS.Reg});
4906 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
4907 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
4908 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
4909 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4910 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
4911 {RHS.MI->getOperand(1).getReg(),
4912 RHS.MI->getOperand(2).getReg(), LHS.Reg});
4920 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
4921 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4922 assert(MI.getOpcode() == TargetOpcode::G_FADD);
4924 bool AllowFusionGlobally, HasFMAD, Aggressive;
4925 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
4928 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
4929 Register Op1 = MI.getOperand(1).getReg();
4930 Register Op2 = MI.getOperand(2).getReg();
4931 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
4932 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
4933 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
4935 unsigned PreferredFusedOpcode =
4936 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
4938 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
4939 // prefer to fold the multiply with fewer uses.
4940 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
4941 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
4942 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
4943 std::swap(LHS, RHS);
4946 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
4947 MachineInstr *FpExtSrc;
4948 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
4949 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
4950 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
4951 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
4952 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4953 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
4954 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
4955 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
4956 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
4961 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
4962 // Note: Commutes FADD operands.
4963 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
4964 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
4965 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
4966 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
4967 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4968 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
4969 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
4970 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
4971 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
4979 bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
4980 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4981 assert(MI.getOpcode() == TargetOpcode::G_FADD);
4983 bool AllowFusionGlobally, HasFMAD, Aggressive;
4984 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
4987 Register Op1 = MI.getOperand(1).getReg();
4988 Register Op2 = MI.getOperand(2).getReg();
4989 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
4990 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
4991 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4993 unsigned PreferredFusedOpcode =
4994 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
4996 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
4997 // prefer to fold the multiply with fewer uses.
4998 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
4999 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5000 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5001 std::swap(LHS, RHS);
5004 MachineInstr *FMA = nullptr;
5006 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5007 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5008 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5009 TargetOpcode::G_FMUL) &&
5010 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5011 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5015 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5016 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5017 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5018 TargetOpcode::G_FMUL) &&
5019 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5020 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5026 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5027 Register X = FMA->getOperand(1).getReg();
5028 Register Y = FMA->getOperand(2).getReg();
5029 Register U = FMulMI->getOperand(1).getReg();
5030 Register V = FMulMI->getOperand(2).getReg();
5032 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5033 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5034 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5035 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5044 bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
5045 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5046 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5048 bool AllowFusionGlobally, HasFMAD, Aggressive;
5049 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5055 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5056 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5057 Register Op1 = MI.getOperand(1).getReg();
5058 Register Op2 = MI.getOperand(2).getReg();
5059 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5060 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5062 unsigned PreferredFusedOpcode =
5063 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5065 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5066 // prefer to fold the multiply with fewer uses.
5067 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5068 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5069 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5070 std::swap(LHS, RHS);
5073 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5074 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5075 Register Y, MachineIRBuilder &B) {
5076 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5077 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5079 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5081 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5085 MachineInstr *FMulMI, *FMAMI;
5086 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5087 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5088 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5089 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5090 m_GFPExt(m_MInstr(FMulMI))) &&
5091 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5092 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5093 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5094 MatchInfo = [=](MachineIRBuilder &B) {
5095 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5096 FMulMI->getOperand(2).getReg(), RHS.Reg,
5097 LHS.MI->getOperand(1).getReg(),
5098 LHS.MI->getOperand(2).getReg(), B);
5103 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5104 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5105 // FIXME: This turns two single-precision and one double-precision
5106 // operation into two double-precision operations, which might not be
5107 // interesting for all targets, especially GPUs.
5108 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5109 FMAMI->getOpcode() == PreferredFusedOpcode) {
5110 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5111 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5112 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5113 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5114 MatchInfo = [=](MachineIRBuilder &B) {
5115 Register X = FMAMI->getOperand(1).getReg();
5116 Register Y = FMAMI->getOperand(2).getReg();
5117 X = B.buildFPExt(DstType, X).getReg(0);
5118 Y = B.buildFPExt(DstType, Y).getReg(0);
5119 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5120 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5127 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5128 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5129 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5130 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5131 m_GFPExt(m_MInstr(FMulMI))) &&
5132 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5133 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5134 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5135 MatchInfo = [=](MachineIRBuilder &B) {
5136 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5137 FMulMI->getOperand(2).getReg(), LHS.Reg,
5138 RHS.MI->getOperand(1).getReg(),
5139 RHS.MI->getOperand(2).getReg(), B);
5144 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5145 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5146 // FIXME: This turns two single-precision and one double-precision
5147 // operation into two double-precision operations, which might not be
5148 // interesting for all targets, especially GPUs.
5149 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5150 FMAMI->getOpcode() == PreferredFusedOpcode) {
5151 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5152 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5153 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5154 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5155 MatchInfo = [=](MachineIRBuilder &B) {
5156 Register X = FMAMI->getOperand(1).getReg();
5157 Register Y = FMAMI->getOperand(2).getReg();
5158 X = B.buildFPExt(DstType, X).getReg(0);
5159 Y = B.buildFPExt(DstType, Y).getReg(0);
5160 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5161 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
  DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
  // prefer to fold the multiply with fewer uses.
  bool FirstMulHasFewerUses = true;
  if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
      isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
      hasMoreUses(*LHS.MI, *RHS.MI, MRI))
    FirstMulHasFewerUses = false;

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
  if (FirstMulHasFewerUses &&
      (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
       (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {LHS.MI->getOperand(1).getReg(),
                    LHS.MI->getOperand(2).getReg(), NegZ});
    };
    return true;
  }
  // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
  else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
            (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegY =
          B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
    };
    return true;
  }

  return false;
}

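// Example of the fneg variant handled by the matcher below (illustrative MIR,
// invented register names and types):
//   %mul:_(s32)  = G_FMUL %x, %y
//   %neg:_(s32)  = G_FNEG %mul
//   %res:_(s32)  = G_FSUB %neg, %z
// becomes
//   %negx:_(s32) = G_FNEG %x
//   %negz:_(s32) = G_FNEG %z
//   %res:_(s32)  = G_FMA %negx, %y, %negz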
bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  MachineInstr *FMulMI;
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
  if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
      (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register NegX =
          B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegX, FMulMI->getOperand(2).getReg(), NegZ});
    };
    return true;
  }

  // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
  if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
      (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
                      MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally)) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FMulMI->getOperand(1).getReg(),
                    FMulMI->getOperand(2).getReg(), LHSReg});
    };
    return true;
  }

  return false;
}

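// Example of the fpext variant handled by the matcher below (illustrative MIR;
// the narrow s32 and wide s64 types are invented for the example):
//   %mul:_(s32)  = G_FMUL %y, %z
//   %ext:_(s64)  = G_FPEXT %mul(s32)
//   %res:_(s64)  = G_FSUB %x, %ext
// becomes
//   %ey:_(s64)   = G_FPEXT %y(s32)
//   %negy:_(s64) = G_FNEG %ey
//   %ez:_(s64)   = G_FPEXT %z(s32)
//   %res:_(s64)  = G_FMA %negy, %ez, %x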
bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

  MachineInstr *FMulMI;
  // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
  if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FpExtX =
          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register FpExtY =
          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
      Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {FpExtX, FpExtY, NegZ});
    };
    return true;
  }

  // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
  if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FpExtY =
          B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
      Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
      Register FpExtZ =
          B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
      B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
                   {NegY, FpExtZ, LHSReg});
    };
    return true;
  }

  return false;
}

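// Example of the combined fpext + fneg variant handled by the matcher below
// (illustrative MIR, invented register names and types):
//   %mul:_(s32) = G_FMUL %x, %y
//   %neg:_(s32) = G_FNEG %mul
//   %ext:_(s64) = G_FPEXT %neg(s32)
//   %res:_(s64) = G_FSUB %ext, %z
// becomes
//   %ex:_(s64)  = G_FPEXT %x(s32)
//   %ey:_(s64)  = G_FPEXT %y(s32)
//   %fma:_(s64) = G_FMA %ex, %ey, %z
//   %res:_(s64) = G_FNEG %fma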
bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_FSUB);

  bool AllowFusionGlobally, HasFMAD, Aggressive;
  if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
    return false;

  const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  Register LHSReg = MI.getOperand(1).getReg();
  Register RHSReg = MI.getOperand(2).getReg();

  unsigned PreferredFusedOpcode =
      HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;

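  // Helper that extends X and Y to the destination type and emits the fused
  // multiply-add into Dst. The addend Z is used as-is, so callers pass either
  // the original addend or a value they have already negated.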
  auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
                            MachineIRBuilder &B) {
    Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
    Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
    B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
  };

  MachineInstr *FMulMI;
  // fold (fsub (fpext (fneg (fmul x, y))), z) ->
  //      (fneg (fma (fpext x), (fpext y), z))
  // fold (fsub (fneg (fpext (fmul x, y))), z) ->
  //      (fneg (fma (fpext x), (fpext y), z))
  if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
       mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
      buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), RHSReg, B);
      B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
    };
    return true;
  }

  // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
  // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
  if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
       mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
      isContractableFMul(*FMulMI, AllowFusionGlobally) &&
      TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
                          MRI.getType(FMulMI->getOperand(0).getReg()))) {
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
                     FMulMI->getOperand(2).getReg(), LHSReg, B);
    };
    return true;
  }

  return false;
}

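// Top-level driver that tries the generic combines in sequence. It is
// typically invoked from a target's Combiner implementation; a minimal sketch
// (variable names are illustrative, not part of this file):
//   CombinerHelper Helper(Observer, B, KB, MDT, LI);
//   if (Helper.tryCombine(MI))
//     return true;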
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))