//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
13 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
15 #include "llvm/CodeGen/GlobalISel/Utils.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/Support/MathExtras.h"
23 #include "llvm/Target/TargetMachine.h"
25 #define DEBUG_TYPE "gi-combiner"
using namespace llvm;
using namespace MIPatternMatch;

// Option to allow testing of the combiner while no targets know about indexed
// addressing.
static cl::opt<bool>
    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
                       cl::desc("Force all indexed operations to be "
                                "legal for the GlobalISel combiner"));
38 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
39 MachineIRBuilder &B, GISelKnownBits *KB,
40 MachineDominatorTree *MDT,
41 const LegalizerInfo *LI)
42 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
      KB(KB), MDT(MDT), LI(LI) {
  (void)this->KB;
}
47 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
48 Register ToReg) const {
49 Observer.changingAllUsesOfReg(MRI, FromReg);
  if (MRI.constrainRegAttrs(ToReg, FromReg))
    MRI.replaceRegWith(FromReg, ToReg);
  else
    Builder.buildCopy(ToReg, FromReg);

  Observer.finishedChangingAllUsesOfReg();
}
59 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
60 MachineOperand &FromRegOp,
61 Register ToReg) const {
62 assert(FromRegOp.getParent() && "Expected an operand in an MI");
63 Observer.changingInstr(*FromRegOp.getParent());
65 FromRegOp.setReg(ToReg);
  Observer.changedInstr(*FromRegOp.getParent());
}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
  if (matchCombineCopy(MI)) {
    applyCombineCopy(MI);
    return true;
  }
  return false;
}
77 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::COPY)
    return false;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  return canReplaceReg(DstReg, SrcReg, MRI);
}
84 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
85 Register DstReg = MI.getOperand(0).getReg();
86 Register SrcReg = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, SrcReg);
}
bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
  bool IsUndef = false;
  SmallVector<Register, 4> Ops;
  if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
    applyCombineConcatVectors(MI, IsUndef, Ops);
    return true;
  }
  return false;
}
101 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
102 SmallVectorImpl<Register> &Ops) {
103 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Invalid instruction");
  IsUndef = true;
  MachineInstr *Undef = nullptr;
108 // Walk over all the operands of concat vectors and check if they are
109 // build_vector themselves or undef.
110 // Then collect their operands in Ops.
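  // For example (illustrative MIR, register names are made up):
  //   %a:_(<2 x s32>) = G_BUILD_VECTOR %x, %y
  //   %b:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %c:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
  // gathers Ops = {%x, %y, %undef, %undef}, where a single scalar undef is
  // reused for every undef element, so the concat can later be rewritten as
  // one flattened G_BUILD_VECTOR.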
111 for (const MachineOperand &MO : MI.uses()) {
112 Register Reg = MO.getReg();
113 MachineInstr *Def = MRI.getVRegDef(Reg);
114 assert(Def && "Operand not defined");
115 switch (Def->getOpcode()) {
    case TargetOpcode::G_BUILD_VECTOR:
      IsUndef = false;
      // Remember the operands of the build_vector to fold
      // them into the yet-to-build flattened concat vectors.
      for (const MachineOperand &BuildVecMO : Def->uses())
        Ops.push_back(BuildVecMO.getReg());
      break;
123 case TargetOpcode::G_IMPLICIT_DEF: {
124 LLT OpType = MRI.getType(Reg);
      // Keep one undef value for all the undef operands.
      if (!Undef) {
        Builder.setInsertPt(*MI.getParent(), MI);
        Undef = Builder.buildUndef(OpType.getScalarType());
      }
130 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
131 OpType.getScalarType() &&
132 "All undefs should have the same type");
133 // Break the undef vector in as many scalar elements as needed
134 // for the flattening.
135 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
136 EltIdx != EltEnd; ++EltIdx)
        Ops.push_back(Undef->getOperand(0).getReg());
      break;
    }
    default:
      return false;
    }
  }
  return true;
}
146 void CombinerHelper::applyCombineConcatVectors(
147 MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
149 // Generate the flattened build_vector.
150 Register DstReg = MI.getOperand(0).getReg();
151 Builder.setInsertPt(*MI.getParent(), MI);
152 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
  if (IsUndef)
    Builder.buildUndef(NewDstReg);
  else
    Builder.buildBuildVector(NewDstReg, Ops);
164 MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  SmallVector<Register, 4> Ops;
  if (matchCombineShuffleVector(MI, Ops)) {
    applyCombineShuffleVector(MI, Ops);
    return true;
  }
  return false;
}
177 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
178 SmallVectorImpl<Register> &Ops) {
179 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
180 "Invalid instruction kind");
181 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
182 Register Src1 = MI.getOperand(1).getReg();
183 LLT SrcType = MRI.getType(Src1);
184 // As bizarre as it may look, shuffle vector can actually produce
185 // scalar! This is because at the IR level a <1 x ty> shuffle
186 // vector is perfectly valid.
187 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
188 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
190 // If the resulting vector is smaller than the size of the source
191 // vectors being concatenated, we won't be able to replace the
192 // shuffle vector into a concat_vectors.
194 // Note: We may still be able to produce a concat_vectors fed by
195 // extract_vector_elt and so on. It is less clear that would
196 // be better though, so don't bother for now.
198 // If the destination is a scalar, the size of the sources doesn't
199 // matter. we will lower the shuffle to a plain copy. This will
200 // work only if the source and destination have the same size. But
201 // that's covered by the next condition.
  // TODO: If the sizes of the source and destination don't match,
204 // we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;
208 // Check that the shuffle mask can be broken evenly between the
209 // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;
213 // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
216 unsigned NumConcat = DstNumElts / SrcNumElts;
217 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
218 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;
    // Ensure the indices in each SrcType sized piece are sequential and that
    // the same source is used for the whole piece.
    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
        (ConcatSrcs[i / SrcNumElts] >= 0 &&
         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
      return false;
    // Remember which source this index came from.
    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
  }
234 // The shuffle is concatenating multiple vectors together.
235 // Collect the different operands for that.
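  // For example (illustrative MIR):
  //   %d:_(<4 x s32>) = G_SHUFFLE_VECTOR %s1:_(<2 x s32>), %s2, shufflemask(0, 1, 2, 3)
  // collects Ops = {%s1, %s2}, i.e. the destination is simply the two source
  // vectors laid out back to back.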
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (auto Src : ConcatSrcs) {
    if (Src < 0) {
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}
253 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
254 const ArrayRef<Register> Ops) {
255 Register DstReg = MI.getOperand(0).getReg();
256 Builder.setInsertPt(*MI.getParent(), MI);
257 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMerge(NewDstReg, Ops);

  MI.eraseFromParent();
  replaceRegWith(MRI, DstReg, NewDstReg);
}
namespace {

/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate are the attributes of the candidate under consideration.
272 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
273 const LLT TyForCandidate,
274 unsigned OpcodeForCandidate,
275 MachineInstr *MIForCandidate) {
276 if (!CurrentUse.Ty.isValid()) {
277 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
278 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
    return CurrentUse;
  }
283 // We permit the extend to hoist through basic blocks but this is only
284 // sensible if the target has extending loads. If you end up lowering back
285 // into a load and extend during the legalizer then the end result is
286 // hoisting the extend up to the load.
288 // Prefer defined extensions to undefined extensions as these are more
289 // likely to reduce the number of instructions.
290 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
      CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
    return CurrentUse;
293 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
294 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
295 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive.
299 if (CurrentUse.Ty == TyForCandidate) {
300 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
        OpcodeForCandidate == TargetOpcode::G_ZEXT)
      return CurrentUse;
    else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
             OpcodeForCandidate == TargetOpcode::G_SEXT)
      return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
308 // This is potentially target specific. We've chosen the largest type
309 // because G_TRUNC is usually free. One potential catch with this is that
310 // some targets have a reduced number of larger registers than smaller
  // registers and this choice potentially increases the live-range for the
  // larger value.
  if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
    return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
  }
  return CurrentUse;
}
319 /// Find a suitable place to insert some instructions and insert them. This
320 /// function accounts for special cases like inserting before a PHI node.
321 /// The current strategy for inserting before PHI's is to duplicate the
322 /// instructions for each predecessor. However, while that's ok for G_TRUNC
323 /// on most targets since it generally requires no code, other targets/cases may
324 /// want to try harder to find a dominating block.
325 static void InsertInsnsWithoutSideEffectsBeforeUse(
326 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
327 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
                       MachineOperand &UseMO)>
        Inserter) {
  MachineInstr &UseMI = *UseMO.getParent();
332 MachineBasicBlock *InsertBB = UseMI.getParent();
  // If the use is a PHI then we want the predecessor block instead.
  if (UseMI.isPHI()) {
    MachineOperand *PredBB = std::next(&UseMO);
    InsertBB = PredBB->getMBB();
  }
340 // If the block is the same block as the def then we want to insert just after
341 // the def instead of at the start of the block.
342 if (InsertBB == DefMI.getParent()) {
343 MachineBasicBlock::iterator InsertPt = &DefMI;
    Inserter(InsertBB, std::next(InsertPt), UseMO);
    return;
  }
348 // Otherwise we want the start of the BB
  Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
351 } // end anonymous namespace
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
  PreferredTuple Preferred;
  if (matchCombineExtendingLoads(MI, Preferred)) {
    applyCombineExtendingLoads(MI, Preferred);
    return true;
  }
  return false;
}
362 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
363 PreferredTuple &Preferred) {
364 // We match the loads and follow the uses to the extend instead of matching
365 // the extends and following the def to the load. This is because the load
366 // must remain in the same position for correctness (unless we also add code
367 // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load for the volatile case or just
  // for performance.
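  //
  // For example (illustrative MIR):
  //   %v:_(s8) = G_LOAD %ptr
  //   %e:_(s32) = G_SEXT %v(s8)
  // becomes
  //   %e:_(s32) = G_SEXTLOAD %ptr
  // with any remaining uses of %v rewritten through a G_TRUNC of %e.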
371 if (MI.getOpcode() != TargetOpcode::G_LOAD &&
372 MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
      MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
    return false;
376 auto &LoadValue = MI.getOperand(0);
377 assert(LoadValue.isReg() && "Result wasn't a register?");
379 LLT LoadValueTy = MRI.getType(LoadValue.getReg());
  if (!LoadValueTy.isScalar())
    return false;
383 // Most architectures are going to legalize <s8 loads into at least a 1 byte
384 // load, and the MMOs can only describe memory accesses in multiples of bytes.
385 // If we try to perform extload combining on those, we can end up with
386 // %a(s8) = extload %ptr (load 1 byte from %ptr)
387 // ... which is an illegal extload instruction.
  if (LoadValueTy.getSizeInBits() < 8)
    return false;
391 // For non power-of-2 types, they will very likely be legalized into multiple
392 // loads. Don't bother trying to match them into extending loads.
  if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
    return false;
396 // Find the preferred type aside from the any-extends (unless it's the only
397 // one) and non-extending ops. We'll emit an extending load to that type and
398 // and emit a variant of (extend (trunc X)) for the others according to the
399 // relative type sizes. At the same time, pick an extend to use based on the
400 // extend involved in the chosen type.
401 unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
402 ? TargetOpcode::G_ANYEXT
403 : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
404 ? TargetOpcode::G_SEXT
405 : TargetOpcode::G_ZEXT;
406 Preferred = {LLT(), PreferredOpcode, nullptr};
407 for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
408 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
409 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
410 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
      // Check for legality.
      if (LI) {
        LegalityQuery::MemDesc MMDesc;
414 const auto &MMO = **MI.memoperands_begin();
415 MMDesc.SizeInBits = MMO.getSizeInBits();
416 MMDesc.AlignInBits = MMO.getAlign().value() * 8;
417 MMDesc.Ordering = MMO.getOrdering();
418 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
419 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
420 if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
            LegalizeActions::Legal)
          continue;
      }
      Preferred = ChoosePreferredUse(Preferred,
425 MRI.getType(UseMI.getOperand(0).getReg()),
                                     UseMI.getOpcode(), &UseMI);
    }
  }

  // There were no extends
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
434 // type since by definition the result of an extend is larger.
435 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
  LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
  return true;
}
441 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
442 PreferredTuple &Preferred) {
443 // Rewrite the load to the chosen extending load.
444 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
446 // Inserter to insert a truncate back to the original type at a given point
447 // with some basic CSE to limit truncate duplication to one per BB.
448 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
449 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
450 MachineBasicBlock::iterator InsertBefore,
451 MachineOperand &UseMO) {
452 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
453 if (PreviouslyEmitted) {
454 Observer.changingInstr(*UseMO.getParent());
455 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
      Observer.changedInstr(*UseMO.getParent());
      return;
    }
460 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
461 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
462 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
463 EmittedInsns[InsertIntoBB] = NewMI;
    replaceRegOpWith(MRI, UseMO, NewDstReg);
  };

  Observer.changingInstr(MI);
  MI.setDesc(
      Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
470 ? TargetOpcode::G_SEXTLOAD
471 : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
472 ? TargetOpcode::G_ZEXTLOAD
473 : TargetOpcode::G_LOAD));
475 // Rewrite all the uses to fix up the types.
476 auto &LoadValue = MI.getOperand(0);
477 SmallVector<MachineOperand *, 4> Uses;
478 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
479 Uses.push_back(&UseMO);
481 for (auto *UseMO : Uses) {
482 MachineInstr *UseMI = UseMO->getParent();
484 // If the extend is compatible with the preferred extend then we should fix
485 // up the type and extend so that it uses the preferred use.
486 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
487 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
488 Register UseDstReg = UseMI->getOperand(0).getReg();
489 MachineOperand &UseSrcMO = UseMI->getOperand(1);
490 const LLT UseDstTy = MRI.getType(UseDstReg);
491 if (UseDstReg != ChosenDstReg) {
492 if (Preferred.Ty == UseDstTy) {
493 // If the use has the same type as the preferred use, then merge
494 // the vregs and erase the extend. For example:
495 // %1:_(s8) = G_LOAD ...
496 // %2:_(s32) = G_SEXT %1(s8)
497 // %3:_(s32) = G_ANYEXT %1(s8)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
502 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
503 Observer.erasingInstr(*UseMO->getParent());
504 UseMO->getParent()->eraseFromParent();
505 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
506 // If the preferred size is smaller, then keep the extend but extend
507 // from the result of the extending load. For example:
508 // %1:_(s8) = G_LOAD ...
509 // %2:_(s32) = G_SEXT %1(s8)
510 // %3:_(s64) = G_ANYEXT %1(s8)
          // rewrites to:
          //    %2:_(s32) = G_SEXTLOAD ...
514 // %3:_(s64) = G_ANYEXT %2:_(s32)
516 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
        } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
                                                 InsertTruncAt);
        }
        continue;
      }
      // The use is (one of) the uses of the preferred use we chose earlier.
      // We're going to update the load to def this value later so just erase
      // the use now.
      Observer.erasingInstr(*UseMO->getParent());
      UseMO->getParent()->eraseFromParent();
      continue;
    }
542 // The use isn't an extend. Truncate back to the type we originally loaded.
543 // This is free on many targets.
    InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
  }

  MI.getOperand(0).setReg(ChosenDstReg);
  Observer.changedInstr(MI);
}
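/// Returns true if \p DefMI appears before \p UseMI; both instructions must
/// belong to the same basic block.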
551 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
552 const MachineInstr &UseMI) {
553 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
554 "shouldn't consider debug uses");
555 assert(DefMI.getParent() == UseMI.getParent());
  if (&DefMI == &UseMI)
    return false;

  // Loop through the basic block until we find one of the instructions.
  MachineBasicBlock::const_iterator I = DefMI.getParent()->begin(),
                                    E = DefMI.getParent()->end();
  for (; I != E; ++I)
    if (&*I == &DefMI || &*I == &UseMI)
      return &*I == &DefMI;

  llvm_unreachable("Block must contain instructions");
}
567 bool CombinerHelper::dominates(const MachineInstr &DefMI,
568 const MachineInstr &UseMI) {
569 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
570 "shouldn't consider debug uses");
  if (MDT)
    return MDT->dominates(&DefMI, &UseMI);
  else if (DefMI.getParent() != UseMI.getParent())
    return false;

  return isPredecessor(DefMI, UseMI);
}
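// Match a G_SEXT_INREG whose source is already known (via known-bits
// analysis) to carry at least as many sign bits as the in-register
// sign-extension would produce; such an instruction is a no-op and can be
// replaced with a plain copy.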
579 bool CombinerHelper::matchSextAlreadyExtended(MachineInstr &MI) {
580 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
581 Register SrcReg = MI.getOperand(1).getReg();
582 unsigned SrcSignBits = KB->computeNumSignBits(SrcReg);
583 unsigned NumSextBits =
584 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() -
585 MI.getOperand(2).getImm();
  return SrcSignBits >= NumSextBits;
}
589 bool CombinerHelper::applySextAlreadyExtended(MachineInstr &MI) {
590 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
591 MachineIRBuilder MIB(MI);
592 MIB.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
  MI.eraseFromParent();
  return true;
}
597 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
598 Register &Base, Register &Offset) {
599 auto &MF = *MI.getParent()->getParent();
600 const auto &TLI = *MF.getSubtarget().getTargetLowering();
603 unsigned Opcode = MI.getOpcode();
604 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
605 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
608 Base = MI.getOperand(1).getReg();
609 MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
    return false;
613 LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
615 for (auto &Use : MRI.use_nodbg_instructions(Base)) {
    if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
      continue;
619 Offset = Use.getOperand(2).getReg();
620 if (!ForceLegalIndexing &&
621 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
                        << Use);
      continue;
    }
627 // Make sure the offset calculation is before the potentially indexed op.
    // FIXME: we really care about dependency here. The offset calculation might
    // be movable.
630 MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
631 if (!OffsetDef || !dominates(*OffsetDef, MI)) {
      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
                        << Use);
      continue;
    }
637 // FIXME: check whether all uses of Base are load/store with foldable
638 // addressing modes. If so, using the normal addr-modes is better than
639 // forming an indexed one.
641 bool MemOpDominatesAddrUses = true;
642 for (auto &PtrAddUse :
643 MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
644 if (!dominates(MI, PtrAddUse)) {
        MemOpDominatesAddrUses = false;
        break;
      }
    }

    if (!MemOpDominatesAddrUses) {
      LLVM_DEBUG(
          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
                 << Use);
      continue;
    }
657 LLVM_DEBUG(dbgs() << " Found match: " << Use);
    Addr = Use.getOperand(0).getReg();
    return true;
  }

  return false;
}
665 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
666 Register &Base, Register &Offset) {
667 auto &MF = *MI.getParent()->getParent();
668 const auto &TLI = *MF.getSubtarget().getTargetLowering();
671 unsigned Opcode = MI.getOpcode();
672 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
673 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
676 Addr = MI.getOperand(1).getReg();
677 MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
  if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
    return false;
681 Base = AddrDef->getOperand(1).getReg();
682 Offset = AddrDef->getOperand(2).getReg();
684 LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
686 if (!ForceLegalIndexing &&
687 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
    return false;
  }
692 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
693 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
    return false;
  }
698 if (MI.getOpcode() == TargetOpcode::G_STORE) {
699 // Would require a copy.
700 if (Base == MI.getOperand(0).getReg()) {
      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
      return false;
    }
705 // We're expecting one use of Addr in MI, but it could also be the
706 // value stored, which isn't actually dominated by the instruction.
707 if (MI.getOperand(0).getReg() == Addr) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
      return false;
    }
  }
713 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
714 // That might allow us to end base's liveness here by adjusting the constant.
716 for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
717 if (!dominates(MI, UseMI)) {
      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
      return false;
    }
  }

  return true;
}
bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
  IndexedLoadStoreMatchInfo MatchInfo;
  if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
    applyCombineIndexedLoadStore(MI, MatchInfo);
    return true;
  }
  return false;
}
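// As an illustration, a post-indexed candidate looks like
//   %v:_(s32) = G_LOAD %base
//   %next:_(p0) = G_PTR_ADD %base, %off
// and is rewritten by the apply function below into
//   %v:_(s32), %next:_(p0) = G_INDEXED_LOAD %base, %off, 0
// where the trailing immediate distinguishes pre-indexing (1) from
// post-indexing (0).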
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
736 unsigned Opcode = MI.getOpcode();
737 if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
    return false;

  MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                                          MatchInfo.Offset);
  if (!MatchInfo.IsPre &&
      !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
                              MatchInfo.Offset))
    return false;
  return true;
}
751 void CombinerHelper::applyCombineIndexedLoadStore(
752 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
753 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
754 MachineIRBuilder MIRBuilder(MI);
755 unsigned Opcode = MI.getOpcode();
  bool IsStore = Opcode == TargetOpcode::G_STORE;
  unsigned NewOpcode;
  switch (Opcode) {
  case TargetOpcode::G_LOAD:
    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
    break;
  case TargetOpcode::G_SEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
    break;
  case TargetOpcode::G_ZEXTLOAD:
    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
    break;
  case TargetOpcode::G_STORE:
    NewOpcode = TargetOpcode::G_INDEXED_STORE;
    break;
  default:
    llvm_unreachable("Unknown load/store opcode");
  }
775 auto MIB = MIRBuilder.buildInstr(NewOpcode);
  if (IsStore) {
    MIB.addDef(MatchInfo.Addr);
    MIB.addUse(MI.getOperand(0).getReg());
  } else {
    MIB.addDef(MI.getOperand(0).getReg());
    MIB.addDef(MatchInfo.Addr);
  }

  MIB.addUse(MatchInfo.Base);
785 MIB.addUse(MatchInfo.Offset);
786 MIB.addImm(MatchInfo.IsPre);
787 MI.eraseFromParent();
788 AddrDef.eraseFromParent();
  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
}
793 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
  if (MI.getOpcode() != TargetOpcode::G_BR)
    return false;
  // Try to match the following:
  // bb1:
  //   %c(s32) = G_ICMP pred, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb2
  //   G_BR %bb3
  // bb2:
  //   ...
  // bb3:

  // The above pattern does not have a fall through to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with a conditional branch to bb3 and otherwise
  // fall through to bb2.
812 MachineBasicBlock *MBB = MI.getParent();
813 MachineBasicBlock::iterator BrIt(MI);
  if (BrIt == MBB->begin())
    return false;
816 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
818 MachineInstr *BrCond = &*std::prev(BrIt);
  if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
    return false;
822 // Check that the next block is the conditional branch target.
  if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
    return false;
826 MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
827 if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
      !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
    return false;
  return true;
}
833 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
  if (!matchElideBrByInvertingCond(MI))
    return false;
  applyElideBrByInvertingCond(MI);
  return true;
}
840 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
841 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
842 MachineBasicBlock::iterator BrIt(MI);
843 MachineInstr *BrCond = &*std::prev(BrIt);
844 MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
846 CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
847 (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
849 // Invert the G_ICMP condition.
850 Observer.changingInstr(*CmpMI);
851 CmpMI->getOperand(1).setPredicate(InversePred);
852 Observer.changedInstr(*CmpMI);
854 // Change the conditional branch target.
855 Observer.changingInstr(*BrCond);
856 BrCond->getOperand(1).setMBB(BrTarget);
857 Observer.changedInstr(*BrCond);
  MI.eraseFromParent();
}
861 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
862 // On Darwin, -Os means optimize for size without hurting performance, so
863 // only really optimize for size when -Oz (MinSize) is used.
864 if (MF.getTarget().getTargetTriple().isOSDarwin())
865 return MF.getFunction().hasMinSize();
  return MF.getFunction().hasOptSize();
}
869 // Returns a list of types to use for memory op lowering in MemOps. A partial
870 // port of findOptimalMemOpLowering in TargetLowering.
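// For example, assuming the target reports s64 as its optimal memop type, an
// 11-byte memset may be broken into MemOps = {s64, s16, s8} (or into fewer,
// overlapping accesses when misaligned accesses are fast and overlap is
// allowed).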
871 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
872 unsigned Limit, const MemOp &Op,
873 unsigned DstAS, unsigned SrcAS,
874 const AttributeList &FuncAttributes,
875 const TargetLowering &TLI) {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);

  if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
883 // We only need to check DstAlign here as SrcAlign is always greater or
884 // equal to DstAlign (or zero).
885 Ty = LLT::scalar(64);
886 if (Op.isFixedDstAlign())
887 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
888 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
889 Ty = LLT::scalar(Ty.getSizeInBytes());
890 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
    // FIXME: check for the largest legal type we can load/store to.
  }
894 unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned TySize = Ty.getSizeInBytes();
    while (TySize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      LLT NewTy = Ty;
901 // FIXME: check for mem op safety and legality of the types. Not all of
902 // SDAGisms map cleanly to GISel concepts.
903 if (NewTy.isVector())
904 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
905 NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
906 unsigned NewTySize = NewTy.getSizeInBytes();
907 assert(NewTySize > 0 && "Could not find appropriate type");
909 // If the new LLT cannot cover all of the remaining bits, then consider
910 // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
913 MVT VT = getMVTForLLT(Ty);
914 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
915 TLI.allowsMisalignedMemoryAccesses(
916 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        TySize = Size;
      else {
        Ty = NewTy;
        TySize = NewTySize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(Ty);
    Size -= TySize;
  }

  return true;
}
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
  if (Ty.isVector())
    return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
                                Ty.getNumElements());
  return IntegerType::get(C, Ty.getSizeInBits());
}
943 // Get a vectorized representation of the memset value operand, GISel edition.
944 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
945 MachineRegisterInfo &MRI = *MIB.getMRI();
946 unsigned NumBits = Ty.getScalarSizeInBits();
947 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
948 if (!Ty.isVector() && ValVRegAndVal) {
949 unsigned KnownVal = ValVRegAndVal->Value;
950 APInt Scalar = APInt(8, KnownVal);
951 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
    return MIB.buildConstant(Ty, SplatVal).getReg(0);
  }
955 // Extend the byte value to the larger type, and then multiply by a magic
956 // value 0x010101... in order to replicate it across every byte.
957 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
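  // For example, a non-zero, non-constant s8 value stored through an s32 type
  // is zero-extended and multiplied by 0x01010101 so every byte of the wider
  // value holds the original byte.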
958 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
    return MIB.buildConstant(Ty, 0).getReg(0);
  }
962 LLT ExtType = Ty.getScalarType();
  auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
  if (NumBits > 8) {
    APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
    auto MagicMI = MIB.buildConstant(ExtType, Magic);
    Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
  }

  // For vector types create a G_BUILD_VECTOR.
  if (Ty.isVector())
    Val = MIB.buildSplatVector(Ty, Val).getReg(0);

  return Val;
}
977 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
978 Register Val, unsigned KnownLen,
979 Align Alignment, bool IsVolatile) {
980 auto &MF = *MI.getParent()->getParent();
981 const auto &TLI = *MF.getSubtarget().getTargetLowering();
982 auto &DL = MF.getDataLayout();
983 LLVMContext &C = MF.getFunction().getContext();
985 assert(KnownLen != 0 && "Have a zero length memset length!");
987 bool DstAlignCanChange = false;
988 MachineFrameInfo &MFI = MF.getFrameInfo();
989 bool OptSize = shouldLowerMemFuncForSize(MF);
991 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
992 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
993 DstAlignCanChange = true;
995 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
996 std::vector<LLT> MemOps;
998 const auto &DstMMO = **MI.memoperands_begin();
999 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1001 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1002 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1004 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
                                     MemOp::Set(KnownLen, DstAlignCanChange,
                                                Alignment,
                                                /*IsZeroMemset=*/IsZeroVal,
                                                /*IsVolatile=*/IsVolatile),
                                     DstPtrInfo.getAddrSpace(), ~0u,
                                     MF.getFunction().getAttributes(), TLI))
    return false;
1013 if (DstAlignCanChange) {
1014 // Get an estimate of the type from the LLT.
1015 Type *IRTy = getTypeForLLT(MemOps[0], C);
1016 Align NewAlign = DL.getABITypeAlign(IRTy);
1017 if (NewAlign > Alignment) {
1018 Alignment = NewAlign;
1019 unsigned FI = FIDef->getOperand(1).getIndex();
1020 // Give the stack frame object a larger alignment if needed.
1021 if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }
1026 MachineIRBuilder MIB(MI);
1027 // Find the largest store and generate the bit pattern for it.
1028 LLT LargestTy = MemOps[0];
1029 for (unsigned i = 1; i < MemOps.size(); i++)
1030 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1031 LargestTy = MemOps[i];
1033 // The memset stored value is always defined as an s8, so in order to make it
  // work with larger store types we need to repeat the bit pattern across the
  // wider type.
  Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);

  if (!MemSetValue)
    return false;
1041 // Generate the stores. For each store type in the list, we generate the
1042 // matching store of that type to the destination address.
1043 LLT PtrTy = MRI.getType(Dst);
1044 unsigned DstOff = 0;
1045 unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    unsigned TySize = Ty.getSizeInBytes();
    if (TySize > Size) {
1050 // Issuing an unaligned load / store pair that overlaps with the previous
1051 // pair. Adjust the offset accordingly.
1052 assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
    }
1056 // If this store is smaller than the largest store see whether we can get
1057 // the smaller value for free with a truncate.
1058 Register Value = MemSetValue;
1059 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1060 MVT VT = getMVTForLLT(Ty);
1061 MVT LargestVT = getMVTForLLT(LargestTy);
1062 if (!LargestTy.isVector() && !Ty.isVector() &&
          TLI.isTruncateFree(LargestVT, VT))
        Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      else
        Value = getMemsetValue(Val, Ty, MIB);
      if (!Value)
        return false;
    }

    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
    Register Ptr = Dst;
    if (DstOff != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }
1081 MIB.buildStore(Value, Ptr, *StoreMMO);
    DstOff += Ty.getSizeInBytes();
    Size -= TySize;
  }

  MI.eraseFromParent();
  return true;
}
1090 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1091 Register Src, unsigned KnownLen,
                                    Align DstAlign, Align SrcAlign,
                                    bool IsVolatile) {
1094 auto &MF = *MI.getParent()->getParent();
1095 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1096 auto &DL = MF.getDataLayout();
1097 LLVMContext &C = MF.getFunction().getContext();
1099 assert(KnownLen != 0 && "Have a zero length memcpy length!");
1101 bool DstAlignCanChange = false;
1102 MachineFrameInfo &MFI = MF.getFrameInfo();
1103 bool OptSize = shouldLowerMemFuncForSize(MF);
1104 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1106 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1107 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1108 DstAlignCanChange = true;
1110 // FIXME: infer better src pointer alignment like SelectionDAG does here.
1111 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1112 // if the memcpy is in a tail call position.
1114 unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1115 std::vector<LLT> MemOps;
1117 const auto &DstMMO = **MI.memoperands_begin();
1118 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1119 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1120 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      IsVolatile),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;
1130 if (DstAlignCanChange) {
1131 // Get an estimate of the type from the LLT.
1132 Type *IRTy = getTypeForLLT(MemOps[0], C);
1133 Align NewAlign = DL.getABITypeAlign(IRTy);
    // Don't promote to an alignment that would require dynamic stack
    // realignment.
1137 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1138 if (!TRI->needsStackRealignment(MF))
1139 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1140 NewAlign = NewAlign / 2;
1142 if (NewAlign > Alignment) {
1143 Alignment = NewAlign;
1144 unsigned FI = FIDef->getOperand(1).getIndex();
1145 // Give the stack frame object a larger alignment if needed.
1146 if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }
1151 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1153 MachineIRBuilder MIB(MI);
1154 // Now we need to emit a pair of load and stores for each of the types we've
1155 // collected. I.e. for each type, generate a load from the source pointer of
1156 // that type width, and then generate a corresponding store to the dest buffer
1157 // of that value loaded. This can result in a sequence of loads and stores
  // of mixed types, depending on what the target specifies as good types to use.
1159 unsigned CurrOffset = 0;
1160 LLT PtrTy = MRI.getType(Src);
1161 unsigned Size = KnownLen;
1162 for (auto CopyTy : MemOps) {
1163 // Issuing an unaligned load / store pair that overlaps with the previous
1164 // pair. Adjust the offset accordingly.
1165 if (CopyTy.getSizeInBytes() > Size)
1166 CurrOffset -= CopyTy.getSizeInBytes() - Size;
    // Construct MMOs for the accesses.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
    Register Offset;
1177 if (CurrOffset != 0) {
      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
                   .getReg(0);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);

    // Create the store.
    Register StorePtr =
        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1187 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1188 CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  }

  MI.eraseFromParent();
  return true;
}
1196 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1197 Register Src, unsigned KnownLen,
                                     Align DstAlign, Align SrcAlign,
                                     bool IsVolatile) {
1200 auto &MF = *MI.getParent()->getParent();
1201 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1202 auto &DL = MF.getDataLayout();
1203 LLVMContext &C = MF.getFunction().getContext();
1205 assert(KnownLen != 0 && "Have a zero length memmove length!");
1207 bool DstAlignCanChange = false;
1208 MachineFrameInfo &MFI = MF.getFrameInfo();
1209 bool OptSize = shouldLowerMemFuncForSize(MF);
1210 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1212 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1213 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1214 DstAlignCanChange = true;
1216 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1217 std::vector<LLT> MemOps;
1219 const auto &DstMMO = **MI.memoperands_begin();
1220 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1221 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1222 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1224 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same here.
  if (!findGISelOptimalMemOpLowering(
          MemOps, Limit,
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
                      /*IsVolatile*/ true),
          DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
          MF.getFunction().getAttributes(), TLI))
    return false;
1235 if (DstAlignCanChange) {
1236 // Get an estimate of the type from the LLT.
1237 Type *IRTy = getTypeForLLT(MemOps[0], C);
1238 Align NewAlign = DL.getABITypeAlign(IRTy);
    // Don't promote to an alignment that would require dynamic stack
    // realignment.
1242 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1243 if (!TRI->needsStackRealignment(MF))
1244 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1245 NewAlign = NewAlign / 2;
1247 if (NewAlign > Alignment) {
1248 Alignment = NewAlign;
1249 unsigned FI = FIDef->getOperand(1).getIndex();
1250 // Give the stack frame object a larger alignment if needed.
1251 if (MFI.getObjectAlign(FI) < Alignment)
        MFI.setObjectAlignment(FI, Alignment);
    }
  }
1256 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1258 MachineIRBuilder MIB(MI);
1259 // Memmove requires that we perform the loads first before issuing the stores.
1260 // Apart from that, this loop is pretty much doing the same thing as the
1261 // memcpy codegen function.
1262 unsigned CurrOffset = 0;
1263 LLT PtrTy = MRI.getType(Src);
1264 SmallVector<Register, 16> LoadVals;
1265 for (auto CopyTy : MemOps) {
    // Construct MMO for the load.
    auto *LoadMMO =
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());

    // Create the load.
    Register LoadPtr = Src;
1272 if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
    }
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  }

  CurrOffset = 0;
  for (unsigned I = 0; I < MemOps.size(); ++I) {
1283 LLT CopyTy = MemOps[I];
    // Now store the values loaded.
    auto *StoreMMO =
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());

    Register StorePtr = Dst;
1289 if (CurrOffset != 0) {
      auto Offset =
          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    }
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
  }
  MI.eraseFromParent();
  return true;
}
1301 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1302 // This combine is fairly complex so it's not written with a separate
1303 // matcher function.
1304 assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1305 Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1306 assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1307 ID == Intrinsic::memset) &&
1308 "Expected a memcpy like intrinsic");
1310 auto MMOIt = MI.memoperands_begin();
1311 const MachineMemOperand *MemOp = *MMOIt;
1312 bool IsVolatile = MemOp->isVolatile();
  // Don't try to optimize volatile.
  if (IsVolatile)
    return false;

  Align DstAlign = MemOp->getBaseAlign();
  Align SrcAlign;
  Register Dst = MI.getOperand(1).getReg();
1320 Register Src = MI.getOperand(2).getReg();
1321 Register Len = MI.getOperand(3).getReg();
1323 if (ID != Intrinsic::memset) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  }
1329 // See if this is a constant length copy
  auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
  if (!LenVRegAndVal)
    return false; // Leave it to the legalizer to lower it to a libcall.
1333 unsigned KnownLen = LenVRegAndVal->Value;
1335 if (KnownLen == 0) {
    MI.eraseFromParent();
    return true;
  }

  if (MaxLen && KnownLen > MaxLen)
    return false;
  if (ID == Intrinsic::memcpy)
    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                          IsVolatile);
  if (ID == Intrinsic::memmove)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
1347 if (ID == Intrinsic::memset)
    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
  return false;
}
1352 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1353 PtrAddChain &MatchInfo) {
1354 // We're trying to match the following pattern:
1355 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
  //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
  // -->
  //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
  if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;
1363 Register Add2 = MI.getOperand(1).getReg();
1364 Register Imm1 = MI.getOperand(2).getReg();
  auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
  if (!MaybeImmVal)
    return false;

  MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
  if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
    return false;
1373 Register Base = Add2Def->getOperand(1).getReg();
1374 Register Imm2 = Add2Def->getOperand(2).getReg();
  auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
  if (!MaybeImm2Val)
    return false;

  // Pass the combined immediate to the apply function.
  MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
  MatchInfo.Base = Base;
  return true;
}
1385 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1386 PtrAddChain &MatchInfo) {
1387 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1388 MachineIRBuilder MIB(MI);
1389 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1390 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1391 Observer.changingInstr(MI);
1392 MI.getOperand(1).setReg(MatchInfo.Base);
1393 MI.getOperand(2).setReg(NewOffset.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
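// Fold a multiplication by a constant power of two into a left shift, e.g.
// (illustrative MIR)
//   %r:_(s32) = G_MUL %x, 8   -->   %r:_(s32) = G_SHL %x, %three
// where %three is a G_CONSTANT holding the log2 of the multiplier.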
1398 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1399 unsigned &ShiftVal) {
1400 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
    return false;
  ShiftVal = Log2_64(MaybeImmVal->Value);
  return true;
}
1409 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1410 unsigned &ShiftVal) {
1411 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1412 MachineIRBuilder MIB(MI);
1413 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1414 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1415 Observer.changingInstr(MI);
1416 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1417 MI.getOperand(2).setReg(ShiftCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}
1422 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
1423 unsigned TargetShiftSize,
1424 unsigned &ShiftVal) {
1425 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
1426 MI.getOpcode() == TargetOpcode::G_LSHR ||
1427 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
1429 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO:
    return false;
1433 // Don't narrow further than the requested size.
1434 unsigned Size = Ty.getSizeInBits();
  if (Size <= TargetShiftSize)
    return false;

  auto MaybeImmVal =
      getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
  if (!MaybeImmVal)
    return false;

  ShiftVal = MaybeImmVal->Value;
  return ShiftVal >= Size / 2 && ShiftVal < Size;
}
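// Narrow a wide shift by a constant of at least half the bit width, e.g.
// (illustrative MIR)
//   %d:_(s64) = G_LSHR %x, 40
// -->
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %n:_(s32) = G_LSHR %hi, 8
//   %zero:_(s32) = G_CONSTANT i32 0
//   %d:_(s64) = G_MERGE_VALUES %n, %zero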
1447 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
1448 const unsigned &ShiftVal) {
1449 Register DstReg = MI.getOperand(0).getReg();
1450 Register SrcReg = MI.getOperand(1).getReg();
1451 LLT Ty = MRI.getType(SrcReg);
1452 unsigned Size = Ty.getSizeInBits();
1453 unsigned HalfSize = Size / 2;
1454 assert(ShiftVal >= HalfSize);
1456 LLT HalfTy = LLT::scalar(HalfSize);
1458 Builder.setInstr(MI);
1459 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
1460 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
1462 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
1463 Register Narrowed = Unmerge.getReg(1);
    //  dst = G_LSHR s64:x, C for C >= 32
    // =>
1467 // lo, hi = G_UNMERGE_VALUES x
1468 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
1470 if (NarrowShiftAmt != 0) {
1471 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }
1475 auto Zero = Builder.buildConstant(HalfTy, 0);
1476 Builder.buildMerge(DstReg, { Narrowed, Zero });
1477 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
1478 Register Narrowed = Unmerge.getReg(0);
    //  dst = G_SHL s64:x, C for C >= 32
    // =>
1481 // lo, hi = G_UNMERGE_VALUES x
1482 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
1483 if (NarrowShiftAmt != 0) {
1484 Narrowed = Builder.buildShl(HalfTy, Narrowed,
        Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
    }
1488 auto Zero = Builder.buildConstant(HalfTy, 0);
    Builder.buildMerge(DstReg, { Zero, Narrowed });
  } else {
    assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1492 auto Hi = Builder.buildAShr(
1493 HalfTy, Unmerge.getReg(1),
1494 Builder.buildConstant(HalfTy, HalfSize - 1));
1496 if (ShiftVal == HalfSize) {
1497 // (G_ASHR i64:x, 32) ->
1498 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
1499 Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
1500 } else if (ShiftVal == Size - 1) {
1501 // Don't need a second shift.
1502 // (G_ASHR i64:x, 63) ->
1503 // %narrowed = (G_ASHR hi_32(x), 31)
1504 // G_MERGE_VALUES %narrowed, %narrowed
      Builder.buildMerge(DstReg, { Hi, Hi });
    } else {
      auto Lo = Builder.buildAShr(
1508 HalfTy, Unmerge.getReg(1),
1509 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
1511 // (G_ASHR i64:x, C) ->, for C >= 32
1512 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
      Builder.buildMerge(DstReg, { Lo, Hi });
    }
  }

  MI.eraseFromParent();
  return true;
}
1521 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
                                              unsigned TargetShiftAmount) {
  unsigned ShiftAmt;
  if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
    applyCombineShiftToUnmerge(MI, ShiftAmt);
    return true;
  }
  return false;
}
1532 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
1533 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1534 return MO.isReg() &&
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}
1539 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
1540 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1541 return !MO.isReg() ||
           getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
  });
}
1546 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
1547 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
1548 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  return all_of(Mask, [](int Elt) { return Elt < 0; });
}
1552 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
1553 assert(MI.getOpcode() == TargetOpcode::G_STORE);
  return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
                      MRI);
}
1558 bool CombinerHelper::eraseInst(MachineInstr &MI) {
  MI.eraseFromParent();
  return true;
}
1563 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
1564 const MachineOperand &MOP2) {
  if (!MOP1.isReg() || !MOP2.isReg())
    return false;
  MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
  if (!I1)
    return false;
  MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
  if (!I2)
    return false;
1574 // Handle a case like this:
1576 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
  // Even though %0 and %1 are produced by the same instruction they are not
  // the same values.
  if (I1 == I2)
    return MOP1.getReg() == MOP2.getReg();
  // If we have an instruction which loads or stores, we can't guarantee that
  // it always produces the same value.
  //
1586 // For example, we may have
  // %x1 = G_LOAD %addr (load N from @somewhere)
  // ...
  // call @foo
  // ...
1592 // %x2 = G_LOAD %addr (load N from @somewhere)
1594 // %or = G_OR %x1, %x2
1596 // It's possible that @foo will modify whatever lives at the address we're
1597 // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed to not
  // change.)
  if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
    return false;
  // Check for physical registers on the instructions first to avoid cases
  // like this:
  //
1606 // %a = COPY $physreg
1608 // SOMETHING implicit-def $physreg
1610 // %b = COPY $physreg
1612 // These copies are not equivalent.
1613 if (any_of(I1->uses(), [](const MachineOperand &MO) {
        return MO.isReg() && MO.getReg().isPhysical();
      })) {
    // Check if we have a case like this:
    //
    // %a = COPY $physreg
    // %b = COPY %a
    //
1621 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
1622 // From that, we know that they must have the same value, since they must
1623 // have come from the same COPY.
    return I1->isIdenticalTo(*I2);
  }
  // We don't have any physical registers, so we don't necessarily need the
  // same vreg defs.
  //
1630 // On the off-chance that there's some target instruction feeding into the
1631 // instruction, let's use produceSameValue instead of isIdenticalTo.
  return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
}
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
  if (!MOP.isReg())
    return false;
1638 // MIPatternMatch doesn't let us look through G_ZEXT etc.
1639 auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
  return ValAndVReg && ValAndVReg->Value == C;
}
bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                     unsigned OpIdx) {
1645 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1646 Register OldReg = MI.getOperand(0).getReg();
1647 Register Replacement = MI.getOperand(OpIdx).getReg();
1648 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1649 MI.eraseFromParent();
  replaceRegWith(MRI, OldReg, Replacement);
  return true;
}
1654 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
1655 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1656 // Match (cond ? x : x)
1657 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
                       MRI);
}
1662 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
1663 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
                       MRI);
}
1668 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
1669 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
         canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
                       MRI);
}
1674 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
1675 assert(MI.getNumDefs() == 1 && "Expected only one def?");
1676 Builder.setInstr(MI);
1677 Builder.buildFConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}
1682 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
1683 assert(MI.getNumDefs() == 1 && "Expected only one def?");
1684 Builder.setInstr(MI);
1685 Builder.buildConstant(MI.getOperand(0), C);
  MI.eraseFromParent();
  return true;
}
1690 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
1691 assert(MI.getNumDefs() == 1 && "Expected only one def?");
1692 Builder.setInstr(MI);
1693 Builder.buildUndef(MI.getOperand(0));
  MI.eraseFromParent();
  return true;
}
1698 bool CombinerHelper::matchSimplifyAddToSub(
1699 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1700 Register LHS = MI.getOperand(1).getReg();
1701 Register RHS = MI.getOperand(2).getReg();
1702 Register &NewLHS = std::get<0>(MatchInfo);
1703 Register &NewRHS = std::get<1>(MatchInfo);
1705 // Helper lambda to check for opportunities for
1706 // ((0-A) + B) -> B - A
1707 // (A + (0-B)) -> A - B
  auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
    int64_t Cst;
    if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
        Cst != 0)
      return false;
    NewLHS = MaybeNewLHS;
    return true;
  };
  return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
}
1720 bool CombinerHelper::applySimplifyAddToSub(
1721 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1722 Builder.setInstr(MI);
1723 Register SubLHS, SubRHS;
1724 std::tie(SubLHS, SubRHS) = MatchInfo;
1725 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
  MI.eraseFromParent();
  return true;
}
bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}