1 //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/ADT/SetVector.h"
10 #include "llvm/ADT/SmallBitVector.h"
11 #include "llvm/CodeGen/GlobalISel/Combiner.h"
12 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
13 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
14 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
15 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
16 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
17 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
18 #include "llvm/CodeGen/GlobalISel/Utils.h"
19 #include "llvm/CodeGen/LowLevelType.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineDominators.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineInstr.h"
24 #include "llvm/CodeGen/MachineMemOperand.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/TargetInstrInfo.h"
27 #include "llvm/CodeGen/TargetLowering.h"
28 #include "llvm/CodeGen/TargetOpcodes.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetMachine.h"
33 #define DEBUG_TYPE "gi-combiner"
36 using namespace MIPatternMatch;
38 // Option to allow testing of the combiner while no targets know about indexed
41 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
42 cl::desc("Force all indexed operations to be "
43 "legal for the GlobalISel combiner"));
45 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
46 MachineIRBuilder &B, GISelKnownBits *KB,
47 MachineDominatorTree *MDT,
48 const LegalizerInfo *LI)
49 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
50 KB(KB), MDT(MDT), LI(LI) {
54 const TargetLowering &CombinerHelper::getTargetLowering() const {
55 return *Builder.getMF().getSubtarget().getTargetLowering();
58 /// \returns The little endian in-memory byte position of byte \p I in a
59 /// \p ByteWidth bytes wide type.
61 /// E.g. Given a 4-byte type x, x[0] -> byte 0
62 static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
63 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
67 /// \returns The big endian in-memory byte position of byte \p I in a
68 /// \p ByteWidth bytes wide type.
70 /// E.g. Given a 4-byte type x, x[0] -> byte 3
71 static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
72 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
73 return ByteWidth - I - 1;
76 /// Given a map from byte offsets in memory to indices in a load/store,
77 /// determine if that map corresponds to a little or big endian byte pattern.
79 /// \param MemOffset2Idx maps memory offsets to address offsets.
80 /// \param LowestIdx is the lowest index in \p MemOffset2Idx.
82 /// \returns true if the map corresponds to a big endian byte pattern, false
83 /// if it corresponds to a little endian byte pattern, and None otherwise.
85 /// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
88 /// AddrOffset Little endian Big endian
94 isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
96 // Need at least two byte positions to decide on endianness.
97 unsigned Width = MemOffset2Idx.size();
100 bool BigEndian = true, LittleEndian = true;
101 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
102 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
103 if (MemOffsetAndIdx == MemOffset2Idx.end())
105 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
106 assert(Idx >= 0 && "Expected non-negative byte offset?");
107 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
108 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
109 if (!BigEndian && !LittleEndian)
113 assert((BigEndian != LittleEndian) &&
114 "Pattern cannot be both big and little endian!");
118 bool CombinerHelper::isLegalOrBeforeLegalizer(
119 const LegalityQuery &Query) const {
120 return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
123 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
124 Register ToReg) const {
125 Observer.changingAllUsesOfReg(MRI, FromReg);
127 if (MRI.constrainRegAttrs(ToReg, FromReg))
128 MRI.replaceRegWith(FromReg, ToReg);
130 Builder.buildCopy(ToReg, FromReg);
132 Observer.finishedChangingAllUsesOfReg();
135 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
136 MachineOperand &FromRegOp,
137 Register ToReg) const {
138 assert(FromRegOp.getParent() && "Expected an operand in an MI");
139 Observer.changingInstr(*FromRegOp.getParent());
141 FromRegOp.setReg(ToReg);
143 Observer.changedInstr(*FromRegOp.getParent());
146 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
147 if (matchCombineCopy(MI)) {
148 applyCombineCopy(MI);
153 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
154 if (MI.getOpcode() != TargetOpcode::COPY)
156 Register DstReg = MI.getOperand(0).getReg();
157 Register SrcReg = MI.getOperand(1).getReg();
158 return canReplaceReg(DstReg, SrcReg, MRI);
160 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
161 Register DstReg = MI.getOperand(0).getReg();
162 Register SrcReg = MI.getOperand(1).getReg();
163 MI.eraseFromParent();
164 replaceRegWith(MRI, DstReg, SrcReg);
167 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
168 bool IsUndef = false;
169 SmallVector<Register, 4> Ops;
170 if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
171 applyCombineConcatVectors(MI, IsUndef, Ops);
177 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
178 SmallVectorImpl<Register> &Ops) {
179 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
180 "Invalid instruction");
182 MachineInstr *Undef = nullptr;
184 // Walk over all the operands of concat vectors and check if they are
185 // build_vector themselves or undef.
186 // Then collect their operands in Ops.
187 for (const MachineOperand &MO : MI.uses()) {
188 Register Reg = MO.getReg();
189 MachineInstr *Def = MRI.getVRegDef(Reg);
190 assert(Def && "Operand not defined");
191 switch (Def->getOpcode()) {
192 case TargetOpcode::G_BUILD_VECTOR:
194 // Remember the operands of the build_vector to fold
195 // them into the yet-to-build flattened concat vectors.
196 for (const MachineOperand &BuildVecMO : Def->uses())
197 Ops.push_back(BuildVecMO.getReg());
199 case TargetOpcode::G_IMPLICIT_DEF: {
200 LLT OpType = MRI.getType(Reg);
201 // Keep one undef value for all the undef operands.
203 Builder.setInsertPt(*MI.getParent(), MI);
204 Undef = Builder.buildUndef(OpType.getScalarType());
206 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
207 OpType.getScalarType() &&
208 "All undefs should have the same type");
209 // Break the undef vector in as many scalar elements as needed
210 // for the flattening.
211 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
212 EltIdx != EltEnd; ++EltIdx)
213 Ops.push_back(Undef->getOperand(0).getReg());
222 void CombinerHelper::applyCombineConcatVectors(
223 MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
224 // We determined that the concat_vectors can be flatten.
225 // Generate the flattened build_vector.
226 Register DstReg = MI.getOperand(0).getReg();
227 Builder.setInsertPt(*MI.getParent(), MI);
228 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
230 // Note: IsUndef is sort of redundant. We could have determine it by
231 // checking that at all Ops are undef. Alternatively, we could have
232 // generate a build_vector of undefs and rely on another combine to
233 // clean that up. For now, given we already gather this information
234 // in tryCombineConcatVectors, just save compile time and issue the
237 Builder.buildUndef(NewDstReg);
239 Builder.buildBuildVector(NewDstReg, Ops);
240 MI.eraseFromParent();
241 replaceRegWith(MRI, DstReg, NewDstReg);
244 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
245 SmallVector<Register, 4> Ops;
246 if (matchCombineShuffleVector(MI, Ops)) {
247 applyCombineShuffleVector(MI, Ops);
253 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
254 SmallVectorImpl<Register> &Ops) {
255 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
256 "Invalid instruction kind");
257 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
258 Register Src1 = MI.getOperand(1).getReg();
259 LLT SrcType = MRI.getType(Src1);
260 // As bizarre as it may look, shuffle vector can actually produce
261 // scalar! This is because at the IR level a <1 x ty> shuffle
262 // vector is perfectly valid.
263 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
264 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
266 // If the resulting vector is smaller than the size of the source
267 // vectors being concatenated, we won't be able to replace the
268 // shuffle vector into a concat_vectors.
270 // Note: We may still be able to produce a concat_vectors fed by
271 // extract_vector_elt and so on. It is less clear that would
272 // be better though, so don't bother for now.
274 // If the destination is a scalar, the size of the sources doesn't
275 // matter. we will lower the shuffle to a plain copy. This will
276 // work only if the source and destination have the same size. But
277 // that's covered by the next condition.
279 // TODO: If the size between the source and destination don't match
280 // we could still emit an extract vector element in that case.
281 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
284 // Check that the shuffle mask can be broken evenly between the
285 // different sources.
286 if (DstNumElts % SrcNumElts != 0)
289 // Mask length is a multiple of the source vector length.
290 // Check if the shuffle is some kind of concatenation of the input
292 unsigned NumConcat = DstNumElts / SrcNumElts;
293 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
294 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
295 for (unsigned i = 0; i != DstNumElts; ++i) {
300 // Ensure the indices in each SrcType sized piece are sequential and that
301 // the same source is used for the whole piece.
302 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
303 (ConcatSrcs[i / SrcNumElts] >= 0 &&
304 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
306 // Remember which source this index came from.
307 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
310 // The shuffle is concatenating multiple vectors together.
311 // Collect the different operands for that.
313 Register Src2 = MI.getOperand(2).getReg();
314 for (auto Src : ConcatSrcs) {
317 Builder.setInsertPt(*MI.getParent(), MI);
318 UndefReg = Builder.buildUndef(SrcType).getReg(0);
320 Ops.push_back(UndefReg);
329 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
330 const ArrayRef<Register> Ops) {
331 Register DstReg = MI.getOperand(0).getReg();
332 Builder.setInsertPt(*MI.getParent(), MI);
333 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
336 Builder.buildCopy(NewDstReg, Ops[0]);
338 Builder.buildMerge(NewDstReg, Ops);
340 MI.eraseFromParent();
341 replaceRegWith(MRI, DstReg, NewDstReg);
346 /// Select a preference between two uses. CurrentUse is the current preference
347 /// while *ForCandidate is attributes of the candidate under consideration.
348 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
349 const LLT TyForCandidate,
350 unsigned OpcodeForCandidate,
351 MachineInstr *MIForCandidate) {
352 if (!CurrentUse.Ty.isValid()) {
353 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
354 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
355 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
359 // We permit the extend to hoist through basic blocks but this is only
360 // sensible if the target has extending loads. If you end up lowering back
361 // into a load and extend during the legalizer then the end result is
362 // hoisting the extend up to the load.
364 // Prefer defined extensions to undefined extensions as these are more
365 // likely to reduce the number of instructions.
366 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
367 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
369 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
370 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
371 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
373 // Prefer sign extensions to zero extensions as sign-extensions tend to be
375 if (CurrentUse.Ty == TyForCandidate) {
376 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
377 OpcodeForCandidate == TargetOpcode::G_ZEXT)
379 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
380 OpcodeForCandidate == TargetOpcode::G_SEXT)
381 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
384 // This is potentially target specific. We've chosen the largest type
385 // because G_TRUNC is usually free. One potential catch with this is that
386 // some targets have a reduced number of larger registers than smaller
387 // registers and this choice potentially increases the live-range for the
389 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
390 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
395 /// Find a suitable place to insert some instructions and insert them. This
396 /// function accounts for special cases like inserting before a PHI node.
397 /// The current strategy for inserting before PHI's is to duplicate the
398 /// instructions for each predecessor. However, while that's ok for G_TRUNC
399 /// on most targets since it generally requires no code, other targets/cases may
400 /// want to try harder to find a dominating block.
401 static void InsertInsnsWithoutSideEffectsBeforeUse(
402 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
403 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
404 MachineOperand &UseMO)>
406 MachineInstr &UseMI = *UseMO.getParent();
408 MachineBasicBlock *InsertBB = UseMI.getParent();
410 // If the use is a PHI then we want the predecessor block instead.
412 MachineOperand *PredBB = std::next(&UseMO);
413 InsertBB = PredBB->getMBB();
416 // If the block is the same block as the def then we want to insert just after
417 // the def instead of at the start of the block.
418 if (InsertBB == DefMI.getParent()) {
419 MachineBasicBlock::iterator InsertPt = &DefMI;
420 Inserter(InsertBB, std::next(InsertPt), UseMO);
424 // Otherwise we want the start of the BB
425 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
427 } // end anonymous namespace
429 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
430 PreferredTuple Preferred;
431 if (matchCombineExtendingLoads(MI, Preferred)) {
432 applyCombineExtendingLoads(MI, Preferred);
438 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
439 PreferredTuple &Preferred) {
440 // We match the loads and follow the uses to the extend instead of matching
441 // the extends and following the def to the load. This is because the load
442 // must remain in the same position for correctness (unless we also add code
443 // to find a safe place to sink it) whereas the extend is freely movable.
444 // It also prevents us from duplicating the load for the volatile case or just
446 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
450 Register LoadReg = LoadMI->getDstReg();
452 LLT LoadValueTy = MRI.getType(LoadReg);
453 if (!LoadValueTy.isScalar())
456 // Most architectures are going to legalize <s8 loads into at least a 1 byte
457 // load, and the MMOs can only describe memory accesses in multiples of bytes.
458 // If we try to perform extload combining on those, we can end up with
459 // %a(s8) = extload %ptr (load 1 byte from %ptr)
460 // ... which is an illegal extload instruction.
461 if (LoadValueTy.getSizeInBits() < 8)
464 // For non power-of-2 types, they will very likely be legalized into multiple
465 // loads. Don't bother trying to match them into extending loads.
466 if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
469 // Find the preferred type aside from the any-extends (unless it's the only
470 // one) and non-extending ops. We'll emit an extending load to that type and
471 // and emit a variant of (extend (trunc X)) for the others according to the
472 // relative type sizes. At the same time, pick an extend to use based on the
473 // extend involved in the chosen type.
474 unsigned PreferredOpcode =
476 ? TargetOpcode::G_ANYEXT
477 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
478 Preferred = {LLT(), PreferredOpcode, nullptr};
479 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
480 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
481 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
482 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
483 const auto &MMO = LoadMI->getMMO();
484 // For atomics, only form anyextending loads.
485 if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
487 // Check for legality.
489 LegalityQuery::MemDesc MMDesc;
490 MMDesc.MemoryTy = MMO.getMemoryType();
491 MMDesc.AlignInBits = MMO.getAlign().value() * 8;
492 MMDesc.Ordering = MMO.getSuccessOrdering();
493 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
494 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
495 if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
496 .Action != LegalizeActions::Legal)
499 Preferred = ChoosePreferredUse(Preferred,
500 MRI.getType(UseMI.getOperand(0).getReg()),
501 UseMI.getOpcode(), &UseMI);
505 // There were no extends
508 // It should be impossible to chose an extend without selecting a different
509 // type since by definition the result of an extend is larger.
510 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
512 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
516 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
517 PreferredTuple &Preferred) {
518 // Rewrite the load to the chosen extending load.
519 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
521 // Inserter to insert a truncate back to the original type at a given point
522 // with some basic CSE to limit truncate duplication to one per BB.
523 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
524 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
525 MachineBasicBlock::iterator InsertBefore,
526 MachineOperand &UseMO) {
527 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
528 if (PreviouslyEmitted) {
529 Observer.changingInstr(*UseMO.getParent());
530 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
531 Observer.changedInstr(*UseMO.getParent());
535 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
536 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
537 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
538 EmittedInsns[InsertIntoBB] = NewMI;
539 replaceRegOpWith(MRI, UseMO, NewDstReg);
542 Observer.changingInstr(MI);
544 Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
545 ? TargetOpcode::G_SEXTLOAD
546 : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
547 ? TargetOpcode::G_ZEXTLOAD
548 : TargetOpcode::G_LOAD));
550 // Rewrite all the uses to fix up the types.
551 auto &LoadValue = MI.getOperand(0);
552 SmallVector<MachineOperand *, 4> Uses;
553 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
554 Uses.push_back(&UseMO);
556 for (auto *UseMO : Uses) {
557 MachineInstr *UseMI = UseMO->getParent();
559 // If the extend is compatible with the preferred extend then we should fix
560 // up the type and extend so that it uses the preferred use.
561 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
562 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
563 Register UseDstReg = UseMI->getOperand(0).getReg();
564 MachineOperand &UseSrcMO = UseMI->getOperand(1);
565 const LLT UseDstTy = MRI.getType(UseDstReg);
566 if (UseDstReg != ChosenDstReg) {
567 if (Preferred.Ty == UseDstTy) {
568 // If the use has the same type as the preferred use, then merge
569 // the vregs and erase the extend. For example:
570 // %1:_(s8) = G_LOAD ...
571 // %2:_(s32) = G_SEXT %1(s8)
572 // %3:_(s32) = G_ANYEXT %1(s8)
575 // %2:_(s32) = G_SEXTLOAD ...
577 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
578 Observer.erasingInstr(*UseMO->getParent());
579 UseMO->getParent()->eraseFromParent();
580 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
581 // If the preferred size is smaller, then keep the extend but extend
582 // from the result of the extending load. For example:
583 // %1:_(s8) = G_LOAD ...
584 // %2:_(s32) = G_SEXT %1(s8)
585 // %3:_(s64) = G_ANYEXT %1(s8)
588 // %2:_(s32) = G_SEXTLOAD ...
589 // %3:_(s64) = G_ANYEXT %2:_(s32)
591 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
593 // If the preferred size is large, then insert a truncate. For
595 // %1:_(s8) = G_LOAD ...
596 // %2:_(s64) = G_SEXT %1(s8)
597 // %3:_(s32) = G_ZEXT %1(s8)
600 // %2:_(s64) = G_SEXTLOAD ...
601 // %4:_(s8) = G_TRUNC %2:_(s32)
602 // %3:_(s64) = G_ZEXT %2:_(s8)
604 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
609 // The use is (one of) the uses of the preferred use we chose earlier.
610 // We're going to update the load to def this value later so just erase
612 Observer.erasingInstr(*UseMO->getParent());
613 UseMO->getParent()->eraseFromParent();
617 // The use isn't an extend. Truncate back to the type we originally loaded.
618 // This is free on many targets.
619 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
622 MI.getOperand(0).setReg(ChosenDstReg);
623 Observer.changedInstr(MI);
626 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
627 const MachineInstr &UseMI) {
628 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
629 "shouldn't consider debug uses");
630 assert(DefMI.getParent() == UseMI.getParent());
631 if (&DefMI == &UseMI)
633 const MachineBasicBlock &MBB = *DefMI.getParent();
634 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
635 return &MI == &DefMI || &MI == &UseMI;
637 if (DefOrUse == MBB.end())
638 llvm_unreachable("Block must contain both DefMI and UseMI!");
639 return &*DefOrUse == &DefMI;
642 bool CombinerHelper::dominates(const MachineInstr &DefMI,
643 const MachineInstr &UseMI) {
644 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
645 "shouldn't consider debug uses");
647 return MDT->dominates(&DefMI, &UseMI);
648 else if (DefMI.getParent() != UseMI.getParent())
651 return isPredecessor(DefMI, UseMI);
654 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
655 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
656 Register SrcReg = MI.getOperand(1).getReg();
657 Register LoadUser = SrcReg;
659 if (MRI.getType(SrcReg).isVector())
663 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
666 uint64_t SizeInBits = MI.getOperand(2).getImm();
667 // If the source is a G_SEXTLOAD from the same bit width, then we don't
668 // need any extend at all, just a truncate.
669 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
670 // If truncating more than the original extended value, abort.
671 auto LoadSizeBits = LoadMI->getMemSizeInBits();
672 if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
674 if (LoadSizeBits == SizeInBits)
680 void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
681 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
682 Builder.setInstrAndDebugLoc(MI);
683 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
684 MI.eraseFromParent();
687 bool CombinerHelper::matchSextInRegOfLoad(
688 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
689 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
691 // Only supports scalars for now.
692 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
695 Register SrcReg = MI.getOperand(1).getReg();
696 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
697 if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
698 !LoadDef->isSimple())
701 // If the sign extend extends from a narrower width than the load's width,
702 // then we can narrow the load width when we combine to a G_SEXTLOAD.
703 // Avoid widening the load at all.
704 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
705 LoadDef->getMemSizeInBits());
707 // Don't generate G_SEXTLOADs with a < 1 byte width.
710 // Don't bother creating a non-power-2 sextload, it will likely be broken up
711 // anyway for most targets.
712 if (!isPowerOf2_32(NewSizeBits))
714 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
718 void CombinerHelper::applySextInRegOfLoad(
719 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
720 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
722 unsigned ScalarSizeBits;
723 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
724 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
726 // If we have the following:
727 // %ld = G_LOAD %ptr, (load 2)
728 // %ext = G_SEXT_INREG %ld, 8
730 // %ld = G_SEXTLOAD %ptr (load 1)
732 auto &MMO = LoadDef->getMMO();
733 Builder.setInstrAndDebugLoc(*LoadDef);
734 auto &MF = Builder.getMF();
735 auto PtrInfo = MMO.getPointerInfo();
736 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
737 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
738 LoadDef->getPointerReg(), *NewMMO);
739 MI.eraseFromParent();
742 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
743 Register &Base, Register &Offset) {
744 auto &MF = *MI.getParent()->getParent();
745 const auto &TLI = *MF.getSubtarget().getTargetLowering();
748 unsigned Opcode = MI.getOpcode();
749 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
750 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
753 Base = MI.getOperand(1).getReg();
754 MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
755 if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
758 LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
759 // FIXME: The following use traversal needs a bail out for patholigical cases.
760 for (auto &Use : MRI.use_nodbg_instructions(Base)) {
761 if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
764 Offset = Use.getOperand(2).getReg();
765 if (!ForceLegalIndexing &&
766 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
767 LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
772 // Make sure the offset calculation is before the potentially indexed op.
773 // FIXME: we really care about dependency here. The offset calculation might
775 MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
776 if (!OffsetDef || !dominates(*OffsetDef, MI)) {
777 LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
782 // FIXME: check whether all uses of Base are load/store with foldable
783 // addressing modes. If so, using the normal addr-modes is better than
784 // forming an indexed one.
786 bool MemOpDominatesAddrUses = true;
787 for (auto &PtrAddUse :
788 MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
789 if (!dominates(MI, PtrAddUse)) {
790 MemOpDominatesAddrUses = false;
795 if (!MemOpDominatesAddrUses) {
797 dbgs() << " Ignoring candidate as memop does not dominate uses: "
802 LLVM_DEBUG(dbgs() << " Found match: " << Use);
803 Addr = Use.getOperand(0).getReg();
810 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
811 Register &Base, Register &Offset) {
812 auto &MF = *MI.getParent()->getParent();
813 const auto &TLI = *MF.getSubtarget().getTargetLowering();
816 unsigned Opcode = MI.getOpcode();
817 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
818 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
821 Addr = MI.getOperand(1).getReg();
822 MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
823 if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
826 Base = AddrDef->getOperand(1).getReg();
827 Offset = AddrDef->getOperand(2).getReg();
829 LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
831 if (!ForceLegalIndexing &&
832 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
833 LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
837 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
838 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
839 LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
843 if (MI.getOpcode() == TargetOpcode::G_STORE) {
844 // Would require a copy.
845 if (Base == MI.getOperand(0).getReg()) {
846 LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
850 // We're expecting one use of Addr in MI, but it could also be the
851 // value stored, which isn't actually dominated by the instruction.
852 if (MI.getOperand(0).getReg() == Addr) {
853 LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
858 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
859 // That might allow us to end base's liveness here by adjusting the constant.
861 for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
862 if (!dominates(MI, UseMI)) {
863 LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
871 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
872 IndexedLoadStoreMatchInfo MatchInfo;
873 if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
874 applyCombineIndexedLoadStore(MI, MatchInfo);
880 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
881 unsigned Opcode = MI.getOpcode();
882 if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
883 Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
886 // For now, no targets actually support these opcodes so don't waste time
887 // running these unless we're forced to for testing.
888 if (!ForceLegalIndexing)
891 MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
893 if (!MatchInfo.IsPre &&
894 !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
901 void CombinerHelper::applyCombineIndexedLoadStore(
902 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
903 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
904 MachineIRBuilder MIRBuilder(MI);
905 unsigned Opcode = MI.getOpcode();
906 bool IsStore = Opcode == TargetOpcode::G_STORE;
909 case TargetOpcode::G_LOAD:
910 NewOpcode = TargetOpcode::G_INDEXED_LOAD;
912 case TargetOpcode::G_SEXTLOAD:
913 NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
915 case TargetOpcode::G_ZEXTLOAD:
916 NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
918 case TargetOpcode::G_STORE:
919 NewOpcode = TargetOpcode::G_INDEXED_STORE;
922 llvm_unreachable("Unknown load/store opcode");
925 auto MIB = MIRBuilder.buildInstr(NewOpcode);
927 MIB.addDef(MatchInfo.Addr);
928 MIB.addUse(MI.getOperand(0).getReg());
930 MIB.addDef(MI.getOperand(0).getReg());
931 MIB.addDef(MatchInfo.Addr);
934 MIB.addUse(MatchInfo.Base);
935 MIB.addUse(MatchInfo.Offset);
936 MIB.addImm(MatchInfo.IsPre);
937 MI.eraseFromParent();
938 AddrDef.eraseFromParent();
940 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
943 bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
944 MachineInstr *&OtherMI) {
945 unsigned Opcode = MI.getOpcode();
946 bool IsDiv, IsSigned;
950 llvm_unreachable("Unexpected opcode!");
951 case TargetOpcode::G_SDIV:
952 case TargetOpcode::G_UDIV: {
954 IsSigned = Opcode == TargetOpcode::G_SDIV;
957 case TargetOpcode::G_SREM:
958 case TargetOpcode::G_UREM: {
960 IsSigned = Opcode == TargetOpcode::G_SREM;
965 Register Src1 = MI.getOperand(1).getReg();
966 unsigned DivOpcode, RemOpcode, DivremOpcode;
968 DivOpcode = TargetOpcode::G_SDIV;
969 RemOpcode = TargetOpcode::G_SREM;
970 DivremOpcode = TargetOpcode::G_SDIVREM;
972 DivOpcode = TargetOpcode::G_UDIV;
973 RemOpcode = TargetOpcode::G_UREM;
974 DivremOpcode = TargetOpcode::G_UDIVREM;
977 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
981 // %div:_ = G_[SU]DIV %src1:_, %src2:_
982 // %rem:_ = G_[SU]REM %src1:_, %src2:_
984 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
987 // %rem:_ = G_[SU]REM %src1:_, %src2:_
988 // %div:_ = G_[SU]DIV %src1:_, %src2:_
990 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
992 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
993 if (MI.getParent() == UseMI.getParent() &&
994 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
995 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
996 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
1005 void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1006 MachineInstr *&OtherMI) {
1007 unsigned Opcode = MI.getOpcode();
1008 assert(OtherMI && "OtherMI shouldn't be empty.");
1010 Register DestDivReg, DestRemReg;
1011 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1012 DestDivReg = MI.getOperand(0).getReg();
1013 DestRemReg = OtherMI->getOperand(0).getReg();
1015 DestDivReg = OtherMI->getOperand(0).getReg();
1016 DestRemReg = MI.getOperand(0).getReg();
1020 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1022 // Check which instruction is first in the block so we don't break def-use
1023 // deps by "moving" the instruction incorrectly.
1024 if (dominates(MI, *OtherMI))
1025 Builder.setInstrAndDebugLoc(MI);
1027 Builder.setInstrAndDebugLoc(*OtherMI);
1029 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1030 : TargetOpcode::G_UDIVREM,
1031 {DestDivReg, DestRemReg},
1032 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
1033 MI.eraseFromParent();
1034 OtherMI->eraseFromParent();
1037 bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1038 MachineInstr *&BrCond) {
1039 assert(MI.getOpcode() == TargetOpcode::G_BR);
1041 // Try to match the following:
1043 // G_BRCOND %c1, %bb2
1049 // The above pattern does not have a fall through to the successor bb2, always
1050 // resulting in a branch no matter which path is taken. Here we try to find
1051 // and replace that pattern with conditional branch to bb3 and otherwise
1052 // fallthrough to bb2. This is generally better for branch predictors.
1054 MachineBasicBlock *MBB = MI.getParent();
1055 MachineBasicBlock::iterator BrIt(MI);
1056 if (BrIt == MBB->begin())
1058 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1060 BrCond = &*std::prev(BrIt);
1061 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1064 // Check that the next block is the conditional branch target. Also make sure
1065 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1066 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1067 return BrCondTarget != MI.getOperand(0).getMBB() &&
1068 MBB->isLayoutSuccessor(BrCondTarget);
1071 void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1072 MachineInstr *&BrCond) {
1073 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1074 Builder.setInstrAndDebugLoc(*BrCond);
1075 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1076 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1077 // this to i1 only since we might not know for sure what kind of
1078 // compare generated the condition value.
1079 auto True = Builder.buildConstant(
1080 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1081 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1083 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1084 Observer.changingInstr(MI);
1085 MI.getOperand(0).setMBB(FallthroughBB);
1086 Observer.changedInstr(MI);
1088 // Change the conditional branch to use the inverted condition and
1089 // new target block.
1090 Observer.changingInstr(*BrCond);
1091 BrCond->getOperand(0).setReg(Xor.getReg(0));
1092 BrCond->getOperand(1).setMBB(BrTarget);
1093 Observer.changedInstr(*BrCond);
1096 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
1097 // On Darwin, -Os means optimize for size without hurting performance, so
1098 // only really optimize for size when -Oz (MinSize) is used.
1099 if (MF.getTarget().getTargetTriple().isOSDarwin())
1100 return MF.getFunction().hasMinSize();
1101 return MF.getFunction().hasOptSize();
1104 // Returns a list of types to use for memory op lowering in MemOps. A partial
1105 // port of findOptimalMemOpLowering in TargetLowering.
1106 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
1107 unsigned Limit, const MemOp &Op,
1108 unsigned DstAS, unsigned SrcAS,
1109 const AttributeList &FuncAttributes,
1110 const TargetLowering &TLI) {
1111 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
1114 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
1117 // Use the largest scalar type whose alignment constraints are satisfied.
1118 // We only need to check DstAlign here as SrcAlign is always greater or
1119 // equal to DstAlign (or zero).
1120 Ty = LLT::scalar(64);
1121 if (Op.isFixedDstAlign())
1122 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
1123 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
1124 Ty = LLT::scalar(Ty.getSizeInBytes());
1125 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
1126 // FIXME: check for the largest legal type we can load/store to.
1129 unsigned NumMemOps = 0;
1130 uint64_t Size = Op.size();
1132 unsigned TySize = Ty.getSizeInBytes();
1133 while (TySize > Size) {
1134 // For now, only use non-vector load / store's for the left-over pieces.
1136 // FIXME: check for mem op safety and legality of the types. Not all of
1137 // SDAGisms map cleanly to GISel concepts.
1138 if (NewTy.isVector())
1139 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
1140 NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
1141 unsigned NewTySize = NewTy.getSizeInBytes();
1142 assert(NewTySize > 0 && "Could not find appropriate type");
1144 // If the new LLT cannot cover all of the remaining bits, then consider
1145 // issuing a (or a pair of) unaligned and overlapping load / store.
1147 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
1148 MVT VT = getMVTForLLT(Ty);
1149 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
1150 TLI.allowsMisalignedMemoryAccesses(
1151 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
1152 MachineMemOperand::MONone, &Fast) &&
1161 if (++NumMemOps > Limit)
1164 MemOps.push_back(Ty);
1171 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1173 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1174 Ty.getNumElements());
1175 return IntegerType::get(C, Ty.getSizeInBits());
1178 // Get a vectorized representation of the memset value operand, GISel edition.
1179 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
1180 MachineRegisterInfo &MRI = *MIB.getMRI();
1181 unsigned NumBits = Ty.getScalarSizeInBits();
1182 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1183 if (!Ty.isVector() && ValVRegAndVal) {
1184 APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
1185 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
1186 return MIB.buildConstant(Ty, SplatVal).getReg(0);
1189 // Extend the byte value to the larger type, and then multiply by a magic
1190 // value 0x010101... in order to replicate it across every byte.
1191 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
1192 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
1193 return MIB.buildConstant(Ty, 0).getReg(0);
1196 LLT ExtType = Ty.getScalarType();
1197 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
1199 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
1200 auto MagicMI = MIB.buildConstant(ExtType, Magic);
1201 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
1204 // For vector types create a G_BUILD_VECTOR.
1206 Val = MIB.buildSplatVector(Ty, Val).getReg(0);
1211 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
1212 Register Val, uint64_t KnownLen,
1213 Align Alignment, bool IsVolatile) {
1214 auto &MF = *MI.getParent()->getParent();
1215 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1216 auto &DL = MF.getDataLayout();
1217 LLVMContext &C = MF.getFunction().getContext();
1219 assert(KnownLen != 0 && "Have a zero length memset length!");
1221 bool DstAlignCanChange = false;
1222 MachineFrameInfo &MFI = MF.getFrameInfo();
1223 bool OptSize = shouldLowerMemFuncForSize(MF);
1225 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1226 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1227 DstAlignCanChange = true;
1229 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1230 std::vector<LLT> MemOps;
1232 const auto &DstMMO = **MI.memoperands_begin();
1233 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1235 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1236 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1238 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1239 MemOp::Set(KnownLen, DstAlignCanChange,
1241 /*IsZeroMemset=*/IsZeroVal,
1242 /*IsVolatile=*/IsVolatile),
1243 DstPtrInfo.getAddrSpace(), ~0u,
1244 MF.getFunction().getAttributes(), TLI))
1247 if (DstAlignCanChange) {
1248 // Get an estimate of the type from the LLT.
1249 Type *IRTy = getTypeForLLT(MemOps[0], C);
1250 Align NewAlign = DL.getABITypeAlign(IRTy);
1251 if (NewAlign > Alignment) {
1252 Alignment = NewAlign;
1253 unsigned FI = FIDef->getOperand(1).getIndex();
1254 // Give the stack frame object a larger alignment if needed.
1255 if (MFI.getObjectAlign(FI) < Alignment)
1256 MFI.setObjectAlignment(FI, Alignment);
1260 MachineIRBuilder MIB(MI);
1261 // Find the largest store and generate the bit pattern for it.
1262 LLT LargestTy = MemOps[0];
1263 for (unsigned i = 1; i < MemOps.size(); i++)
1264 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1265 LargestTy = MemOps[i];
1267 // The memset stored value is always defined as an s8, so in order to make it
1268 // work with larger store types we need to repeat the bit pattern across the
1270 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1275 // Generate the stores. For each store type in the list, we generate the
1276 // matching store of that type to the destination address.
1277 LLT PtrTy = MRI.getType(Dst);
1278 unsigned DstOff = 0;
1279 unsigned Size = KnownLen;
1280 for (unsigned I = 0; I < MemOps.size(); I++) {
1282 unsigned TySize = Ty.getSizeInBytes();
1283 if (TySize > Size) {
1284 // Issuing an unaligned load / store pair that overlaps with the previous
1285 // pair. Adjust the offset accordingly.
1286 assert(I == MemOps.size() - 1 && I != 0);
1287 DstOff -= TySize - Size;
1290 // If this store is smaller than the largest store see whether we can get
1291 // the smaller value for free with a truncate.
1292 Register Value = MemSetValue;
1293 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1294 MVT VT = getMVTForLLT(Ty);
1295 MVT LargestVT = getMVTForLLT(LargestTy);
1296 if (!LargestTy.isVector() && !Ty.isVector() &&
1297 TLI.isTruncateFree(LargestVT, VT))
1298 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1300 Value = getMemsetValue(Val, Ty, MIB);
1306 MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
1311 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1312 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1315 MIB.buildStore(Value, Ptr, *StoreMMO);
1316 DstOff += Ty.getSizeInBytes();
1320 MI.eraseFromParent();
1324 bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1325 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
1327 Register Dst = MI.getOperand(0).getReg();
1328 Register Src = MI.getOperand(1).getReg();
1329 Register Len = MI.getOperand(2).getReg();
1331 const auto *MMOIt = MI.memoperands_begin();
1332 const MachineMemOperand *MemOp = *MMOIt;
1333 bool IsVolatile = MemOp->isVolatile();
1335 // See if this is a constant length copy
1336 auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1337 // FIXME: support dynamically sized G_MEMCPY_INLINE
1338 assert(LenVRegAndVal.hasValue() &&
1339 "inline memcpy with dynamic size is not yet supported");
1340 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
1341 if (KnownLen == 0) {
1342 MI.eraseFromParent();
1346 const auto &DstMMO = **MI.memoperands_begin();
1347 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1348 Align DstAlign = DstMMO.getBaseAlign();
1349 Align SrcAlign = SrcMMO.getBaseAlign();
1351 return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
1355 bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
1356 Register Src, uint64_t KnownLen,
1357 Align DstAlign, Align SrcAlign,
1359 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
1360 return optimizeMemcpy(MI, Dst, Src, KnownLen,
1361 std::numeric_limits<uint64_t>::max(), DstAlign,
1362 SrcAlign, IsVolatile);
1365 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1366 Register Src, uint64_t KnownLen,
1367 uint64_t Limit, Align DstAlign,
1368 Align SrcAlign, bool IsVolatile) {
1369 auto &MF = *MI.getParent()->getParent();
1370 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1371 auto &DL = MF.getDataLayout();
1372 LLVMContext &C = MF.getFunction().getContext();
1374 assert(KnownLen != 0 && "Have a zero length memcpy length!");
1376 bool DstAlignCanChange = false;
1377 MachineFrameInfo &MFI = MF.getFrameInfo();
1378 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1380 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1381 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1382 DstAlignCanChange = true;
1384 // FIXME: infer better src pointer alignment like SelectionDAG does here.
1385 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1386 // if the memcpy is in a tail call position.
1388 std::vector<LLT> MemOps;
1390 const auto &DstMMO = **MI.memoperands_begin();
1391 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1392 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1393 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1395 if (!findGISelOptimalMemOpLowering(
1397 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1399 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1400 MF.getFunction().getAttributes(), TLI))
1403 if (DstAlignCanChange) {
1404 // Get an estimate of the type from the LLT.
1405 Type *IRTy = getTypeForLLT(MemOps[0], C);
1406 Align NewAlign = DL.getABITypeAlign(IRTy);
1408 // Don't promote to an alignment that would require dynamic stack
1410 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1411 if (!TRI->hasStackRealignment(MF))
1412 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1413 NewAlign = NewAlign / 2;
1415 if (NewAlign > Alignment) {
1416 Alignment = NewAlign;
1417 unsigned FI = FIDef->getOperand(1).getIndex();
1418 // Give the stack frame object a larger alignment if needed.
1419 if (MFI.getObjectAlign(FI) < Alignment)
1420 MFI.setObjectAlignment(FI, Alignment);
1424 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1426 MachineIRBuilder MIB(MI);
1427 // Now we need to emit a pair of load and stores for each of the types we've
1428 // collected. I.e. for each type, generate a load from the source pointer of
1429 // that type width, and then generate a corresponding store to the dest buffer
1430 // of that value loaded. This can result in a sequence of loads and stores
1431 // mixed types, depending on what the target specifies as good types to use.
1432 unsigned CurrOffset = 0;
1433 LLT PtrTy = MRI.getType(Src);
1434 unsigned Size = KnownLen;
1435 for (auto CopyTy : MemOps) {
1436 // Issuing an unaligned load / store pair that overlaps with the previous
1437 // pair. Adjust the offset accordingly.
1438 if (CopyTy.getSizeInBytes() > Size)
1439 CurrOffset -= CopyTy.getSizeInBytes() - Size;
1441 // Construct MMOs for the accesses.
1443 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1445 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1448 Register LoadPtr = Src;
1450 if (CurrOffset != 0) {
1451 Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1453 LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1455 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1457 // Create the store.
1459 CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1460 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1461 CurrOffset += CopyTy.getSizeInBytes();
1462 Size -= CopyTy.getSizeInBytes();
1465 MI.eraseFromParent();
1469 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1470 Register Src, uint64_t KnownLen,
1471 Align DstAlign, Align SrcAlign,
1473 auto &MF = *MI.getParent()->getParent();
1474 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1475 auto &DL = MF.getDataLayout();
1476 LLVMContext &C = MF.getFunction().getContext();
1478 assert(KnownLen != 0 && "Have a zero length memmove length!");
1480 bool DstAlignCanChange = false;
1481 MachineFrameInfo &MFI = MF.getFrameInfo();
1482 bool OptSize = shouldLowerMemFuncForSize(MF);
1483 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1485 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1486 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1487 DstAlignCanChange = true;
1489 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1490 std::vector<LLT> MemOps;
1492 const auto &DstMMO = **MI.memoperands_begin();
1493 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1494 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1495 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1497 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1498 // to a bug in it's findOptimalMemOpLowering implementation. For now do the
1500 if (!findGISelOptimalMemOpLowering(
1502 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1503 /*IsVolatile*/ true),
1504 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1505 MF.getFunction().getAttributes(), TLI))
1508 if (DstAlignCanChange) {
1509 // Get an estimate of the type from the LLT.
1510 Type *IRTy = getTypeForLLT(MemOps[0], C);
1511 Align NewAlign = DL.getABITypeAlign(IRTy);
1513 // Don't promote to an alignment that would require dynamic stack
1515 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1516 if (!TRI->hasStackRealignment(MF))
1517 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1518 NewAlign = NewAlign / 2;
1520 if (NewAlign > Alignment) {
1521 Alignment = NewAlign;
1522 unsigned FI = FIDef->getOperand(1).getIndex();
1523 // Give the stack frame object a larger alignment if needed.
1524 if (MFI.getObjectAlign(FI) < Alignment)
1525 MFI.setObjectAlignment(FI, Alignment);
1529 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1531 MachineIRBuilder MIB(MI);
1532 // Memmove requires that we perform the loads first before issuing the stores.
1533 // Apart from that, this loop is pretty much doing the same thing as the
1534 // memcpy codegen function.
1535 unsigned CurrOffset = 0;
1536 LLT PtrTy = MRI.getType(Src);
1537 SmallVector<Register, 16> LoadVals;
1538 for (auto CopyTy : MemOps) {
1539 // Construct MMO for the load.
1541 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1544 Register LoadPtr = Src;
1545 if (CurrOffset != 0) {
1547 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1548 LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1550 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1551 CurrOffset += CopyTy.getSizeInBytes();
1555 for (unsigned I = 0; I < MemOps.size(); ++I) {
1556 LLT CopyTy = MemOps[I];
1557 // Now store the values loaded.
1559 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1561 Register StorePtr = Dst;
1562 if (CurrOffset != 0) {
1564 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1565 StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1567 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1568 CurrOffset += CopyTy.getSizeInBytes();
1570 MI.eraseFromParent();
1574 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1575 const unsigned Opc = MI.getOpcode();
1576 // This combine is fairly complex so it's not written with a separate
1577 // matcher function.
1578 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
1579 Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
1581 auto MMOIt = MI.memoperands_begin();
1582 const MachineMemOperand *MemOp = *MMOIt;
1584 Align DstAlign = MemOp->getBaseAlign();
1586 Register Dst = MI.getOperand(0).getReg();
1587 Register Src = MI.getOperand(1).getReg();
1588 Register Len = MI.getOperand(2).getReg();
1590 if (Opc != TargetOpcode::G_MEMSET) {
1591 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1593 SrcAlign = MemOp->getBaseAlign();
1596 // See if this is a constant length copy
1597 auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1599 return false; // Leave it to the legalizer to lower it to a libcall.
1600 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
1602 if (KnownLen == 0) {
1603 MI.eraseFromParent();
1607 bool IsVolatile = MemOp->isVolatile();
1608 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
1609 return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
1612 // Don't try to optimize volatile.
1616 if (MaxLen && KnownLen > MaxLen)
1619 if (Opc == TargetOpcode::G_MEMCPY) {
1620 auto &MF = *MI.getParent()->getParent();
1621 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1622 bool OptSize = shouldLowerMemFuncForSize(MF);
1623 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1624 return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
1627 if (Opc == TargetOpcode::G_MEMMOVE)
1628 return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1629 if (Opc == TargetOpcode::G_MEMSET)
1630 return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1634 static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
1636 const MachineRegisterInfo &MRI) {
1637 const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
1641 APFloat V = MaybeCst->getValueAPF();
1644 llvm_unreachable("Unexpected opcode!");
1645 case TargetOpcode::G_FNEG: {
1649 case TargetOpcode::G_FABS: {
1653 case TargetOpcode::G_FPTRUNC:
1655 case TargetOpcode::G_FSQRT: {
1657 V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1658 V = APFloat(sqrt(V.convertToDouble()));
1661 case TargetOpcode::G_FLOG2: {
1663 V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1664 V = APFloat(log2(V.convertToDouble()));
1668 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1669 // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
1670 // and `G_FLOG2` reach here.
1672 V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
1676 bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
1677 Optional<APFloat> &Cst) {
1678 Register DstReg = MI.getOperand(0).getReg();
1679 Register SrcReg = MI.getOperand(1).getReg();
1680 LLT DstTy = MRI.getType(DstReg);
1681 Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
1682 return Cst.hasValue();
1685 void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1686 Optional<APFloat> &Cst) {
1687 assert(Cst.hasValue() && "Optional is unexpectedly empty!");
1688 Builder.setInstrAndDebugLoc(MI);
1689 MachineFunction &MF = Builder.getMF();
1690 auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
1691 Register DstReg = MI.getOperand(0).getReg();
1692 Builder.buildFConstant(DstReg, *FPVal);
1693 MI.eraseFromParent();
1696 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1697 PtrAddChain &MatchInfo) {
1698 // We're trying to match the following pattern:
1699 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1700 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1702 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1704 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1707 Register Add2 = MI.getOperand(1).getReg();
1708 Register Imm1 = MI.getOperand(2).getReg();
1709 auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1713 // Don't do this combine if there multiple uses of the first PTR_ADD,
1714 // since we may be able to compute the second PTR_ADD as an immediate
1715 // offset anyway. Folding the first offset into the second may cause us
1716 // to go beyond the bounds of our legal addressing modes.
1717 if (!MRI.hasOneNonDBGUse(Add2))
1720 MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1721 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1724 Register Base = Add2Def->getOperand(1).getReg();
1725 Register Imm2 = Add2Def->getOperand(2).getReg();
1726 auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1730 // Pass the combined immediate to the apply function.
1731 MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
1732 MatchInfo.Base = Base;
1736 void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1737 PtrAddChain &MatchInfo) {
1738 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1739 MachineIRBuilder MIB(MI);
1740 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1741 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1742 Observer.changingInstr(MI);
1743 MI.getOperand(1).setReg(MatchInfo.Base);
1744 MI.getOperand(2).setReg(NewOffset.getReg(0));
1745 Observer.changedInstr(MI);
1748 bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1749 RegisterImmPair &MatchInfo) {
1750 // We're trying to match the following pattern with any of
1751 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1752 // %t1 = SHIFT %base, G_CONSTANT imm1
1753 // %root = SHIFT %t1, G_CONSTANT imm2
1755 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1757 unsigned Opcode = MI.getOpcode();
1758 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1759 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1760 Opcode == TargetOpcode::G_USHLSAT) &&
1761 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1763 Register Shl2 = MI.getOperand(1).getReg();
1764 Register Imm1 = MI.getOperand(2).getReg();
1765 auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1769 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1770 if (Shl2Def->getOpcode() != Opcode)
1773 Register Base = Shl2Def->getOperand(1).getReg();
1774 Register Imm2 = Shl2Def->getOperand(2).getReg();
1775 auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1779 // Pass the combined immediate to the apply function.
1781 (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
1782 MatchInfo.Reg = Base;
1784 // There is no simple replacement for a saturating unsigned left shift that
1785 // exceeds the scalar size.
1786 if (Opcode == TargetOpcode::G_USHLSAT &&
1787 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1793 void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1794 RegisterImmPair &MatchInfo) {
1795 unsigned Opcode = MI.getOpcode();
1796 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1797 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1798 Opcode == TargetOpcode::G_USHLSAT) &&
1799 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1801 Builder.setInstrAndDebugLoc(MI);
1802 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1803 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1804 auto Imm = MatchInfo.Imm;
1806 if (Imm >= ScalarSizeInBits) {
1807 // Any logical shift that exceeds scalar size will produce zero.
1808 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1809 Builder.buildConstant(MI.getOperand(0), 0);
1810 MI.eraseFromParent();
1813 // Arithmetic shift and saturating signed left shift have no effect beyond
1815 Imm = ScalarSizeInBits - 1;
1818 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1819 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1820 Observer.changingInstr(MI);
1821 MI.getOperand(1).setReg(MatchInfo.Reg);
1822 MI.getOperand(2).setReg(NewImm);
1823 Observer.changedInstr(MI);
1826 bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1827 ShiftOfShiftedLogic &MatchInfo) {
1828 // We're trying to match the following pattern with any of
1829 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1830 // with any of G_AND/G_OR/G_XOR logic instructions.
1831 // %t1 = SHIFT %X, G_CONSTANT C0
1832 // %t2 = LOGIC %t1, %Y
1833 // %root = SHIFT %t2, G_CONSTANT C1
1835 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1836 // %t4 = SHIFT %Y, G_CONSTANT C1
1837 // %root = LOGIC %t3, %t4
1838 unsigned ShiftOpcode = MI.getOpcode();
1839 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1840 ShiftOpcode == TargetOpcode::G_ASHR ||
1841 ShiftOpcode == TargetOpcode::G_LSHR ||
1842 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1843 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1844 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1846 // Match a one-use bitwise logic op.
1847 Register LogicDest = MI.getOperand(1).getReg();
1848 if (!MRI.hasOneNonDBGUse(LogicDest))
1851 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1852 unsigned LogicOpcode = LogicMI->getOpcode();
1853 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1854 LogicOpcode != TargetOpcode::G_XOR)
1857 // Find a matching one-use shift by constant.
1858 const Register C1 = MI.getOperand(2).getReg();
1859 auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
1863 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1865 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1866 // Shift should match previous one and should be a one-use.
1867 if (MI->getOpcode() != ShiftOpcode ||
1868 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1871 // Must be a constant.
1873 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1877 ShiftVal = MaybeImmVal->Value.getSExtValue();
1881 // Logic ops are commutative, so check each operand for a match.
1882 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1883 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1884 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1885 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1888 if (matchFirstShift(LogicMIOp1, C0Val)) {
1889 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1890 MatchInfo.Shift2 = LogicMIOp1;
1891 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1892 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1893 MatchInfo.Shift2 = LogicMIOp2;
1897 MatchInfo.ValSum = C0Val + C1Val;
1899 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1900 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1903 MatchInfo.Logic = LogicMI;
1907 void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1908 ShiftOfShiftedLogic &MatchInfo) {
1909 unsigned Opcode = MI.getOpcode();
1910 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1911 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1912 Opcode == TargetOpcode::G_SSHLSAT) &&
1913 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1915 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1916 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1917 Builder.setInstrAndDebugLoc(MI);
1919 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1921 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1923 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1925 Register Shift2Const = MI.getOperand(2).getReg();
1926 Register Shift2 = Builder
1927 .buildInstr(Opcode, {DestType},
1928 {MatchInfo.LogicNonShiftReg, Shift2Const})
1931 Register Dest = MI.getOperand(0).getReg();
1932 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1934 // These were one use so it's safe to remove them.
1935 MatchInfo.Shift2->eraseFromParent();
1936 MatchInfo.Logic->eraseFromParent();
1938 MI.eraseFromParent();
1941 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1942 unsigned &ShiftVal) {
1943 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1945 getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1949 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1950 return (static_cast<int32_t>(ShiftVal) != -1);
1953 void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1954 unsigned &ShiftVal) {
1955 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1956 MachineIRBuilder MIB(MI);
1957 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1958 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1959 Observer.changingInstr(MI);
1960 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1961 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1962 Observer.changedInstr(MI);
1965 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
1966 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1967 RegisterImmPair &MatchData) {
1968 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1970 Register LHS = MI.getOperand(1).getReg();
1973 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1974 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1975 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1978 // TODO: Should handle vector splat.
1979 Register RHS = MI.getOperand(2).getReg();
1980 auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
1981 if (!MaybeShiftAmtVal)
1985 LLT SrcTy = MRI.getType(ExtSrc);
1987 // We only really care about the legality with the shifted value. We can
1988 // pick any type the constant shift amount, so ask the target what to
1989 // use. Otherwise we would have to guess and hope it is reported as legal.
1990 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1991 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1995 int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
1996 MatchData.Reg = ExtSrc;
1997 MatchData.Imm = ShiftAmt;
1999 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
2000 return MinLeadingZeros >= ShiftAmt;
2003 void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2004 const RegisterImmPair &MatchData) {
2005 Register ExtSrcReg = MatchData.Reg;
2006 int64_t ShiftAmtVal = MatchData.Imm;
2008 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2009 Builder.setInstrAndDebugLoc(MI);
2010 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2012 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2013 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2014 MI.eraseFromParent();
2017 bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2018 Register &MatchInfo) {
2019 GMerge &Merge = cast<GMerge>(MI);
2020 SmallVector<Register, 16> MergedValues;
2021 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2022 MergedValues.emplace_back(Merge.getSourceReg(I));
2024 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2025 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2028 for (unsigned I = 0; I < MergedValues.size(); ++I)
2029 if (MergedValues[I] != Unmerge->getReg(I))
2032 MatchInfo = Unmerge->getSourceReg();
2036 static Register peekThroughBitcast(Register Reg,
2037 const MachineRegisterInfo &MRI) {
2038 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2044 bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2045 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2046 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2047 "Expected an unmerge");
2049 peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
2051 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2052 if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
2053 SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
2054 SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
2057 // Check the source type of the merge.
2058 LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
2059 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2060 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2061 if (SrcMergeTy != Dst0Ty && !SameSize)
2063 // They are the same now (modulo a bitcast).
2064 // We can collect all the src registers.
2065 for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
2067 Operands.push_back(SrcInstr->getOperand(Idx).getReg());
2071 void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2072 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2073 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2074 "Expected an unmerge");
2075 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2076 "Not enough operands to replace all defs");
2077 unsigned NumElems = MI.getNumOperands() - 1;
2079 LLT SrcTy = MRI.getType(Operands[0]);
2080 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2081 bool CanReuseInputDirectly = DstTy == SrcTy;
2082 Builder.setInstrAndDebugLoc(MI);
2083 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2084 Register DstReg = MI.getOperand(Idx).getReg();
2085 Register SrcReg = Operands[Idx];
2086 if (CanReuseInputDirectly)
2087 replaceRegWith(MRI, DstReg, SrcReg);
2089 Builder.buildCast(DstReg, SrcReg);
2091 MI.eraseFromParent();
2094 bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2095 SmallVectorImpl<APInt> &Csts) {
2096 unsigned SrcIdx = MI.getNumOperands() - 1;
2097 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2098 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2099 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2100 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2102 // Break down the big constant in smaller ones.
2103 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2104 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2105 ? CstVal.getCImm()->getValue()
2106 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2108 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2109 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2110 // Unmerge a constant.
2111 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2112 Csts.emplace_back(Val.trunc(ShiftAmt));
2113 Val = Val.lshr(ShiftAmt);
2119 void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2120 SmallVectorImpl<APInt> &Csts) {
2121 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2122 "Expected an unmerge");
2123 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2124 "Not enough operands to replace all defs");
2125 unsigned NumElems = MI.getNumOperands() - 1;
2126 Builder.setInstrAndDebugLoc(MI);
2127 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2128 Register DstReg = MI.getOperand(Idx).getReg();
2129 Builder.buildConstant(DstReg, Csts[Idx]);
2132 MI.eraseFromParent();
2135 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2136 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2137 "Expected an unmerge");
2138 // Check that all the lanes are dead except the first one.
2139 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2140 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2146 void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2147 Builder.setInstrAndDebugLoc(MI);
2148 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2149 // Truncating a vector is going to truncate every single lane,
2150 // whereas we want the full lowbits.
2151 // Do the operation on a scalar instead.
2152 LLT SrcTy = MRI.getType(SrcReg);
2153 if (SrcTy.isVector())
2155 Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
2157 Register Dst0Reg = MI.getOperand(0).getReg();
2158 LLT Dst0Ty = MRI.getType(Dst0Reg);
2159 if (Dst0Ty.isVector()) {
2160 auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
2161 Builder.buildCast(Dst0Reg, MIB);
2163 Builder.buildTrunc(Dst0Reg, SrcReg);
2164 MI.eraseFromParent();
2167 bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2168 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2169 "Expected an unmerge");
2170 Register Dst0Reg = MI.getOperand(0).getReg();
2171 LLT Dst0Ty = MRI.getType(Dst0Reg);
2172 // G_ZEXT on vector applies to each lane, so it will
2173 // affect all destinations. Therefore we won't be able
2174 // to simplify the unmerge to just the first definition.
2175 if (Dst0Ty.isVector())
2177 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2178 LLT SrcTy = MRI.getType(SrcReg);
2179 if (SrcTy.isVector())
2182 Register ZExtSrcReg;
2183 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2186 // Finally we can replace the first definition with
2187 // a zext of the source if the definition is big enough to hold
2188 // all of ZExtSrc bits.
2189 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2190 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2193 void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2194 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2195 "Expected an unmerge");
2197 Register Dst0Reg = MI.getOperand(0).getReg();
2199 MachineInstr *ZExtInstr =
2200 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2201 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2202 "Expecting a G_ZEXT");
2204 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2205 LLT Dst0Ty = MRI.getType(Dst0Reg);
2206 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2208 Builder.setInstrAndDebugLoc(MI);
2210 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2211 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2213 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2214 "ZExt src doesn't fit in destination");
2215 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2219 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2221 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2222 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2224 MI.eraseFromParent();
2227 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2228 unsigned TargetShiftSize,
2229 unsigned &ShiftVal) {
2230 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2231 MI.getOpcode() == TargetOpcode::G_LSHR ||
2232 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2234 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2235 if (Ty.isVector()) // TODO:
2238 // Don't narrow further than the requested size.
2239 unsigned Size = Ty.getSizeInBits();
2240 if (Size <= TargetShiftSize)
2244 getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2248 ShiftVal = MaybeImmVal->Value.getSExtValue();
2249 return ShiftVal >= Size / 2 && ShiftVal < Size;
2252 void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2253 const unsigned &ShiftVal) {
2254 Register DstReg = MI.getOperand(0).getReg();
2255 Register SrcReg = MI.getOperand(1).getReg();
2256 LLT Ty = MRI.getType(SrcReg);
2257 unsigned Size = Ty.getSizeInBits();
2258 unsigned HalfSize = Size / 2;
2259 assert(ShiftVal >= HalfSize);
2261 LLT HalfTy = LLT::scalar(HalfSize);
2263 Builder.setInstr(MI);
2264 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2265 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2267 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2268 Register Narrowed = Unmerge.getReg(1);
2270 // dst = G_LSHR s64:x, C for C >= 32
2272 // lo, hi = G_UNMERGE_VALUES x
2273 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2275 if (NarrowShiftAmt != 0) {
2276 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2277 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2280 auto Zero = Builder.buildConstant(HalfTy, 0);
2281 Builder.buildMerge(DstReg, { Narrowed, Zero });
2282 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2283 Register Narrowed = Unmerge.getReg(0);
2284 // dst = G_SHL s64:x, C for C >= 32
2286 // lo, hi = G_UNMERGE_VALUES x
2287 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2288 if (NarrowShiftAmt != 0) {
2289 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2290 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2293 auto Zero = Builder.buildConstant(HalfTy, 0);
2294 Builder.buildMerge(DstReg, { Zero, Narrowed });
2296 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2297 auto Hi = Builder.buildAShr(
2298 HalfTy, Unmerge.getReg(1),
2299 Builder.buildConstant(HalfTy, HalfSize - 1));
2301 if (ShiftVal == HalfSize) {
2302 // (G_ASHR i64:x, 32) ->
2303 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2304 Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
2305 } else if (ShiftVal == Size - 1) {
2306 // Don't need a second shift.
2307 // (G_ASHR i64:x, 63) ->
2308 // %narrowed = (G_ASHR hi_32(x), 31)
2309 // G_MERGE_VALUES %narrowed, %narrowed
2310 Builder.buildMerge(DstReg, { Hi, Hi });
2312 auto Lo = Builder.buildAShr(
2313 HalfTy, Unmerge.getReg(1),
2314 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2316 // (G_ASHR i64:x, C) ->, for C >= 32
2317 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2318 Builder.buildMerge(DstReg, { Lo, Hi });
2322 MI.eraseFromParent();
2325 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2326 unsigned TargetShiftAmount) {
2328 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2329 applyCombineShiftToUnmerge(MI, ShiftAmt);
2336 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2337 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2338 Register DstReg = MI.getOperand(0).getReg();
2339 LLT DstTy = MRI.getType(DstReg);
2340 Register SrcReg = MI.getOperand(1).getReg();
2341 return mi_match(SrcReg, MRI,
2342 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2345 void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2346 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2347 Register DstReg = MI.getOperand(0).getReg();
2348 Builder.setInstr(MI);
2349 Builder.buildCopy(DstReg, Reg);
2350 MI.eraseFromParent();
2353 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2354 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2355 Register SrcReg = MI.getOperand(1).getReg();
2356 return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
2359 void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2360 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2361 Register DstReg = MI.getOperand(0).getReg();
2362 Builder.setInstr(MI);
2363 Builder.buildZExtOrTrunc(DstReg, Reg);
2364 MI.eraseFromParent();
2367 bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2368 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2369 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2370 Register LHS = MI.getOperand(1).getReg();
2371 Register RHS = MI.getOperand(2).getReg();
2372 LLT IntTy = MRI.getType(LHS);
2374 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2376 PtrReg.second = false;
2377 for (Register SrcReg : {LHS, RHS}) {
2378 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2379 // Don't handle cases where the integer is implicitly converted to the
2381 LLT PtrTy = MRI.getType(PtrReg.first);
2382 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2386 PtrReg.second = true;
2392 void CombinerHelper::applyCombineAddP2IToPtrAdd(
2393 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2394 Register Dst = MI.getOperand(0).getReg();
2395 Register LHS = MI.getOperand(1).getReg();
2396 Register RHS = MI.getOperand(2).getReg();
2398 const bool DoCommute = PtrReg.second;
2400 std::swap(LHS, RHS);
2403 LLT PtrTy = MRI.getType(LHS);
2405 Builder.setInstrAndDebugLoc(MI);
2406 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2407 Builder.buildPtrToInt(Dst, PtrAdd);
2408 MI.eraseFromParent();
2411 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2413 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
2414 Register LHS = MI.getOperand(1).getReg();
2415 Register RHS = MI.getOperand(2).getReg();
2416 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2418 if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
2420 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2421 NewCst = Cst + *RHSCst;
2429 void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2431 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
2432 Register Dst = MI.getOperand(0).getReg();
2434 Builder.setInstrAndDebugLoc(MI);
2435 Builder.buildConstant(Dst, NewCst);
2436 MI.eraseFromParent();
2439 bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2440 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2441 Register DstReg = MI.getOperand(0).getReg();
2442 Register SrcReg = MI.getOperand(1).getReg();
2443 LLT DstTy = MRI.getType(DstReg);
2444 return mi_match(SrcReg, MRI,
2445 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2448 bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2449 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2450 Register DstReg = MI.getOperand(0).getReg();
2451 Register SrcReg = MI.getOperand(1).getReg();
2452 LLT DstTy = MRI.getType(DstReg);
2453 if (mi_match(SrcReg, MRI,
2454 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2455 unsigned DstSize = DstTy.getScalarSizeInBits();
2456 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2457 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2462 bool CombinerHelper::matchCombineExtOfExt(
2463 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2464 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2465 MI.getOpcode() == TargetOpcode::G_SEXT ||
2466 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2467 "Expected a G_[ASZ]EXT");
2468 Register SrcReg = MI.getOperand(1).getReg();
2469 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2470 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2471 unsigned Opc = MI.getOpcode();
2472 unsigned SrcOpc = SrcMI->getOpcode();
2473 if (Opc == SrcOpc ||
2474 (Opc == TargetOpcode::G_ANYEXT &&
2475 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2476 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2477 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2483 void CombinerHelper::applyCombineExtOfExt(
2484 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2485 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2486 MI.getOpcode() == TargetOpcode::G_SEXT ||
2487 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2488 "Expected a G_[ASZ]EXT");
2490 Register Reg = std::get<0>(MatchInfo);
2491 unsigned SrcExtOp = std::get<1>(MatchInfo);
2493 // Combine exts with the same opcode.
2494 if (MI.getOpcode() == SrcExtOp) {
2495 Observer.changingInstr(MI);
2496 MI.getOperand(1).setReg(Reg);
2497 Observer.changedInstr(MI);
2502 // - anyext([sz]ext x) to [sz]ext x
2503 // - sext(zext x) to zext x
2504 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2505 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2506 SrcExtOp == TargetOpcode::G_ZEXT)) {
2507 Register DstReg = MI.getOperand(0).getReg();
2508 Builder.setInstrAndDebugLoc(MI);
2509 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2510 MI.eraseFromParent();
2514 void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
2515 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2516 Register DstReg = MI.getOperand(0).getReg();
2517 Register SrcReg = MI.getOperand(1).getReg();
2518 LLT DstTy = MRI.getType(DstReg);
2520 Builder.setInstrAndDebugLoc(MI);
2521 Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
2523 MI.eraseFromParent();
2526 bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
2527 assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
2528 Register SrcReg = MI.getOperand(1).getReg();
2529 return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
2532 bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
2533 assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
2534 Src = MI.getOperand(1).getReg();
2536 return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
2539 bool CombinerHelper::matchCombineTruncOfExt(
2540 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2541 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2542 Register SrcReg = MI.getOperand(1).getReg();
2543 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2544 unsigned SrcOpc = SrcMI->getOpcode();
2545 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2546 SrcOpc == TargetOpcode::G_ZEXT) {
2547 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2553 void CombinerHelper::applyCombineTruncOfExt(
2554 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2555 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2556 Register SrcReg = MatchInfo.first;
2557 unsigned SrcExtOp = MatchInfo.second;
2558 Register DstReg = MI.getOperand(0).getReg();
2559 LLT SrcTy = MRI.getType(SrcReg);
2560 LLT DstTy = MRI.getType(DstReg);
2561 if (SrcTy == DstTy) {
2562 MI.eraseFromParent();
2563 replaceRegWith(MRI, DstReg, SrcReg);
2566 Builder.setInstrAndDebugLoc(MI);
2567 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2568 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2570 Builder.buildTrunc(DstReg, SrcReg);
2571 MI.eraseFromParent();
2574 bool CombinerHelper::matchCombineTruncOfShl(
2575 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2576 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2577 Register DstReg = MI.getOperand(0).getReg();
2578 Register SrcReg = MI.getOperand(1).getReg();
2579 LLT DstTy = MRI.getType(DstReg);
2583 if (MRI.hasOneNonDBGUse(SrcReg) &&
2584 mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
2585 isLegalOrBeforeLegalizer(
2586 {TargetOpcode::G_SHL,
2587 {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
2588 KnownBits Known = KB->getKnownBits(ShiftAmt);
2589 unsigned Size = DstTy.getSizeInBits();
2590 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
2591 MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
2598 void CombinerHelper::applyCombineTruncOfShl(
2599 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2600 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2601 Register DstReg = MI.getOperand(0).getReg();
2602 Register SrcReg = MI.getOperand(1).getReg();
2603 LLT DstTy = MRI.getType(DstReg);
2604 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2606 Register ShiftSrc = MatchInfo.first;
2607 Register ShiftAmt = MatchInfo.second;
2608 Builder.setInstrAndDebugLoc(MI);
2609 auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
2610 Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
2611 MI.eraseFromParent();
2614 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2615 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2616 return MO.isReg() &&
2617 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2621 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2622 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2623 return !MO.isReg() ||
2624 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2628 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2629 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2630 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2631 return all_of(Mask, [](int Elt) { return Elt < 0; });
2634 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2635 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2636 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2640 bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2641 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2642 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2646 bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2647 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2648 if (auto MaybeCstCmp =
2649 getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
2650 OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
2656 bool CombinerHelper::eraseInst(MachineInstr &MI) {
2657 MI.eraseFromParent();
2661 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2662 const MachineOperand &MOP2) {
2663 if (!MOP1.isReg() || !MOP2.isReg())
2665 MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
2668 MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
2672 // Handle a case like this:
2674 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2676 // Even though %0 and %1 are produced by the same instruction they are not
2679 return MOP1.getReg() == MOP2.getReg();
2681 // If we have an instruction which loads or stores, we can't guarantee that
2684 // For example, we may have
2686 // %x1 = G_LOAD %addr (load N from @somewhere)
2690 // %x2 = G_LOAD %addr (load N from @somewhere)
2692 // %or = G_OR %x1, %x2
2694 // It's possible that @foo will modify whatever lives at the address we're
2695 // loading from. To be safe, let's just assume that all loads and stores
2696 // are different (unless we have something which is guaranteed to not
2698 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
2701 // Check for physical registers on the instructions first to avoid cases
2704 // %a = COPY $physreg
2706 // SOMETHING implicit-def $physreg
2708 // %b = COPY $physreg
2710 // These copies are not equivalent.
2711 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2712 return MO.isReg() && MO.getReg().isPhysical();
2714 // Check if we have a case like this:
2716 // %a = COPY $physreg
2719 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2720 // From that, we know that they must have the same value, since they must
2721 // have come from the same COPY.
2722 return I1->isIdenticalTo(*I2);
2725 // We don't have any physical registers, so we don't necessarily need the
2728 // On the off-chance that there's some target instruction feeding into the
2729 // instruction, let's use produceSameValue instead of isIdenticalTo.
2730 return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
2733 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2736 // MIPatternMatch doesn't let us look through G_ZEXT etc.
2737 auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
2738 return ValAndVReg && ValAndVReg->Value == C;
2741 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2743 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2744 Register OldReg = MI.getOperand(0).getReg();
2745 Register Replacement = MI.getOperand(OpIdx).getReg();
2746 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2747 MI.eraseFromParent();
2748 replaceRegWith(MRI, OldReg, Replacement);
2752 bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2753 Register Replacement) {
2754 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2755 Register OldReg = MI.getOperand(0).getReg();
2756 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2757 MI.eraseFromParent();
2758 replaceRegWith(MRI, OldReg, Replacement);
2762 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2763 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2764 // Match (cond ? x : x)
2765 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2766 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2770 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2771 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2772 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2776 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2777 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2778 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2782 bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2783 MachineOperand &MO = MI.getOperand(OpIdx);
2784 return MO.isReg() &&
2785 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2788 bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2790 MachineOperand &MO = MI.getOperand(OpIdx);
2791 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2794 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2795 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2796 Builder.setInstr(MI);
2797 Builder.buildFConstant(MI.getOperand(0), C);
2798 MI.eraseFromParent();
2802 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2803 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2804 Builder.setInstr(MI);
2805 Builder.buildConstant(MI.getOperand(0), C);
2806 MI.eraseFromParent();
2810 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2811 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2812 Builder.setInstr(MI);
2813 Builder.buildConstant(MI.getOperand(0), C);
2814 MI.eraseFromParent();
2818 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2819 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2820 Builder.setInstr(MI);
2821 Builder.buildUndef(MI.getOperand(0));
2822 MI.eraseFromParent();
2826 bool CombinerHelper::matchSimplifyAddToSub(
2827 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2828 Register LHS = MI.getOperand(1).getReg();
2829 Register RHS = MI.getOperand(2).getReg();
2830 Register &NewLHS = std::get<0>(MatchInfo);
2831 Register &NewRHS = std::get<1>(MatchInfo);
2833 // Helper lambda to check for opportunities for
2834 // ((0-A) + B) -> B - A
2835 // (A + (0-B)) -> A - B
2836 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2837 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2839 NewLHS = MaybeNewLHS;
2843 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2846 bool CombinerHelper::matchCombineInsertVecElts(
2847 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2848 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2850 Register DstReg = MI.getOperand(0).getReg();
2851 LLT DstTy = MRI.getType(DstReg);
2852 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2853 unsigned NumElts = DstTy.getNumElements();
2854 // If this MI is part of a sequence of insert_vec_elts, then
2855 // don't do the combine in the middle of the sequence.
2856 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2857 TargetOpcode::G_INSERT_VECTOR_ELT)
2859 MachineInstr *CurrInst = &MI;
2860 MachineInstr *TmpInst;
2863 MatchInfo.resize(NumElts);
2865 CurrInst->getOperand(0).getReg(), MRI,
2866 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2867 if (IntImm >= NumElts)
2869 if (!MatchInfo[IntImm])
2870 MatchInfo[IntImm] = TmpReg;
2874 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2876 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2877 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2878 if (!MatchInfo[I - 1].isValid())
2879 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2883 // If we didn't end in a G_IMPLICIT_DEF, bail out.
2884 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
2887 void CombinerHelper::applyCombineInsertVecElts(
2888 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2889 Builder.setInstr(MI);
2891 auto GetUndef = [&]() {
2894 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2895 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2898 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2900 MatchInfo[I] = GetUndef();
2902 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2903 MI.eraseFromParent();
2906 void CombinerHelper::applySimplifyAddToSub(
2907 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2908 Builder.setInstr(MI);
2909 Register SubLHS, SubRHS;
2910 std::tie(SubLHS, SubRHS) = MatchInfo;
2911 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2912 MI.eraseFromParent();
2915 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
2916 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2917 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2919 // Creates the new hand + logic instruction (but does not insert them.)
2921 // On success, MatchInfo is populated with the new instructions. These are
2922 // inserted in applyHoistLogicOpWithSameOpcodeHands.
2923 unsigned LogicOpcode = MI.getOpcode();
2924 assert(LogicOpcode == TargetOpcode::G_AND ||
2925 LogicOpcode == TargetOpcode::G_OR ||
2926 LogicOpcode == TargetOpcode::G_XOR);
2927 MachineIRBuilder MIB(MI);
2928 Register Dst = MI.getOperand(0).getReg();
2929 Register LHSReg = MI.getOperand(1).getReg();
2930 Register RHSReg = MI.getOperand(2).getReg();
2932 // Don't recompute anything.
2933 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2936 // Make sure we have (hand x, ...), (hand y, ...)
2937 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2938 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2939 if (!LeftHandInst || !RightHandInst)
2941 unsigned HandOpcode = LeftHandInst->getOpcode();
2942 if (HandOpcode != RightHandInst->getOpcode())
2944 if (!LeftHandInst->getOperand(1).isReg() ||
2945 !RightHandInst->getOperand(1).isReg())
2948 // Make sure the types match up, and if we're doing this post-legalization,
2949 // we end up with legal types.
2950 Register X = LeftHandInst->getOperand(1).getReg();
2951 Register Y = RightHandInst->getOperand(1).getReg();
2952 LLT XTy = MRI.getType(X);
2953 LLT YTy = MRI.getType(Y);
2956 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
2959 // Optional extra source register.
2960 Register ExtraHandOpSrcReg;
2961 switch (HandOpcode) {
2964 case TargetOpcode::G_ANYEXT:
2965 case TargetOpcode::G_SEXT:
2966 case TargetOpcode::G_ZEXT: {
2967 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
2970 case TargetOpcode::G_AND:
2971 case TargetOpcode::G_ASHR:
2972 case TargetOpcode::G_LSHR:
2973 case TargetOpcode::G_SHL: {
2974 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
2975 MachineOperand &ZOp = LeftHandInst->getOperand(2);
2976 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
2978 ExtraHandOpSrcReg = ZOp.getReg();
2983 // Record the steps to build the new instructions.
2985 // Steps to build (logic x, y)
2986 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
2987 OperandBuildSteps LogicBuildSteps = {
2988 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
2989 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
2990 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
2991 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
2993 // Steps to build hand (logic x, y), ...z
2994 OperandBuildSteps HandBuildSteps = {
2995 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
2996 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
2997 if (ExtraHandOpSrcReg.isValid())
2998 HandBuildSteps.push_back(
2999 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3000 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3002 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3006 void CombinerHelper::applyBuildInstructionSteps(
3007 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3008 assert(MatchInfo.InstrsToBuild.size() &&
3009 "Expected at least one instr to build?");
3010 Builder.setInstr(MI);
3011 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3012 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3013 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3014 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3015 for (auto &OperandFn : InstrToBuild.OperandFns)
3018 MI.eraseFromParent();
3021 bool CombinerHelper::matchAshrShlToSextInreg(
3022 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3023 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3024 int64_t ShlCst, AshrCst;
3026 // FIXME: detect splat constant vectors.
3027 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3028 m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
3030 if (ShlCst != AshrCst)
3032 if (!isLegalOrBeforeLegalizer(
3033 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3035 MatchInfo = std::make_tuple(Src, ShlCst);
3039 void CombinerHelper::applyAshShlToSextInreg(
3040 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3041 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3044 std::tie(Src, ShiftAmt) = MatchInfo;
3045 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3046 Builder.setInstrAndDebugLoc(MI);
3047 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3048 MI.eraseFromParent();
3051 /// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3052 bool CombinerHelper::matchOverlappingAnd(
3053 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3054 assert(MI.getOpcode() == TargetOpcode::G_AND);
3056 Register Dst = MI.getOperand(0).getReg();
3057 LLT Ty = MRI.getType(Dst);
3064 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3067 MatchInfo = [=](MachineIRBuilder &B) {
3069 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3072 auto Zero = B.buildConstant(Ty, 0);
3073 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3078 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3079 Register &Replacement) {
3082 // %y:_(sN) = G_SOMETHING
3083 // %x:_(sN) = G_SOMETHING
3084 // %res:_(sN) = G_AND %x, %y
3086 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3088 // Patterns like this can appear as a result of legalization. E.g.
3090 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3091 // %one:_(s32) = G_CONSTANT i32 1
3092 // %and:_(s32) = G_AND %cmp, %one
3094 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3095 assert(MI.getOpcode() == TargetOpcode::G_AND);
3099 Register AndDst = MI.getOperand(0).getReg();
3100 LLT DstTy = MRI.getType(AndDst);
3102 // FIXME: This should be removed once GISelKnownBits supports vectors.
3103 if (DstTy.isVector())
3106 Register LHS = MI.getOperand(1).getReg();
3107 Register RHS = MI.getOperand(2).getReg();
3108 KnownBits LHSBits = KB->getKnownBits(LHS);
3109 KnownBits RHSBits = KB->getKnownBits(RHS);
3111 // Check that x & Mask == x.
3112 // x & 1 == x, always
3113 // x & 0 == x, only if x is also 0
3114 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3116 // Check if we can replace AndDst with the LHS of the G_AND
3117 if (canReplaceReg(AndDst, LHS, MRI) &&
3118 (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3123 // Check if we can replace AndDst with the RHS of the G_AND
3124 if (canReplaceReg(AndDst, RHS, MRI) &&
3125 (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3133 bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3136 // %y:_(sN) = G_SOMETHING
3137 // %x:_(sN) = G_SOMETHING
3138 // %res:_(sN) = G_OR %x, %y
3140 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3141 assert(MI.getOpcode() == TargetOpcode::G_OR);
3145 Register OrDst = MI.getOperand(0).getReg();
3146 LLT DstTy = MRI.getType(OrDst);
3148 // FIXME: This should be removed once GISelKnownBits supports vectors.
3149 if (DstTy.isVector())
3152 Register LHS = MI.getOperand(1).getReg();
3153 Register RHS = MI.getOperand(2).getReg();
3154 KnownBits LHSBits = KB->getKnownBits(LHS);
3155 KnownBits RHSBits = KB->getKnownBits(RHS);
3157 // Check that x | Mask == x.
3158 // x | 0 == x, always
3159 // x | 1 == x, only if x is also 1
3160 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3162 // Check if we can replace OrDst with the LHS of the G_OR
3163 if (canReplaceReg(OrDst, LHS, MRI) &&
3164 (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3169 // Check if we can replace OrDst with the RHS of the G_OR
3170 if (canReplaceReg(OrDst, RHS, MRI) &&
3171 (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3179 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3180 // If the input is already sign extended, just drop the extension.
3181 Register Src = MI.getOperand(1).getReg();
3182 unsigned ExtBits = MI.getOperand(2).getImm();
3183 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3184 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3187 static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3188 int64_t Cst, bool IsVector, bool IsFP) {
3189 // For i1, Cst will always be -1 regardless of boolean contents.
3190 return (ScalarSizeBits == 1 && Cst == -1) ||
3191 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3194 bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3195 SmallVectorImpl<Register> &RegsToNegate) {
3196 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3197 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3198 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3201 // We match xor(src, true) here.
3202 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3203 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3206 if (!MRI.hasOneNonDBGUse(XorSrc))
3209 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3210 // and ORs. The suffix of RegsToNegate starting from index I is used a work
3211 // list of tree nodes to visit.
3212 RegsToNegate.push_back(XorSrc);
3213 // Remember whether the comparisons are all integer or all floating point.
3216 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3217 Register Reg = RegsToNegate[I];
3218 if (!MRI.hasOneNonDBGUse(Reg))
3220 MachineInstr *Def = MRI.getVRegDef(Reg);
3221 switch (Def->getOpcode()) {
3223 // Don't match if the tree contains anything other than ANDs, ORs and
3226 case TargetOpcode::G_ICMP:
3230 // When we apply the combine we will invert the predicate.
3232 case TargetOpcode::G_FCMP:
3236 // When we apply the combine we will invert the predicate.
3238 case TargetOpcode::G_AND:
3239 case TargetOpcode::G_OR:
3240 // Implement De Morgan's laws:
3241 // ~(x & y) -> ~x | ~y
3242 // ~(x | y) -> ~x & ~y
3243 // When we apply the combine we will change the opcode and recursively
3244 // negate the operands.
3245 RegsToNegate.push_back(Def->getOperand(1).getReg());
3246 RegsToNegate.push_back(Def->getOperand(2).getReg());
3251 // Now we know whether the comparisons are integer or floating point, check
3252 // the constant in the xor.
3254 if (Ty.isVector()) {
3255 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3256 auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
3259 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3262 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3264 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3271 void CombinerHelper::applyNotCmp(MachineInstr &MI,
3272 SmallVectorImpl<Register> &RegsToNegate) {
3273 for (Register Reg : RegsToNegate) {
3274 MachineInstr *Def = MRI.getVRegDef(Reg);
3275 Observer.changingInstr(*Def);
3276 // For each comparison, invert the opcode. For each AND and OR, change the
3278 switch (Def->getOpcode()) {
3280 llvm_unreachable("Unexpected opcode");
3281 case TargetOpcode::G_ICMP:
3282 case TargetOpcode::G_FCMP: {
3283 MachineOperand &PredOp = Def->getOperand(1);
3284 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3285 (CmpInst::Predicate)PredOp.getPredicate());
3286 PredOp.setPredicate(NewP);
3289 case TargetOpcode::G_AND:
3290 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3292 case TargetOpcode::G_OR:
3293 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3296 Observer.changedInstr(*Def);
3299 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3300 MI.eraseFromParent();
3303 bool CombinerHelper::matchXorOfAndWithSameReg(
3304 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3305 // Match (xor (and x, y), y) (or any of its commuted cases)
3306 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3307 Register &X = MatchInfo.first;
3308 Register &Y = MatchInfo.second;
3309 Register AndReg = MI.getOperand(1).getReg();
3310 Register SharedReg = MI.getOperand(2).getReg();
3312 // Find a G_AND on either side of the G_XOR.
3315 // (xor (and x, y), SharedReg)
3316 // (xor SharedReg, (and x, y))
3317 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3318 std::swap(AndReg, SharedReg);
3319 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3323 // Only do this if we'll eliminate the G_AND.
3324 if (!MRI.hasOneNonDBGUse(AndReg))
3327 // We can combine if SharedReg is the same as either the LHS or RHS of the
3331 return Y == SharedReg;
3334 void CombinerHelper::applyXorOfAndWithSameReg(
3335 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3336 // Fold (xor (and x, y), y) -> (and (not x), y)
3337 Builder.setInstrAndDebugLoc(MI);
3339 std::tie(X, Y) = MatchInfo;
3340 auto Not = Builder.buildNot(MRI.getType(X), X);
3341 Observer.changingInstr(MI);
3342 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3343 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3344 MI.getOperand(2).setReg(Y);
3345 Observer.changedInstr(MI);
3348 bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3349 Register DstReg = MI.getOperand(0).getReg();
3350 LLT Ty = MRI.getType(DstReg);
3351 const DataLayout &DL = Builder.getMF().getDataLayout();
3353 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3356 if (Ty.isPointer()) {
3357 auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
3358 return ConstVal && *ConstVal == 0;
3361 assert(Ty.isVector() && "Expecting a vector type");
3362 const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
3363 return isBuildVectorAllZeros(*VecMI, MRI);
3366 void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3367 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
3368 Builder.setInstrAndDebugLoc(MI);
3369 Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
3370 MI.eraseFromParent();
3373 /// The second source operand is known to be a power of 2.
3374 void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3375 Register DstReg = MI.getOperand(0).getReg();
3376 Register Src0 = MI.getOperand(1).getReg();
3377 Register Pow2Src1 = MI.getOperand(2).getReg();
3378 LLT Ty = MRI.getType(DstReg);
3379 Builder.setInstrAndDebugLoc(MI);
3381 // Fold (urem x, pow2) -> (and x, pow2-1)
3382 auto NegOne = Builder.buildConstant(Ty, -1);
3383 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3384 Builder.buildAnd(DstReg, Src0, Add);
3385 MI.eraseFromParent();
3388 Optional<SmallVector<Register, 8>>
3389 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3390 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3391 // We want to detect if Root is part of a tree which represents a bunch
3392 // of loads being merged into a larger load. We'll try to recognize patterns
3393 // like, for example:
3412 // Each "Reg" may have been produced by a load + some arithmetic. This
3413 // function will save each of them.
3414 SmallVector<Register, 8> RegsToVisit;
3415 SmallVector<const MachineInstr *, 7> Ors = {Root};
3417 // In the "worst" case, we're dealing with a load for each byte. So, there
3418 // are at most #bytes - 1 ORs.
3419 const unsigned MaxIter =
3420 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3421 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3424 const MachineInstr *Curr = Ors.pop_back_val();
3425 Register OrLHS = Curr->getOperand(1).getReg();
3426 Register OrRHS = Curr->getOperand(2).getReg();
3428 // In the combine, we want to elimate the entire tree.
3429 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3432 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3433 // something that may be a load + arithmetic.
3434 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3437 RegsToVisit.push_back(OrLHS);
3438 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3441 RegsToVisit.push_back(OrRHS);
3444 // We're going to try and merge each register into a wider power-of-2 type,
3445 // so we ought to have an even number of registers.
3446 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3451 /// Helper function for findLoadOffsetsForLoadOrCombine.
3453 /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3454 /// and then moving that value into a specific byte offset.
3458 /// \returns The load instruction and the byte offset it is moved into.
3459 static Optional<std::pair<GZExtLoad *, int64_t>>
3460 matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3461 const MachineRegisterInfo &MRI) {
3462 assert(MRI.hasOneNonDBGUse(Reg) &&
3463 "Expected Reg to only have one non-debug use?");
3466 if (!mi_match(Reg, MRI,
3467 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3472 if (Shift % MemSizeInBits != 0)
3475 // TODO: Handle other types of loads.
3476 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3480 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3483 return std::make_pair(Load, Shift / MemSizeInBits);
3486 Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3487 CombinerHelper::findLoadOffsetsForLoadOrCombine(
3488 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3489 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3491 // Each load found for the pattern. There should be one for each RegsToVisit.
3492 SmallSetVector<const MachineInstr *, 8> Loads;
3494 // The lowest index used in any load. (The lowest "i" for each x[i].)
3495 int64_t LowestIdx = INT64_MAX;
3497 // The load which uses the lowest index.
3498 GZExtLoad *LowestIdxLoad = nullptr;
3500 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3501 SmallSet<int64_t, 8> SeenIdx;
3503 // Ensure each load is in the same MBB.
3504 // TODO: Support multiple MachineBasicBlocks.
3505 MachineBasicBlock *MBB = nullptr;
3506 const MachineMemOperand *MMO = nullptr;
3508 // Earliest instruction-order load in the pattern.
3509 GZExtLoad *EarliestLoad = nullptr;
3511 // Latest instruction-order load in the pattern.
3512 GZExtLoad *LatestLoad = nullptr;
3514 // Base pointer which every load should share.
3517 // We want to find a load for each register. Each load should have some
3518 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3519 // track of the load which uses the lowest index. Later, we will check if we
3520 // can use its pointer in the final, combined load.
3521 for (auto Reg : RegsToVisit) {
3522 // Find the load, and find the position that it will end up in (e.g. a
3524 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3529 std::tie(Load, DstPos) = *LoadAndPos;
3531 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3532 // it is difficult to check for stores/calls/etc between loads.
3533 MachineBasicBlock *LoadMBB = Load->getParent();
3539 // Make sure that the MachineMemOperands of every seen load are compatible.
3540 auto &LoadMMO = Load->getMMO();
3543 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3546 // Find out what the base pointer and index for the load is.
3549 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3550 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3551 LoadPtr = Load->getOperand(1).getReg();
3555 // Don't combine things like a[i], a[i] -> a bigger load.
3556 if (!SeenIdx.insert(Idx).second)
3559 // Every load must share the same base pointer; don't combine things like:
3561 // a[i], b[i + 1] -> a bigger load.
3562 if (!BasePtr.isValid())
3564 if (BasePtr != LoadPtr)
3567 if (Idx < LowestIdx) {
3569 LowestIdxLoad = Load;
3572 // Keep track of the byte offset that this load ends up at. If we have seen
3573 // the byte offset, then stop here. We do not want to combine:
3575 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3576 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3580 // Keep track of the position of the earliest/latest loads in the pattern.
3581 // We will check that there are no load fold barriers between them later
3584 // FIXME: Is there a better way to check for load fold barriers?
3585 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3586 EarliestLoad = Load;
3587 if (!LatestLoad || dominates(*LatestLoad, *Load))
3591 // We found a load for each register. Let's check if each load satisfies the
3593 assert(Loads.size() == RegsToVisit.size() &&
3594 "Expected to find a load for each register?");
3595 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3596 LatestLoad && "Expected at least two loads?");
3598 // Check if there are any stores, calls, etc. between any of the loads. If
3599 // there are, then we can't safely perform the combine.
3601 // MaxIter is chosen based off the (worst case) number of iterations it
3602 // typically takes to succeed in the LLVM test suite plus some padding.
3604 // FIXME: Is there a better way to check for load fold barriers?
3605 const unsigned MaxIter = 20;
3607 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3608 LatestLoad->getIterator())) {
3609 if (Loads.count(&MI))
3611 if (MI.isLoadFoldBarrier())
3613 if (Iter++ == MaxIter)
3617 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3620 bool CombinerHelper::matchLoadOrCombine(
3621 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3622 assert(MI.getOpcode() == TargetOpcode::G_OR);
3623 MachineFunction &MF = *MI.getMF();
3624 // Assuming a little-endian target, transform:
3626 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3628 // s32 val = *((i32)a)
3631 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3633 // s32 val = BSWAP(*((s32)a))
3634 Register Dst = MI.getOperand(0).getReg();
3635 LLT Ty = MRI.getType(Dst);
3639 // We need to combine at least two loads into this type. Since the smallest
3640 // possible load is into a byte, we need at least a 16-bit wide type.
3641 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3642 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3645 // Match a collection of non-OR instructions in the pattern.
3646 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3650 // We have a collection of non-OR instructions. Figure out how wide each of
3651 // the small loads should be based off of the number of potential loads we
3653 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3654 if (NarrowMemSizeInBits % 8 != 0)
3657 // Check if each register feeding into each OR is a load from the same
3658 // base pointer + some arithmetic.
3660 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3662 // Also verify that each of these ends up putting a[i] into the same memory
3663 // offset as a load into a wide type would.
3664 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3665 GZExtLoad *LowestIdxLoad, *LatestLoad;
3667 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3668 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3671 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3673 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3674 // we found before, check if this corresponds to a big or little endian byte
3675 // pattern. If it does, then we can represent it using a load + possibly a
3677 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3678 Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3679 if (!IsBigEndian.hasValue())
3681 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3682 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3685 // Make sure that the load from the lowest index produces offset 0 in the
3688 // This ensures that we won't combine something like this:
3690 // load x[i] -> byte 2
3691 // load x[i+1] -> byte 0 ---> wide_load x[i]
3692 // load x[i+2] -> byte 1
3693 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3694 const unsigned ZeroByteOffset =
3696 ? bigEndianByteAt(NumLoadsInTy, 0)
3697 : littleEndianByteAt(NumLoadsInTy, 0);
3698 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3699 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3700 ZeroOffsetIdx->second != LowestIdx)
3703 // We wil reuse the pointer from the load which ends up at byte offset 0. It
3704 // may not use index 0.
3705 Register Ptr = LowestIdxLoad->getPointerReg();
3706 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3707 LegalityQuery::MemDesc MMDesc;
3708 MMDesc.MemoryTy = Ty;
3709 MMDesc.AlignInBits = MMO.getAlign().value() * 8;
3710 MMDesc.Ordering = MMO.getSuccessOrdering();
3711 if (!isLegalOrBeforeLegalizer(
3712 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3714 auto PtrInfo = MMO.getPointerInfo();
3715 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3717 // Load must be allowed and fast on the target.
3718 LLVMContext &C = MF.getFunction().getContext();
3719 auto &DL = MF.getDataLayout();
3721 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3725 MatchInfo = [=](MachineIRBuilder &MIB) {
3726 MIB.setInstrAndDebugLoc(*LatestLoad);
3727 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3728 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3730 MIB.buildBSwap(Dst, LoadDst);
3735 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3736 MachineInstr *&ExtMI) {
3737 assert(MI.getOpcode() == TargetOpcode::G_PHI);
3739 Register DstReg = MI.getOperand(0).getReg();
3741 // TODO: Extending a vector may be expensive, don't do this until heuristics
3743 if (MRI.getType(DstReg).isVector())
3746 // Try to match a phi, whose only use is an extend.
3747 if (!MRI.hasOneNonDBGUse(DstReg))
3749 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3750 switch (ExtMI->getOpcode()) {
3751 case TargetOpcode::G_ANYEXT:
3752 return true; // G_ANYEXT is usually free.
3753 case TargetOpcode::G_ZEXT:
3754 case TargetOpcode::G_SEXT:
3760 // If the target is likely to fold this extend away, don't propagate.
3761 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
3764 // We don't want to propagate the extends unless there's a good chance that
3765 // they'll be optimized in some way.
3766 // Collect the unique incoming values.
3767 SmallPtrSet<MachineInstr *, 4> InSrcs;
3768 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
3769 auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
3770 switch (DefMI->getOpcode()) {
3771 case TargetOpcode::G_LOAD:
3772 case TargetOpcode::G_TRUNC:
3773 case TargetOpcode::G_SEXT:
3774 case TargetOpcode::G_ZEXT:
3775 case TargetOpcode::G_ANYEXT:
3776 case TargetOpcode::G_CONSTANT:
3777 InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
3778 // Don't try to propagate if there are too many places to create new
3779 // extends, chances are it'll increase code size.
3780 if (InSrcs.size() > 2)
3790 void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3791 MachineInstr *&ExtMI) {
3792 assert(MI.getOpcode() == TargetOpcode::G_PHI);
3793 Register DstReg = ExtMI->getOperand(0).getReg();
3794 LLT ExtTy = MRI.getType(DstReg);
3796 // Propagate the extension into the block of each incoming reg's block.
3797 // Use a SetVector here because PHIs can have duplicate edges, and we want
3798 // deterministic iteration order.
3799 SmallSetVector<MachineInstr *, 8> SrcMIs;
3800 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
3801 for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
3802 auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
3803 if (!SrcMIs.insert(SrcMI))
3806 // Build an extend after each src inst.
3807 auto *MBB = SrcMI->getParent();
3808 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3809 if (InsertPt != MBB->end() && InsertPt->isPHI())
3810 InsertPt = MBB->getFirstNonPHI();
3812 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3813 Builder.setDebugLoc(MI.getDebugLoc());
3814 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
3815 SrcMI->getOperand(0).getReg());
3816 OldToNewSrcMap[SrcMI] = NewExt;
3819 // Create a new phi with the extended inputs.
3820 Builder.setInstrAndDebugLoc(MI);
3821 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3822 NewPhi.addDef(DstReg);
3823 for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
3824 auto &MO = MI.getOperand(SrcIdx);
3826 NewPhi.addMBB(MO.getMBB());
3829 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3830 NewPhi.addUse(NewSrc->getOperand(0).getReg());
3832 Builder.insertInstr(NewPhi);
3833 ExtMI->eraseFromParent();
3836 bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
3838 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
3839 // If we have a constant index, look for a G_BUILD_VECTOR source
3840 // and find the source register that the index maps to.
3841 Register SrcVec = MI.getOperand(1).getReg();
3842 LLT SrcTy = MRI.getType(SrcVec);
3843 if (!isLegalOrBeforeLegalizer(
3844 {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
3847 auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3848 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
3851 unsigned VecIdx = Cst->Value.getZExtValue();
3852 MachineInstr *BuildVecMI =
3853 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
3855 BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
3858 LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
3859 if (!isLegalOrBeforeLegalizer(
3860 {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
3864 EVT Ty(getMVTForLLT(SrcTy));
3865 if (!MRI.hasOneNonDBGUse(SrcVec) &&
3866 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
3869 Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
3873 void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
3875 // Check the type of the register, since it may have come from a
3876 // G_BUILD_VECTOR_TRUNC.
3877 LLT ScalarTy = MRI.getType(Reg);
3878 Register DstReg = MI.getOperand(0).getReg();
3879 LLT DstTy = MRI.getType(DstReg);
3881 Builder.setInstrAndDebugLoc(MI);
3882 if (ScalarTy != DstTy) {
3883 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
3884 Builder.buildTrunc(DstReg, Reg);
3885 MI.eraseFromParent();
3888 replaceSingleDefInstWithReg(MI, Reg);
3891 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
3893 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3894 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3895 // This combine tries to find build_vector's which have every source element
3896 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
3897 // the masked load scalarization is run late in the pipeline. There's already
3898 // a combine for a similar pattern starting from the extract, but that
3899 // doesn't attempt to do it if there are multiple uses of the build_vector,
3900 // which in this case is true. Starting the combine from the build_vector
3901 // feels more natural than trying to find sibling nodes of extracts.
3903 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
3904 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
3905 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
3906 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
3907 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
3909 // replace ext{1,2,3,4} with %s{1,2,3,4}
3911 Register DstReg = MI.getOperand(0).getReg();
3912 LLT DstTy = MRI.getType(DstReg);
3913 unsigned NumElts = DstTy.getNumElements();
3915 SmallBitVector ExtractedElts(NumElts);
3916 for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
3917 MRI.use_instr_nodbg_end())) {
3918 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
3920 auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
3923 unsigned Idx = Cst.getValue().getZExtValue();
3925 return false; // Out of range.
3926 ExtractedElts.set(Idx);
3927 SrcDstPairs.emplace_back(
3928 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
3930 // Match if every element was extracted.
3931 return ExtractedElts.all();
3934 void CombinerHelper::applyExtractAllEltsFromBuildVector(
3936 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3937 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3938 for (auto &Pair : SrcDstPairs) {
3939 auto *ExtMI = Pair.second;
3940 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
3941 ExtMI->eraseFromParent();
3943 MI.eraseFromParent();
3946 void CombinerHelper::applyBuildFn(
3947 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3948 Builder.setInstrAndDebugLoc(MI);
3950 MI.eraseFromParent();
3953 void CombinerHelper::applyBuildFnNoErase(
3954 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3955 Builder.setInstrAndDebugLoc(MI);
3959 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
3960 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
3961 unsigned Opc = MI.getOpcode();
3962 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
3963 Register X = MI.getOperand(1).getReg();
3964 Register Y = MI.getOperand(2).getReg();
3967 unsigned RotateOpc =
3968 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
3969 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
3972 void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
3973 unsigned Opc = MI.getOpcode();
3974 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
3975 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
3976 Observer.changingInstr(MI);
3977 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
3978 : TargetOpcode::G_ROTR));
3979 MI.RemoveOperand(2);
3980 Observer.changedInstr(MI);
3983 // Fold (rot x, c) -> (rot x, c % BitSize)
3984 bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
3985 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
3986 MI.getOpcode() == TargetOpcode::G_ROTR);
3988 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
3989 Register AmtReg = MI.getOperand(2).getReg();
3990 bool OutOfRange = false;
3991 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
3992 if (auto *CI = dyn_cast<ConstantInt>(C))
3993 OutOfRange |= CI->getValue().uge(Bitsize);
3996 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
3999 void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
4000 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4001 MI.getOpcode() == TargetOpcode::G_ROTR);
4003 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4004 Builder.setInstrAndDebugLoc(MI);
4005 Register Amt = MI.getOperand(2).getReg();
4006 LLT AmtTy = MRI.getType(Amt);
4007 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4008 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4009 Observer.changingInstr(MI);
4010 MI.getOperand(2).setReg(Amt);
4011 Observer.changedInstr(MI);
4014 bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4015 int64_t &MatchInfo) {
4016 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4017 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4018 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4019 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4020 Optional<bool> KnownVal;
4023 llvm_unreachable("Unexpected G_ICMP predicate?");
4024 case CmpInst::ICMP_EQ:
4025 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4027 case CmpInst::ICMP_NE:
4028 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4030 case CmpInst::ICMP_SGE:
4031 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4033 case CmpInst::ICMP_SGT:
4034 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4036 case CmpInst::ICMP_SLE:
4037 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4039 case CmpInst::ICMP_SLT:
4040 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4042 case CmpInst::ICMP_UGE:
4043 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4045 case CmpInst::ICMP_UGT:
4046 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4048 case CmpInst::ICMP_ULE:
4049 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4051 case CmpInst::ICMP_ULT:
4052 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4059 ? getICmpTrueVal(getTargetLowering(),
4061 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4067 /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4068 bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4069 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4070 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4071 Register Dst = MI.getOperand(0).getReg();
4072 Register Src = MI.getOperand(1).getReg();
4073 LLT Ty = MRI.getType(Src);
4074 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4075 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4077 int64_t Width = MI.getOperand(2).getImm();
4082 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4083 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4085 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4088 MatchInfo = [=](MachineIRBuilder &B) {
4089 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4090 auto Cst2 = B.buildConstant(ExtractTy, Width);
4091 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4096 /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4097 bool CombinerHelper::matchBitfieldExtractFromAnd(
4098 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4099 assert(MI.getOpcode() == TargetOpcode::G_AND);
4100 Register Dst = MI.getOperand(0).getReg();
4101 LLT Ty = MRI.getType(Dst);
4102 if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
4103 TargetOpcode::G_UBFX, Ty, Ty))
4106 int64_t AndImm, LSBImm;
4108 const unsigned Size = Ty.getScalarSizeInBits();
4109 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4110 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4114 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4115 auto MaybeMask = static_cast<uint64_t>(AndImm);
4116 if (MaybeMask & (MaybeMask + 1))
4119 // LSB must fit within the register.
4120 if (static_cast<uint64_t>(LSBImm) >= Size)
4123 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4124 uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
4125 MatchInfo = [=](MachineIRBuilder &B) {
4126 auto WidthCst = B.buildConstant(ExtractTy, Width);
4127 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4128 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4133 bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4134 MachineInstr &PtrAdd) {
4135 assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
4137 Register Src1Reg = PtrAdd.getOperand(1).getReg();
4138 MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
4142 Register Src2Reg = PtrAdd.getOperand(2).getReg();
4144 if (MRI.hasOneNonDBGUse(Src1Reg))
4147 auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
4150 auto C2 = getConstantVRegVal(Src2Reg, MRI);
4154 const APInt &C1APIntVal = *C1;
4155 const APInt &C2APIntVal = *C2;
4156 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4158 for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
4159 // This combine may end up running before ptrtoint/inttoptr combines
4160 // manage to eliminate redundant conversions, so try to look through them.
4161 MachineInstr *ConvUseMI = &UseMI;
4162 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4163 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4164 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4165 Register DefReg = ConvUseMI->getOperand(0).getReg();
4166 if (!MRI.hasOneNonDBGUse(DefReg))
4168 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4169 ConvUseOpc = ConvUseMI->getOpcode();
4171 auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
4172 ConvUseOpc == TargetOpcode::G_STORE;
4175 // Is x[offset2] already not a legal addressing mode? If so then
4176 // reassociating the constants breaks nothing (we test offset2 because
4177 // that's the one we hope to fold into the load or store).
4178 TargetLoweringBase::AddrMode AM;
4179 AM.HasBaseReg = true;
4180 AM.BaseOffs = C2APIntVal.getSExtValue();
4182 MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
4184 getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
4185 PtrAdd.getMF()->getFunction().getContext());
4186 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4187 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4191 // Would x[offset1+offset2] still be a legal addressing mode?
4192 AM.BaseOffs = CombinedValue;
4193 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4201 bool CombinerHelper::matchReassocPtrAdd(
4202 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4203 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
4204 // We're trying to match a few pointer computation patterns here for
4205 // re-association opportunities.
4206 // 1) Isolating a constant operand to be on the RHS, e.g.:
4207 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4209 // 2) Folding two constants in each sub-tree as long as such folding
4210 // doesn't break a legal addressing mode.
4211 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4212 Register Src1Reg = MI.getOperand(1).getReg();
4213 Register Src2Reg = MI.getOperand(2).getReg();
4214 MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
4215 MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
4217 if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
4218 // Try to match example 1).
4219 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4221 auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4225 MatchInfo = [=,&MI](MachineIRBuilder &B) {
4226 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4229 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4230 Observer.changingInstr(MI);
4231 MI.getOperand(1).setReg(NewBase.getReg(0));
4232 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4233 Observer.changedInstr(MI);
4236 // Try to match example 2.
4237 Register LHSSrc1 = LHS->getOperand(1).getReg();
4238 Register LHSSrc2 = LHS->getOperand(2).getReg();
4239 auto C1 = getConstantVRegVal(LHSSrc2, MRI);
4242 auto C2 = getConstantVRegVal(Src2Reg, MRI);
4246 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4247 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4248 Observer.changingInstr(MI);
4249 MI.getOperand(1).setReg(LHSSrc1);
4250 MI.getOperand(2).setReg(NewCst.getReg(0));
4251 Observer.changedInstr(MI);
4254 return !reassociationCanBreakAddressingModePattern(MI);
4257 bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
4258 Register Op1 = MI.getOperand(1).getReg();
4259 Register Op2 = MI.getOperand(2).getReg();
4260 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4263 MatchInfo = *MaybeCst;
4267 bool CombinerHelper::tryCombine(MachineInstr &MI) {
4268 if (tryCombineCopy(MI))
4270 if (tryCombineExtendingLoads(MI))
4272 if (tryCombineIndexedLoadStore(MI))