1 //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
8 // This file contains some helper functions which try to cleanup artifacts
9 // such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make
10 // the types match. This file also contains some combines of merges that happens
11 // at the end of the legalization.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
15 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
16 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
17 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
18 #include "llvm/CodeGen/GlobalISel/Utils.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/Support/Debug.h"
22 #define DEBUG_TYPE "legalizer"
23 using namespace llvm::MIPatternMatch;
26 class LegalizationArtifactCombiner {
27 MachineIRBuilder &Builder;
28 MachineRegisterInfo &MRI;
29 const LegalizerInfo &LI;
31 static bool isArtifactCast(unsigned Opc) {
33 case TargetOpcode::G_TRUNC:
34 case TargetOpcode::G_SEXT:
35 case TargetOpcode::G_ZEXT:
36 case TargetOpcode::G_ANYEXT:
44 LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
45 const LegalizerInfo &LI)
46 : Builder(B), MRI(MRI), LI(LI) {}
48 bool tryCombineAnyExt(MachineInstr &MI,
49 SmallVectorImpl<MachineInstr *> &DeadInsts,
50 SmallVectorImpl<Register> &UpdatedDefs) {
51 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
53 Builder.setInstrAndDebugLoc(MI);
54 Register DstReg = MI.getOperand(0).getReg();
55 Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
57 // aext(trunc x) - > aext/copy/trunc x
59 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
60 LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
61 Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
62 UpdatedDefs.push_back(DstReg);
63 markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
67 // aext([asz]ext x) -> [asz]ext x
70 if (mi_match(SrcReg, MRI,
71 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
72 m_GSExt(m_Reg(ExtSrc)),
73 m_GZExt(m_Reg(ExtSrc)))))) {
74 Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
75 UpdatedDefs.push_back(DstReg);
76 markInstAndDefDead(MI, *ExtMI, DeadInsts);
80 // Try to fold aext(g_constant) when the larger constant type is legal.
81 // Can't use MIPattern because we don't have a specific constant in mind.
82 auto *SrcMI = MRI.getVRegDef(SrcReg);
83 if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
84 const LLT DstTy = MRI.getType(DstReg);
85 if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
86 auto &CstVal = SrcMI->getOperand(1);
87 Builder.buildConstant(
88 DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
89 UpdatedDefs.push_back(DstReg);
90 markInstAndDefDead(MI, *SrcMI, DeadInsts);
94 return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
97 bool tryCombineZExt(MachineInstr &MI,
98 SmallVectorImpl<MachineInstr *> &DeadInsts,
99 SmallVectorImpl<Register> &UpdatedDefs,
100 GISelObserverWrapper &Observer) {
101 assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
103 Builder.setInstrAndDebugLoc(MI);
104 Register DstReg = MI.getOperand(0).getReg();
105 Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
107 // zext(trunc x) - > and (aext/copy/trunc x), mask
109 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
110 LLT DstTy = MRI.getType(DstReg);
111 if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
112 isConstantUnsupported(DstTy))
114 LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
115 LLT SrcTy = MRI.getType(SrcReg);
116 APInt Mask = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
117 auto MIBMask = Builder.buildConstant(
118 DstTy, Mask.zext(DstTy.getScalarSizeInBits()));
119 Builder.buildAnd(DstReg, Builder.buildAnyExtOrTrunc(DstTy, TruncSrc),
121 markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
125 // zext(zext x) -> (zext x)
127 if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
128 LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
129 Observer.changingInstr(MI);
130 MI.getOperand(1).setReg(ZextSrc);
131 Observer.changedInstr(MI);
132 UpdatedDefs.push_back(DstReg);
133 markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
137 // Try to fold zext(g_constant) when the larger constant type is legal.
138 // Can't use MIPattern because we don't have a specific constant in mind.
139 auto *SrcMI = MRI.getVRegDef(SrcReg);
140 if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
141 const LLT DstTy = MRI.getType(DstReg);
142 if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
143 auto &CstVal = SrcMI->getOperand(1);
144 Builder.buildConstant(
145 DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
146 UpdatedDefs.push_back(DstReg);
147 markInstAndDefDead(MI, *SrcMI, DeadInsts);
151 return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
154 bool tryCombineSExt(MachineInstr &MI,
155 SmallVectorImpl<MachineInstr *> &DeadInsts,
156 SmallVectorImpl<Register> &UpdatedDefs) {
157 assert(MI.getOpcode() == TargetOpcode::G_SEXT);
159 Builder.setInstrAndDebugLoc(MI);
160 Register DstReg = MI.getOperand(0).getReg();
161 Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
163 // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
165 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
166 LLT DstTy = MRI.getType(DstReg);
167 if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
169 LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
170 LLT SrcTy = MRI.getType(SrcReg);
171 uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
173 TargetOpcode::G_SEXT_INREG, {DstReg},
174 {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits});
175 markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
179 // sext(zext x) -> (zext x)
180 // sext(sext x) -> (sext x)
183 if (mi_match(SrcReg, MRI,
184 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
185 m_GSExt(m_Reg(ExtSrc)))))) {
186 LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
187 Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
188 UpdatedDefs.push_back(DstReg);
189 markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
193 return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
196 bool tryCombineTrunc(MachineInstr &MI,
197 SmallVectorImpl<MachineInstr *> &DeadInsts,
198 SmallVectorImpl<Register> &UpdatedDefs,
199 GISelObserverWrapper &Observer) {
200 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
202 Builder.setInstr(MI);
203 Register DstReg = MI.getOperand(0).getReg();
204 Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
206 // Try to fold trunc(g_constant) when the smaller constant type is legal.
207 // Can't use MIPattern because we don't have a specific constant in mind.
208 auto *SrcMI = MRI.getVRegDef(SrcReg);
209 if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
210 const LLT DstTy = MRI.getType(DstReg);
211 if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
212 auto &CstVal = SrcMI->getOperand(1);
213 Builder.buildConstant(
214 DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
215 UpdatedDefs.push_back(DstReg);
216 markInstAndDefDead(MI, *SrcMI, DeadInsts);
221 // Try to fold trunc(merge) to directly use the source of the merge.
222 // This gets rid of large, difficult to legalize, merges
223 if (SrcMI->getOpcode() == TargetOpcode::G_MERGE_VALUES) {
224 const Register MergeSrcReg = SrcMI->getOperand(1).getReg();
225 const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
226 const LLT DstTy = MRI.getType(DstReg);
228 // We can only fold if the types are scalar
229 const unsigned DstSize = DstTy.getSizeInBits();
230 const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
231 if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
234 if (DstSize < MergeSrcSize) {
235 // When the merge source is larger than the destination, we can just
236 // truncate the merge source directly
237 if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
240 LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
243 Builder.buildTrunc(DstReg, MergeSrcReg);
244 UpdatedDefs.push_back(DstReg);
245 } else if (DstSize == MergeSrcSize) {
246 // If the sizes match we can simply try to replace the register
248 dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
250 replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
252 } else if (DstSize % MergeSrcSize == 0) {
253 // If the trunc size is a multiple of the merge source size we can use
254 // a smaller merge instead
255 if (isInstUnsupported(
256 {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
260 dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
263 const unsigned NumSrcs = DstSize / MergeSrcSize;
264 assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
265 "trunc(merge) should require less inputs than merge");
266 SmallVector<Register, 8> SrcRegs(NumSrcs);
267 for (unsigned i = 0; i < NumSrcs; ++i)
268 SrcRegs[i] = SrcMI->getOperand(i + 1).getReg();
270 Builder.buildMerge(DstReg, SrcRegs);
271 UpdatedDefs.push_back(DstReg);
277 markInstAndDefDead(MI, *SrcMI, DeadInsts);
281 // trunc(trunc) -> trunc
283 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
284 // Always combine trunc(trunc) since the eventual resulting trunc must be
285 // legal anyway as it must be legal for all outputs of the consumer type
287 LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);
289 Builder.buildTrunc(DstReg, TruncSrc);
290 UpdatedDefs.push_back(DstReg);
291 markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
298 /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
299 bool tryFoldImplicitDef(MachineInstr &MI,
300 SmallVectorImpl<MachineInstr *> &DeadInsts,
301 SmallVectorImpl<Register> &UpdatedDefs) {
302 unsigned Opcode = MI.getOpcode();
303 assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
304 Opcode == TargetOpcode::G_SEXT);
306 if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
307 MI.getOperand(1).getReg(), MRI)) {
308 Builder.setInstr(MI);
309 Register DstReg = MI.getOperand(0).getReg();
310 LLT DstTy = MRI.getType(DstReg);
312 if (Opcode == TargetOpcode::G_ANYEXT) {
313 // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
314 if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
316 LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
317 Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
318 UpdatedDefs.push_back(DstReg);
320 // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
321 // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
322 if (isConstantUnsupported(DstTy))
324 LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
325 Builder.buildConstant(DstReg, 0);
326 UpdatedDefs.push_back(DstReg);
329 markInstAndDefDead(MI, *DefMI, DeadInsts);
335 bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
336 SmallVectorImpl<MachineInstr *> &DeadInsts,
337 SmallVectorImpl<Register> &UpdatedDefs) {
339 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
341 const unsigned CastOpc = CastMI.getOpcode();
343 if (!isArtifactCast(CastOpc))
346 const unsigned NumDefs = MI.getNumOperands() - 1;
348 const Register CastSrcReg = CastMI.getOperand(1).getReg();
349 const LLT CastSrcTy = MRI.getType(CastSrcReg);
350 const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
351 const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());
353 const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
354 const unsigned DestSize = DestTy.getSizeInBits();
356 if (CastOpc == TargetOpcode::G_TRUNC) {
357 if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
358 // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
359 // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
361 // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
362 // %2:_(s8) = G_TRUNC %6
363 // %3:_(s8) = G_TRUNC %7
364 // %4:_(s8) = G_TRUNC %8
365 // %5:_(s8) = G_TRUNC %9
367 unsigned UnmergeNumElts =
368 DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
369 LLT UnmergeTy = CastSrcTy.changeNumElements(UnmergeNumElts);
371 if (isInstUnsupported(
372 {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
375 Builder.setInstr(MI);
376 auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);
378 for (unsigned I = 0; I != NumDefs; ++I) {
379 Register DefReg = MI.getOperand(I).getReg();
380 UpdatedDefs.push_back(DefReg);
381 Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
384 markInstAndDefDead(MI, CastMI, DeadInsts);
388 if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
389 // %1:_(s16) = G_TRUNC %0(s32)
390 // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
392 // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0
394 // Unmerge(trunc) can be combined if the trunc source size is a multiple
395 // of the unmerge destination size
396 if (CastSrcSize % DestSize != 0)
399 // Check if the new unmerge is supported
400 if (isInstUnsupported(
401 {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
404 // Gather the original destination registers and create new ones for the
406 const unsigned NewNumDefs = CastSrcSize / DestSize;
407 SmallVector<Register, 8> DstRegs(NewNumDefs);
408 for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
410 DstRegs[Idx] = MI.getOperand(Idx).getReg();
412 DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
416 Builder.setInstr(MI);
417 Builder.buildUnmerge(DstRegs, CastSrcReg);
418 UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
419 markInstAndDefDead(MI, CastMI, DeadInsts);
424 // TODO: support combines with other casts as well
428 static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
429 LLT OpTy, LLT DestTy) {
430 // Check if we found a definition that is like G_MERGE_VALUES.
434 case TargetOpcode::G_BUILD_VECTOR:
435 case TargetOpcode::G_MERGE_VALUES:
436 // The convert operation that we will need to insert is
437 // going to convert the input of that type of instruction (scalar)
438 // to the destination type (DestTy).
439 // The conversion needs to stay in the same domain (scalar to scalar
440 // and vector to vector), so if we were to allow to fold the merge
441 // we would need to insert some bitcasts.
443 // <2 x s16> = build_vector s16, s16
444 // <2 x s32> = zext <2 x s16>
445 // <2 x s16>, <2 x s16> = unmerge <2 x s32>
447 // As is the folding would produce:
448 // <2 x s16> = zext s16 <-- scalar to vector
449 // <2 x s16> = zext s16 <-- scalar to vector
451 // Instead we would want to generate:
453 // <2 x s16> = bitcast s32
455 // <2 x s16> = bitcast s32
457 // That is not done yet.
460 return !DestTy.isVector() && OpTy.isVector();
461 case TargetOpcode::G_CONCAT_VECTORS: {
464 if (!DestTy.isVector())
467 const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
469 // Don't handle scalarization with a cast that isn't in the same
470 // direction as the vector cast. This could be handled, but it would
471 // require more intermediate unmerges.
472 if (ConvertOp == TargetOpcode::G_TRUNC)
473 return DestTy.getSizeInBits() <= OpEltSize;
474 return DestTy.getSizeInBits() >= OpEltSize;
479 /// Try to replace DstReg with SrcReg or build a COPY instruction
480 /// depending on the register constraints.
481 static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
482 MachineRegisterInfo &MRI,
483 MachineIRBuilder &Builder,
484 SmallVectorImpl<Register> &UpdatedDefs,
485 GISelObserverWrapper &Observer) {
486 if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
487 Builder.buildCopy(DstReg, SrcReg);
488 UpdatedDefs.push_back(DstReg);
491 SmallVector<MachineInstr *, 4> UseMIs;
492 // Get the users and notify the observer before replacing.
493 for (auto &UseMI : MRI.use_instructions(DstReg)) {
494 UseMIs.push_back(&UseMI);
495 Observer.changingInstr(UseMI);
497 // Replace the registers.
498 MRI.replaceRegWith(DstReg, SrcReg);
499 UpdatedDefs.push_back(SrcReg);
500 // Notify the observer that we changed the instructions.
501 for (auto *UseMI : UseMIs)
502 Observer.changedInstr(*UseMI);
505 bool tryCombineMerges(MachineInstr &MI,
506 SmallVectorImpl<MachineInstr *> &DeadInsts,
507 SmallVectorImpl<Register> &UpdatedDefs,
508 GISelObserverWrapper &Observer) {
509 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
511 unsigned NumDefs = MI.getNumOperands() - 1;
512 MachineInstr *SrcDef =
513 getDefIgnoringCopies(MI.getOperand(NumDefs).getReg(), MRI);
517 LLT OpTy = MRI.getType(MI.getOperand(NumDefs).getReg());
518 LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
519 MachineInstr *MergeI = SrcDef;
520 unsigned ConvertOp = 0;
522 // Handle intermediate conversions
523 unsigned SrcOp = SrcDef->getOpcode();
524 if (isArtifactCast(SrcOp)) {
526 MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
529 if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
530 ConvertOp, OpTy, DestTy)) {
531 // We might have a chance to combine later by trying to combine
532 // unmerge(cast) first
533 return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
536 const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
538 if (NumMergeRegs < NumDefs) {
539 if (NumDefs % NumMergeRegs != 0)
542 Builder.setInstr(MI);
543 // Transform to UNMERGEs, for example
544 // %1 = G_MERGE_VALUES %4, %5
545 // %9, %10, %11, %12 = G_UNMERGE_VALUES %1
547 // %9, %10 = G_UNMERGE_VALUES %4
548 // %11, %12 = G_UNMERGE_VALUES %5
550 const unsigned NewNumDefs = NumDefs / NumMergeRegs;
551 for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
552 SmallVector<Register, 8> DstRegs;
553 for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
555 DstRegs.push_back(MI.getOperand(DefIdx).getReg());
558 LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
560 // This is a vector that is being split and casted. Extract to the
561 // element type, and do the conversion on the scalars (or smaller
563 LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);
565 // Handle split to smaller vectors, with conversions.
566 // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
567 // %3(<8 x s16>) = G_SEXT %2
568 // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
572 // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
573 // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
574 // %4(<2 x s16>) = G_SEXT %8
575 // %5(<2 x s16>) = G_SEXT %9
576 // %6(<2 x s16>) = G_SEXT %10
577 // %7(<2 x s16>)= G_SEXT %11
579 SmallVector<Register, 4> TmpRegs(NewNumDefs);
580 for (unsigned k = 0; k < NewNumDefs; ++k)
581 TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);
583 Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());
585 for (unsigned k = 0; k < NewNumDefs; ++k)
586 Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
588 Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
590 UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
593 } else if (NumMergeRegs > NumDefs) {
594 if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
597 Builder.setInstr(MI);
598 // Transform to MERGEs
599 // %6 = G_MERGE_VALUES %17, %18, %19, %20
600 // %7, %8 = G_UNMERGE_VALUES %6
602 // %7 = G_MERGE_VALUES %17, %18
603 // %8 = G_MERGE_VALUES %19, %20
605 const unsigned NumRegs = NumMergeRegs / NumDefs;
606 for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
607 SmallVector<Register, 8> Regs;
608 for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
610 Regs.push_back(MergeI->getOperand(Idx).getReg());
612 Register DefReg = MI.getOperand(DefIdx).getReg();
613 Builder.buildMerge(DefReg, Regs);
614 UpdatedDefs.push_back(DefReg);
618 LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
620 if (!ConvertOp && DestTy != MergeSrcTy)
621 ConvertOp = TargetOpcode::G_BITCAST;
624 Builder.setInstr(MI);
626 for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
627 Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
628 Register DefReg = MI.getOperand(Idx).getReg();
629 Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
630 UpdatedDefs.push_back(DefReg);
633 markInstAndDefDead(MI, *MergeI, DeadInsts);
637 assert(DestTy == MergeSrcTy &&
638 "Bitcast and the other kinds of conversions should "
639 "have happened earlier");
641 Builder.setInstr(MI);
642 for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
643 Register DstReg = MI.getOperand(Idx).getReg();
644 Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
645 replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
650 markInstAndDefDead(MI, *MergeI, DeadInsts);
654 static bool isMergeLikeOpcode(unsigned Opc) {
656 case TargetOpcode::G_MERGE_VALUES:
657 case TargetOpcode::G_BUILD_VECTOR:
658 case TargetOpcode::G_CONCAT_VECTORS:
665 bool tryCombineExtract(MachineInstr &MI,
666 SmallVectorImpl<MachineInstr *> &DeadInsts,
667 SmallVectorImpl<Register> &UpdatedDefs) {
668 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
670 // Try to use the source registers from a G_MERGE_VALUES
672 // %2 = G_MERGE_VALUES %0, %1
673 // %3 = G_EXTRACT %2, N
676 // for N < %2.getSizeInBits() / 2
677 // %3 = G_EXTRACT %0, N
679 // for N >= %2.getSizeInBits() / 2
680 // %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
682 Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
683 MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
684 if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
687 Register DstReg = MI.getOperand(0).getReg();
688 LLT DstTy = MRI.getType(DstReg);
689 LLT SrcTy = MRI.getType(SrcReg);
691 // TODO: Do we need to check if the resulting extract is supported?
692 unsigned ExtractDstSize = DstTy.getSizeInBits();
693 unsigned Offset = MI.getOperand(2).getImm();
694 unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
695 unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
696 unsigned MergeSrcIdx = Offset / MergeSrcSize;
698 // Compute the offset of the last bit the extract needs.
699 unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
701 // Can't handle the case where the extract spans multiple inputs.
702 if (MergeSrcIdx != EndMergeSrcIdx)
705 // TODO: We could modify MI in place in most cases.
706 Builder.setInstr(MI);
707 Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
708 Offset - MergeSrcIdx * MergeSrcSize);
709 UpdatedDefs.push_back(DstReg);
710 markInstAndDefDead(MI, *MergeI, DeadInsts);
714 /// Try to combine away MI.
715 /// Returns true if it combined away the MI.
716 /// Adds instructions that are dead as a result of the combine
717 /// into DeadInsts, which can include MI.
718 bool tryCombineInstruction(MachineInstr &MI,
719 SmallVectorImpl<MachineInstr *> &DeadInsts,
720 GISelObserverWrapper &WrapperObserver) {
721 // This might be a recursive call, and we might have DeadInsts already
722 // populated. To avoid bad things happening later with multiple vreg defs
723 // etc, process the dead instructions now if any.
724 if (!DeadInsts.empty())
725 deleteMarkedDeadInsts(DeadInsts, WrapperObserver);
727 // Put here every vreg that was redefined in such a way that it's at least
728 // possible that one (or more) of its users (immediate or COPY-separated)
729 // could become artifact combinable with the new definition (or the
730 // instruction reachable from it through a chain of copies if any).
731 SmallVector<Register, 4> UpdatedDefs;
732 bool Changed = false;
733 switch (MI.getOpcode()) {
736 case TargetOpcode::G_ANYEXT:
737 Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
739 case TargetOpcode::G_ZEXT:
740 Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
742 case TargetOpcode::G_SEXT:
743 Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
745 case TargetOpcode::G_UNMERGE_VALUES:
746 Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs, WrapperObserver);
748 case TargetOpcode::G_MERGE_VALUES:
749 // If any of the users of this merge are an unmerge, then add them to the
750 // artifact worklist in case there's folding that can be done looking up.
751 for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
752 if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
753 U.getOpcode() == TargetOpcode::G_TRUNC) {
754 UpdatedDefs.push_back(MI.getOperand(0).getReg());
759 case TargetOpcode::G_EXTRACT:
760 Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
762 case TargetOpcode::G_TRUNC:
763 Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
765 // Try to combine truncates away even if they are legal. As all artifact
766 // combines at the moment look only "up" the def-use chains, we achieve
767 // that by throwing truncates' users (with look through copies) into the
768 // ArtifactList again.
769 UpdatedDefs.push_back(MI.getOperand(0).getReg());
773 // If the main loop through the ArtifactList found at least one combinable
774 // pair of artifacts, not only combine it away (as done above), but also
775 // follow the def-use chain from there to combine everything that can be
776 // combined within this def-use chain of artifacts.
777 while (!UpdatedDefs.empty()) {
778 Register NewDef = UpdatedDefs.pop_back_val();
779 assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
780 for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
781 switch (Use.getOpcode()) {
782 // Keep this list in sync with the list of all artifact combines.
783 case TargetOpcode::G_ANYEXT:
784 case TargetOpcode::G_ZEXT:
785 case TargetOpcode::G_SEXT:
786 case TargetOpcode::G_UNMERGE_VALUES:
787 case TargetOpcode::G_EXTRACT:
788 case TargetOpcode::G_TRUNC:
789 // Adding Use to ArtifactList.
790 WrapperObserver.changedInstr(Use);
792 case TargetOpcode::COPY: {
793 Register Copy = Use.getOperand(0).getReg();
794 if (Copy.isVirtual())
795 UpdatedDefs.push_back(Copy);
799 // If we do not have an artifact combine for the opcode, there is no
800 // point in adding it to the ArtifactList as nothing interesting will
801 // be done to it anyway.
810 static Register getArtifactSrcReg(const MachineInstr &MI) {
811 switch (MI.getOpcode()) {
812 case TargetOpcode::COPY:
813 case TargetOpcode::G_TRUNC:
814 case TargetOpcode::G_ZEXT:
815 case TargetOpcode::G_ANYEXT:
816 case TargetOpcode::G_SEXT:
817 case TargetOpcode::G_EXTRACT:
818 return MI.getOperand(1).getReg();
819 case TargetOpcode::G_UNMERGE_VALUES:
820 return MI.getOperand(MI.getNumOperands() - 1).getReg();
822 llvm_unreachable("Not a legalization artifact happen");
826 /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
827 /// (either by killing it or changing operands) results in DefMI being dead
828 /// too. In-between COPYs or artifact-casts are also collected if they are
830 /// MI is not marked dead.
831 void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
832 SmallVectorImpl<MachineInstr *> &DeadInsts) {
833 // Collect all the copy instructions that are made dead, due to deleting
834 // this instruction. Collect all of them until the Trunc(DefMI).
836 // %1(s1) = G_TRUNC %0(s32)
837 // %2(s1) = COPY %1(s1)
838 // %3(s1) = COPY %2(s1)
839 // %4(s32) = G_ANYEXT %3(s1)
840 // In this case, we would have replaced %4 with a copy of %0,
841 // and as a result, %3, %2, %1 are dead.
842 MachineInstr *PrevMI = &MI;
843 while (PrevMI != &DefMI) {
844 Register PrevRegSrc = getArtifactSrcReg(*PrevMI);
846 MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
847 if (MRI.hasOneUse(PrevRegSrc)) {
848 if (TmpDef != &DefMI) {
849 assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
850 isArtifactCast(TmpDef->getOpcode())) &&
851 "Expecting copy or artifact cast here");
853 DeadInsts.push_back(TmpDef);
859 if (PrevMI == &DefMI && MRI.hasOneUse(DefMI.getOperand(0).getReg()))
860 DeadInsts.push_back(&DefMI);
863 /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
864 /// dead due to MI being killed, then mark DefMI as dead too.
865 /// Some of the combines (extends(trunc)), try to walk through redundant
866 /// copies in between the extends and the truncs, and this attempts to collect
867 /// the in between copies if they're dead.
868 void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
869 SmallVectorImpl<MachineInstr *> &DeadInsts) {
870 DeadInsts.push_back(&MI);
871 markDefDead(MI, DefMI, DeadInsts);
874 /// Erase the dead instructions in the list and call the observer hooks.
875 /// Normally the Legalizer will deal with erasing instructions that have been
876 /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
877 /// process instructions which have been marked dead, but otherwise break the
878 /// MIR by introducing multiple vreg defs. For those cases, allow the combines
879 /// to explicitly delete the instructions before we run into trouble.
880 void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
881 GISelObserverWrapper &WrapperObserver) {
882 for (auto *DeadMI : DeadInsts) {
883 LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
884 WrapperObserver.erasingInstr(*DeadMI);
885 DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
890 /// Checks if the target legalizer info has specified anything about the
891 /// instruction, or if unsupported.
892 bool isInstUnsupported(const LegalityQuery &Query) const {
893 using namespace LegalizeActions;
894 auto Step = LI.getAction(Query);
895 return Step.Action == Unsupported || Step.Action == NotFound;
898 bool isInstLegal(const LegalityQuery &Query) const {
899 return LI.getAction(Query).Action == LegalizeActions::Legal;
902 bool isConstantUnsupported(LLT Ty) const {
904 return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});
906 LLT EltTy = Ty.getElementType();
907 return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
908 isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
911 /// Looks through copy instructions and returns the actual
913 Register lookThroughCopyInstrs(Register Reg) {
915 while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
916 if (MRI.getType(TmpReg).isValid())