//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
               CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool tryOptAndIntoCompareBranch(MachineInstr *LHS, int64_t CmpConstant,
                                  const CmpInst::Predicate &Pred,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  //
  // This returns the produced compare instruction, and the predicate which
  // was ultimately used in the compare. The predicate may differ from what
  // is passed in \p Predicate due to optimization.
  std::pair<MachineInstr *, CmpInst::Predicate>
  emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                     MachineOperand &Predicate,
                     MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
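  /// e.g. with \p Bit = 3 and \p IsNegative = true, this emits
  /// TBNZ TestReg, #3, DstMBB (branch to \p DstMBB if bit 3 is not zero).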
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
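  // (Illustration: for a 32-bit immediate shift by `imm`, these render the
  // UBFM immediates immr = (32 - imm) % 32 and imms = 31 - imm, matching the
  // i32shift_a/i32shift_b transforms.)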
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
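  // (The arithmetic immediates matched above are 12-bit values with an
  // optional left shift by 12; e.g. 0x123000 encodes as #0x123, lsl #12.)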

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;
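  // (A G_ADD_LOW typically pairs with an ADRP, adding the low 12 bits of a
  // symbol's address to the ADRP-computed page address.)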

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }
  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
    return selectShiftedRegister(Root);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Instructions that accept extend modifiers like UXTW expect the register
  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
  /// subregister copy if necessary. Return either ExtReg, or the result of the
  /// new copy.
  Register narrowExtendRegIfNeeded(Register ExtReg,
                                   MachineIRBuilder &MIB) const;
  Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
                                   MachineIRBuilder &MIB) const;
  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;
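  // (e.g. a 64-bit constant address is built as MOVZ for bits [15:0]
  // followed by MOVKs for bits [31:16], [47:32] and [63:48]; see the
  // definition below.)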

  // Optimization methods.
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
                                               MachineOperand &RHS,
                                               CmpInst::Predicate &Predicate,
                                               MachineIRBuilder &MIB) const;
  MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
                                          MachineOperand &RHS,
                                          MachineIRBuilder &MIB) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;
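  // (isDef32 relies on the AArch64 rule that writing a W register
  // implicitly zeroes bits [63:32] of the corresponding X register.)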

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

// We declare the temporaries used by selectImpl() in the class to minimize the
// cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
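/// e.g. (GPR bank, 32 bits) maps to GPR32, and (FPR bank, 16 bits) to FPR16.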
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    return 32;
  case AArch64::FPRRegBankID:
    return 8;
  default:
    llvm_unreachable("Tried to get minimum size for unknown register bank.");
  }
}

static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();

  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value;
  } else
    return None;

  return Immed;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
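/// e.g. (G_SHL, GPRRegBankID, 32) selects LSLVWr, and
/// (G_FADD, FPRRegBankID, 64) selects FADDDrr.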
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
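/// e.g. (G_LOAD, GPRRegBankID, 32) selects LDRWui, and
/// (G_STORE, FPRRegBankID, 64) selects STRDui.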
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// into a register of class \p To, using subregister index \p SubReg, and
/// rewrites \p I's source operand to use the new copy.
///
/// E.g "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple of helpers below, for making sure that the copy we produce is
  // valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // copy.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    bool ValidCopy = true;
#ifndef NDEBUG
    ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
    assert(ValidCopy && "Invalid copy.");
#endif
    return ValidCopy;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);

      // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
      KnownValid = true;
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  }
  return GenericOpc;
}

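/// Select CSELWr/CSELXr (GPR) or FCSELSrrr/FCSELDrrr (FPR) for a G_SELECT,
/// based on the size and register bank of the result.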
static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
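/// e.g. a 64-bit compare against a register selects FCMPDrr, while a compare
/// against +0.0 selects the immediate form FCMPDri instead.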
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

/// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
  switch (P) {
  default:
    return false;
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    return true;
  }
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

/// Return a register which can be used as a bit to test in a TB(N)Z.
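/// e.g. when walking through %shl = G_SHL %x, 2, a test of bit 3 of %shl is
/// the same as a test of bit 1 of %x, so %x is returned and \p Bit updated.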
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!");
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    Optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
      }
      if (VRegAndVal)
        C = VRegAndVal->Value;
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value;
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}

MachineInstr *AArch64InstructionSelector::emitTestBit(
    Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(TestReg.isValid());
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  MachineRegisterInfo &MRI = *MIB.getMRI();

  // Attempt to optimize the test bit by walking over instructions.
  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
  LLT Ty = MRI.getType(TestReg);
  unsigned Size = Ty.getSizeInBits();
  assert(!Ty.isVector() && "Expected a scalar!");
  assert(Bit < 64 && "Bit is too large!");

  // When the test register is a 64-bit register, we have to narrow to make
  // the W-register variant of TB(N)Z work.
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size < NecessarySize)
    TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
  else if (Size > NecessarySize)
    TestReg = narrowExtendRegIfNeeded(TestReg, MIB);

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
  auto TestBitMI =
      MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
  return &*TestBitMI;
}

bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
    MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
    MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
  // Given something like this:
  //
  // %x = ...Something...
  // %one = G_CONSTANT i64 1
  // %zero = G_CONSTANT i64 0
  // %and = G_AND %x, %one
  // %cmp = G_ICMP intpred(ne), %and, %zero
  // %cmp_trunc = G_TRUNC %cmp
  // G_BRCOND %cmp_trunc, %bb.3
  //
  // We want to try and fold the AND into the G_BRCOND and produce either a
  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
  //
  // In this case, we'd get
  //
  // TBNZ %x %bb.3
  //
  if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
    return false;

  // Need to be comparing against 0 to fold.
  if (CmpConstant != 0)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();

  // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
  // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
  // so folding would be redundant.
  if (Pred != CmpInst::Predicate::ICMP_EQ &&
      Pred != CmpInst::Predicate::ICMP_NE)
    return false;

  // Check if the AND has a constant on its RHS which we can use as a mask.
  // If it's a power of 2, then it's the same as checking a specific bit.
  // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
  auto MaybeBit =
      getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
  if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
    return false;

  uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
  Register TestReg = AndInst->getOperand(1).getReg();
  bool Invert = Pred == CmpInst::Predicate::ICMP_NE;

  // Emit a TB(N)Z.
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
  return true;
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  Register LHS = CCMI->getOperand(2).getReg();
  Register RHS = CCMI->getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  MachineIRBuilder MIB(I);
  CmpInst::Predicate Pred =
      (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
    int64_t C = VRegAndVal->Value;

    // When we have a greater-than comparison, we can just test if the msb is
    // zero.
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero.
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  if (!VRegAndVal) {
    std::swap(RHS, LHS);
    VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
    LHSMI = getDefIgnoringCopies(LHS, MRI);
  }

  if (!VRegAndVal || VRegAndVal->Value != 0) {
    // If we can't select a CBZ then emit a cmp + Bcc.
    MachineInstr *Cmp;
    std::tie(Cmp, Pred) = emitIntegerCompare(
        CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
    if (!Cmp)
      return false;
    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
    I.eraseFromParent();
    return true;
  }

  // Try to emit a TB(N)Z for an eq or ne condition.
  if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
                                 MIB)) {
    I.eraseFromParent();
    return true;
  }

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
static Optional<int64_t> getVectorShiftImm(Register Reg,
                                           MachineRegisterInfo &MRI) {
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  MachineInstr *OpMI = MRI.getVRegDef(Reg);
  assert(OpMI && "Expected to find a vreg def for vector shift operand");
  if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
    return None;

  // Check all operands are identical immediates.
  int64_t ImmVal = -1;
  for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
    auto VRegAndVal =
        getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
    if (!VRegAndVal)
      return None;

    if (Idx == 1)
      ImmVal = VRegAndVal->Value;
    if (ImmVal != VRegAndVal->Value)
      return None;
  }

  return ImmVal;
}

/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand.
static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
                                         MachineRegisterInfo &MRI) {
  Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
  if (!ShiftImm)
    return None;
  // Check the immediate is in range for a SHL.
  int64_t Imm = *ShiftImm;
  if (Imm < 0)
    return None;
  switch (SrcTy.getElementType().getSizeInBits()) {
  default:
    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
    return None;
  case 8:
    if (Imm > 7)
      return None;
    break;
  case 16:
    if (Imm > 15)
      return None;
    break;
  case 32:
    if (Imm > 31)
      return None;
    break;
  case 64:
    if (Imm > 63)
      return None;
    break;
  }

  return Imm;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Check if we have a vector of constants on RHS that we can select as the
  // immediate form.
  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);

  unsigned Opc = 0;
  if (Ty == LLT::vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(Src2Reg);
  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no vector shift-right-by-register instruction, but the
  // shift-left-by-register instruction takes a signed shift amount, where
  // negative amounts specify a right shift.
  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(2, 64)) {
    Opc = AArch64::SSHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
}

bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // These shifts are legalized to have 64-bit shift amounts because we want
    // to take advantage of the existing imported selection patterns that
    // assume the immediates are s64s. However, if the shifted type is 32 bits
    // and for some reason we receive input GMIR that has an s64 shift amount
    // that's not a G_CONSTANT, insert a truncate so that we can still select
    // the s32 register-register variant.
    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return false;
    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
      return false;
    auto *AmtMI = MRI.getVRegDef(ShiftReg);
    assert(AmtMI && "could not find a vreg definition for shift amount");
    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
      // Insert a subregister copy to implement a 64->32 trunc
      MachineIRBuilder MIB(I);
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));
    }
    return true;
  }
  case TargetOpcode::G_STORE:
    return contractCrossBankCopyIntoStore(I, MRI);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
    Register DstReg = I.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
    if (!DstTy.isPointer())
      return false;
    MRI.setType(DstReg, LLT::scalar(64));
    return true;
  }
  default:
    return false;
  }
}

/// This lowering tries to look for G_PTR_ADD instructions and then converts
/// them to a standard G_ADD with a G_PTRTOINT on the source.
///
/// The motivation behind this is to expose the add semantics to the imported
/// tablegen patterns. We shouldn't need to check for uses being loads/stores,
/// because the selector works bottom up, uses before defs. By the time we
/// end up trying to select a G_PTR_ADD, we should have already attempted to
/// fold this into addressing modes and were therefore unsuccessful.
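/// e.g. %dst(p0) = G_PTR_ADD %base, %off becomes:
///   %intbase(s64) = G_PTRTOINT %base
///   %dst(s64) = G_ADD %intbase, %off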
bool AArch64InstructionSelector::convertPtrAddToAdd(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
  Register DstReg = I.getOperand(0).getReg();
  Register AddOp1Reg = I.getOperand(1).getReg();
  const LLT PtrTy = MRI.getType(DstReg);
  if (PtrTy.getAddressSpace() != 0)
    return false;

  MachineIRBuilder MIB(I);
  const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
  auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
  // Set regbanks on the registers.
  if (PtrTy.isVector())
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
  else
    MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));

  // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
  // %dst(intty) = G_ADD %intbase, off
  I.setDesc(TII.get(TargetOpcode::G_ADD));
  MRI.setType(DstReg, CastPtrTy);
  I.getOperand(1).setReg(PtrToInt.getReg(0));
  if (!select(*PtrToInt)) {
    LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
    return false;
  }
  return true;
}

bool AArch64InstructionSelector::earlySelectSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // We try to match the immediate variant of LSL, which is actually an alias
  // for a special case of UBFM. Otherwise, we fall back to the imported
  // selector which will match the register variant.
  assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
  const auto &MO = I.getOperand(2);
  auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
  if (!VRegAndVal)
    return false;

  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  if (DstTy.isVector())
    return false;
  bool Is64Bit = DstTy.getSizeInBits() == 64;
  auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
  auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
  MachineIRBuilder MIB(I);

  if (!Imm1Fn || !Imm2Fn)
    return false;

  auto NewI =
      MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
                     {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});

  for (auto &RenderFn : *Imm1Fn)
    RenderFn(NewI);
  for (auto &RenderFn : *Imm2Fn)
    RenderFn(NewI);

  I.eraseFromParent();
  return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}

bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
  // If we're storing a scalar, it doesn't matter what register bank that
  // scalar is on. All that matters is the size.
  //
  // So, if we see something like this (with a 32-bit scalar as an example):
  //
  // %x:gpr(s32) = ... something ...
  // %y:fpr(s32) = COPY %x:gpr(s32)
  // G_STORE %y:fpr(s32)
  //
  // We can fix this up into something like this:
  //
  // G_STORE %x:gpr(s32)
  //
  // And then continue the selection process normally.
  Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
  if (!DefDstReg.isValid())
    return false;
  LLT DefDstTy = MRI.getType(DefDstReg);
  Register StoreSrcReg = I.getOperand(0).getReg();
  LLT StoreSrcTy = MRI.getType(StoreSrcReg);

  // If we get something strange like a physical register, then we shouldn't
  // go any further.
  if (!DefDstTy.isValid())
    return false;

  // Are the source and dst types the same size?
  if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
    return false;

  if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
      RBI.getRegBank(DefDstReg, MRI, TRI))
    return false;

  // We have a cross-bank copy, which is entering a store. Let's fold it.
  I.getOperand(0).setReg(DefDstReg);
  return true;
}

1746 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1747 assert(I.getParent() && "Instruction should be in a basic block!");
1748 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1750 MachineBasicBlock &MBB = *I.getParent();
1751 MachineFunction &MF = *MBB.getParent();
1752 MachineRegisterInfo &MRI = MF.getRegInfo();
1754 switch (I.getOpcode()) {
1755 case TargetOpcode::G_SHL:
1756 return earlySelectSHL(I, MRI);
1757 case TargetOpcode::G_CONSTANT: {
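// A zero constant can be selected as a copy from the zero register, e.g.
// (a sketch): %x:gpr(s64) = G_CONSTANT i64 0  ==>  %x = COPY $xzr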
1758     bool IsZero = false;
1759     if (I.getOperand(1).isCImm())
1760       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1761     else if (I.getOperand(1).isImm())
1762       IsZero = I.getOperand(1).getImm() == 0;
1764     if (!IsZero)
1765       return false;
1767     Register DefReg = I.getOperand(0).getReg();
1768     LLT Ty = MRI.getType(DefReg);
1769     if (Ty.getSizeInBits() == 64) {
1770       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1771       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1772     } else if (Ty.getSizeInBits() == 32) {
1773       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1774       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1775     } else
1776       return false;
1778     I.setDesc(TII.get(TargetOpcode::COPY));
1779     return true;
1780   }
1781   default:
1782     return false;
1783   }
1784 }
1786 bool AArch64InstructionSelector::select(MachineInstr &I) {
1787 assert(I.getParent() && "Instruction should be in a basic block!");
1788 assert(I.getParent()->getParent() && "Instruction should be in a function!");
1790 MachineBasicBlock &MBB = *I.getParent();
1791 MachineFunction &MF = *MBB.getParent();
1792 MachineRegisterInfo &MRI = MF.getRegInfo();
1794 const AArch64Subtarget *Subtarget =
1795 &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
1796 if (Subtarget->requiresStrictAlign()) {
1797 // We don't support this feature yet.
1798     LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
1799     return false;
1800   }
1802 unsigned Opcode = I.getOpcode();
1803 // G_PHI requires same handling as PHI
1804 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
1805 // Certain non-generic instructions also need some special handling.
1807 if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1808 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1810 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1811 const Register DefReg = I.getOperand(0).getReg();
1812 const LLT DefTy = MRI.getType(DefReg);
1814 const RegClassOrRegBank &RegClassOrBank =
1815 MRI.getRegClassOrRegBank(DefReg);
1817       const TargetRegisterClass *DefRC
1818           = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1819       if (!DefRC) {
1820         if (!DefTy.isValid()) {
1821           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1822           return false;
1823         }
1824         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1825         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1826         if (!DefRC) {
1827           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1828           return false;
1829         }
1830       }
1832       I.setDesc(TII.get(TargetOpcode::PHI));
1834       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1835     }
1837     if (I.isCopy())
1838       return selectCopy(I, TII, MRI, TRI, RBI);
1840     return true;
1841   }
1844   if (I.getNumOperands() != I.getNumExplicitOperands()) {
1845     LLVM_DEBUG(
1846         dbgs() << "Generic instruction has unexpected implicit operands\n");
1847     return false;
1848   }
1850 // Try to do some lowering before we start instruction selecting. These
1851 // lowerings are purely transformations on the input G_MIR and so selection
1852 // must continue after any modification of the instruction.
1853 if (preISelLower(I)) {
1854     Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
1855   }
1857 // There may be patterns where the importer can't deal with them optimally,
1858 // but does select it to a suboptimal sequence so our custom C++ selection
1859 // code later never has a chance to work on it. Therefore, we have an early
1860 // selection attempt here to give priority to certain selection routines
1861 // over the imported ones.
1862   if (earlySelect(I))
1863     return true;
1865   if (selectImpl(I, *CoverageInfo))
1866     return true;
1868   LLT Ty =
1869       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1871   MachineIRBuilder MIB(I);
1873   switch (Opcode) {
1874   case TargetOpcode::G_BRCOND: {
1875 if (Ty.getSizeInBits() > 32) {
1876 // We shouldn't need this on AArch64, but it would be implemented as an
1877 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1878 // bit being tested is < 32.
1879 LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1880 << ", expected at most 32-bits");
1884 const Register CondReg = I.getOperand(0).getReg();
1885 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1887 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1888 // instructions will not be produced, as they are conditional branch
1889 // instructions that do not set flags.
1890     if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1891       return true;
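// Otherwise, branch on the low bit of the condition, e.g. (a sketch):
//   G_BRCOND %cond(s1), %bb.1  ==>  TBNZW %cond, 0, %bb.1
// or, when TB(N)Z is disallowed, an ANDS of bit 0 followed by a Bcc.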
1893 if (ProduceNonFlagSettingCondBr) {
1894       auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1895                      .addUse(CondReg)
1896                      .addImm(/*bit offset=*/0)
1897                      .addMBB(DestMBB);
1899       I.eraseFromParent();
1900       return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1901     } else {
1902       auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1903                      .addDef(AArch64::WZR)
1904                      .addUse(CondReg)
1905                      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
1906       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1907       auto Bcc =
1908           BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1909               .addImm(AArch64CC::NE)
1910               .addMBB(DestMBB);
1912       I.eraseFromParent();
1913       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1914     }
1915   }
1917 case TargetOpcode::G_BRINDIRECT: {
1918 I.setDesc(TII.get(AArch64::BR));
1919 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1922 case TargetOpcode::G_BRJT:
1923 return selectBrJT(I, MRI);
1925 case AArch64::G_ADD_LOW: {
1926     // This op may have been separated from its ADRP companion by the localizer
1927 // or some other code motion pass. Given that many CPUs will try to
1928 // macro fuse these operations anyway, select this into a MOVaddr pseudo
1929 // which will later be expanded into an ADRP+ADD pair after scheduling.
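// E.g. the expanded pair looks like (a sketch, using a stand-in symbol):
//   adrp x8, some_sym
//   add  x8, x8, :lo12:some_sym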
1930     MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
1931     if (BaseMI->getOpcode() != AArch64::ADRP) {
1932       I.setDesc(TII.get(AArch64::ADDXri));
1933       I.addOperand(MachineOperand::CreateImm(0));
1934       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1935     }
1936 assert(TM.getCodeModel() == CodeModel::Small &&
1937 "Expected small code model");
1938 MachineIRBuilder MIB(I);
1939 auto Op1 = BaseMI->getOperand(1);
1940 auto Op2 = I.getOperand(2);
1941 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
1942 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
1943 Op1.getTargetFlags())
1944 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
1945 Op2.getTargetFlags());
1946 I.eraseFromParent();
1947     return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
1948   }
1950 case TargetOpcode::G_BSWAP: {
1951 // Handle vector types for G_BSWAP directly.
1952 Register DstReg = I.getOperand(0).getReg();
1953 LLT DstTy = MRI.getType(DstReg);
1955 // We should only get vector types here; everything else is handled by the
1956 // importer right now.
1957     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1958       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1959       return false;
1960     }
1962 // Only handle 4 and 2 element vectors for now.
1963 // TODO: 16-bit elements.
1964 unsigned NumElts = DstTy.getNumElements();
1965     if (NumElts != 4 && NumElts != 2) {
1966       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1967       return false;
1968     }
1970 // Choose the correct opcode for the supported types. Right now, that's
1971 // v2s32, v4s32, and v2s64.
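// E.g. a v4s32 bswap reverses the bytes within each 32-bit element of the
// full 128-bit register (a sketch): G_BSWAP %v(v4s32)  ==>  REV32v16i8 %v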
1972     unsigned Opc = 0;
1973     unsigned EltSize = DstTy.getElementType().getSizeInBits();
1974     if (EltSize == 32)
1975       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1976 : AArch64::REV32v16i8;
1977 else if (EltSize == 64)
1978 Opc = AArch64::REV64v16i8;
1980 // We should always get something by the time we get here...
1981 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1983 I.setDesc(TII.get(Opc));
1984     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1985   }
1987 case TargetOpcode::G_FCONSTANT:
1988 case TargetOpcode::G_CONSTANT: {
1989 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1991 const LLT s8 = LLT::scalar(8);
1992 const LLT s16 = LLT::scalar(16);
1993 const LLT s32 = LLT::scalar(32);
1994 const LLT s64 = LLT::scalar(64);
1995 const LLT p0 = LLT::pointer(0, 64);
1997 const Register DefReg = I.getOperand(0).getReg();
1998 const LLT DefTy = MRI.getType(DefReg);
1999 const unsigned DefSize = DefTy.getSizeInBits();
2000 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2002 // FIXME: Redundant check, but even less readable when factored out.
2003     if (isFP) {
2004       if (Ty != s32 && Ty != s64) {
2005         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2006                           << " constant, expected: " << s32 << " or " << s64
2007                           << '\n');
2008         return false;
2009       }
2011 if (RB.getID() != AArch64::FPRRegBankID) {
2012 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2013 << " constant on bank: " << RB
2014 << ", expected: FPR\n");
2018 // The case when we have 0.0 is covered by tablegen. Reject it here so we
2019 // can be sure tablegen works correctly and isn't rescued by this code.
2020       if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
2021         return false;
2022     } else {
2023       // s32 and s64 are covered by tablegen.
2024 if (Ty != p0 && Ty != s8 && Ty != s16) {
2025 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2026 << " constant, expected: " << s32 << ", " << s64
2027 << ", or " << p0 << '\n');
2031 if (RB.getID() != AArch64::GPRRegBankID) {
2032 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2033 << " constant on bank: " << RB
2034 << ", expected: GPR\n");
2039 // We allow G_CONSTANT of types < 32b.
2040 const unsigned MovOpc =
2041 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2043     if (isFP) {
2044       // Either emit a FMOV, or emit a copy to emit a normal mov.
2045 const TargetRegisterClass &GPRRC =
2046 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2047 const TargetRegisterClass &FPRRC =
2048 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
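// E.g. 1.0 fits FMOV's 8-bit immediate (fmov d0, #1.0), while a value like
// 0.1 does not and is built as an integer mov plus a GPR->FPR copy
// (a sketch): mov w8, #0x3dcccccd ; fmov s0, w8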
2050 // Can we use a FMOV instruction to represent the immediate?
2051       if (emitFMovForFConstant(I, MRI))
2052         return true;
2054 // For 64b values, emit a constant pool load instead.
2055 if (DefSize == 64) {
2056 auto *FPImm = I.getOperand(1).getFPImm();
2057 MachineIRBuilder MIB(I);
2058 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2060 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2063 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2064 I.eraseFromParent();
2065         return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2066       }
2068 // Nope. Emit a copy and use a normal mov instead.
2069 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2070 MachineOperand &RegOp = I.getOperand(0);
2071 RegOp.setReg(DefGPRReg);
2072 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2073 MIB.buildCopy({DefReg}, {DefGPRReg});
2075 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2076 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2080 MachineOperand &ImmOp = I.getOperand(1);
2081 // FIXME: Is going through int64_t always correct?
2082 ImmOp.ChangeToImmediate(
2083 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2084 } else if (I.getOperand(1).isCImm()) {
2085 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2086 I.getOperand(1).ChangeToImmediate(Val);
2087 } else if (I.getOperand(1).isImm()) {
2088       uint64_t Val = I.getOperand(1).getImm();
2089       I.getOperand(1).ChangeToImmediate(Val);
2090     }
2092     I.setDesc(TII.get(MovOpc));
2093     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2094     return true;
2095   }
2096 case TargetOpcode::G_EXTRACT: {
2097 Register DstReg = I.getOperand(0).getReg();
2098 Register SrcReg = I.getOperand(1).getReg();
2099 LLT SrcTy = MRI.getType(SrcReg);
2100 LLT DstTy = MRI.getType(DstReg);
2102 unsigned SrcSize = SrcTy.getSizeInBits();
2104 if (SrcTy.getSizeInBits() > 64) {
2105 // This should be an extract of an s128, which is like a vector extract.
2106       if (SrcTy.getSizeInBits() != 128)
2107         return false;
2108 // Only support extracting 64 bits from an s128 at the moment.
2109       if (DstTy.getSizeInBits() != 64)
2110         return false;
2112 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2113 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2114 // Check we have the right regbank always.
2115 assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2116 DstRB.getID() == AArch64::FPRRegBankID &&
2117 "Wrong extract regbank!");
2120 // Emit the same code as a vector extract.
2121 // Offset must be a multiple of 64.
2122 unsigned Offset = I.getOperand(2).getImm();
2123       if (Offset % 64 != 0)
2124         return false;
2125 unsigned LaneIdx = Offset / 64;
2126 MachineIRBuilder MIB(I);
2127 MachineInstr *Extract = emitExtractVectorElt(
2128 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2129       if (!Extract)
2130         return false;
2131       I.eraseFromParent();
2132       return true;
2133     }
2135 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2136 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2137 Ty.getSizeInBits() - 1);
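// E.g. extracting bits [8, 24) of an s32 (a sketch):
//   %w(s16) = G_EXTRACT %x(s32), 8  ==>  UBFMWri %w, %x, 8, 23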
2139     if (SrcSize < 64) {
2140       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2141 "unexpected G_EXTRACT types");
2142       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2143     }
2145 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2146 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2147 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2148 .addReg(DstReg, 0, AArch64::sub_32);
2149 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2150 AArch64::GPR32RegClass, MRI);
2151 I.getOperand(0).setReg(DstReg);
2153     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2154   }
2156 case TargetOpcode::G_INSERT: {
2157 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2158 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2159 unsigned DstSize = DstTy.getSizeInBits();
2160 // Larger inserts are vectors, same-size ones should be something else by
2161 // now (split up or turned into COPYs).
2162     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2163       return false;
2165 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2166 unsigned LSB = I.getOperand(3).getImm();
2167 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2168 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2169 MachineInstrBuilder(MF, I).addImm(Width - 1);
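// E.g. inserting an s16 at bit 8 of an s32 (a sketch):
//   %d(s32) = G_INSERT %old, %val(s16), 8  ==>  BFMWri %d, %old, %val, 24, 15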
2171     if (DstSize < 64) {
2172       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2173 "unexpected G_INSERT types");
2174       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2175     }
2177 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2178 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2179 TII.get(AArch64::SUBREG_TO_REG))
2180         .addDef(SrcReg)
2181         .addImm(0)
2182         .addUse(I.getOperand(2).getReg())
2183 .addImm(AArch64::sub_32);
2184 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2185 AArch64::GPR32RegClass, MRI);
2186 I.getOperand(2).setReg(SrcReg);
2188     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2189   }
2190 case TargetOpcode::G_FRAME_INDEX: {
2191 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2192 if (Ty != LLT::pointer(0, 64)) {
2193 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2194 << ", expected: " << LLT::pointer(0, 64) << '\n');
2197 I.setDesc(TII.get(AArch64::ADDXri));
2199 // MOs for a #0 shifted immediate.
2200 I.addOperand(MachineOperand::CreateImm(0));
2201 I.addOperand(MachineOperand::CreateImm(0));
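// E.g. (a sketch): %p(p0) = G_FRAME_INDEX %stack.0
//   ==>  %p = ADDXri %stack.0, 0, 0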
2203     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2204   }
2206 case TargetOpcode::G_GLOBAL_VALUE: {
2207 auto GV = I.getOperand(1).getGlobal();
2208 if (GV->isThreadLocal())
2209 return selectTLSGlobalValue(I, MRI);
2211 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2212 if (OpFlags & AArch64II::MO_GOT) {
2213 I.setDesc(TII.get(AArch64::LOADgot));
2214 I.getOperand(1).setTargetFlags(OpFlags);
2215 } else if (TM.getCodeModel() == CodeModel::Large) {
2216 // Materialize the global using movz/movk instructions.
2217 materializeLargeCMVal(I, GV, OpFlags);
2218       I.eraseFromParent();
2219       return true;
2220 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2221 I.setDesc(TII.get(AArch64::ADR));
2222 I.getOperand(1).setTargetFlags(OpFlags);
2223     } else {
2224       I.setDesc(TII.get(AArch64::MOVaddr));
2225 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2226 MachineInstrBuilder MIB(MF, I);
2227 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2228                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2229     }
2230     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2231   }
2233 case TargetOpcode::G_ZEXTLOAD:
2234 case TargetOpcode::G_LOAD:
2235 case TargetOpcode::G_STORE: {
2236 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2237 MachineIRBuilder MIB(I);
2239 LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2241 if (PtrTy != LLT::pointer(0, 64)) {
2242 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2243 << ", expected: " << LLT::pointer(0, 64) << '\n');
2247 auto &MemOp = **I.memoperands_begin();
2248 if (MemOp.isAtomic()) {
2249 // For now we just support s8 acquire loads to be able to compile stack
2250       // protector code.
2251       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2252 MemOp.getSize() == 1) {
2253 I.setDesc(TII.get(AArch64::LDARB));
2254 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2256 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2259 unsigned MemSizeInBits = MemOp.getSize() * 8;
2261 const Register PtrReg = I.getOperand(1).getReg();
2263 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2264 // Sanity-check the pointer register.
2265 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2266 "Load/Store pointer operand isn't a GPR");
2267 assert(MRI.getType(PtrReg).isPointer() &&
2268 "Load/Store pointer operand isn't a pointer");
2271 const Register ValReg = I.getOperand(0).getReg();
2272 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2274 const unsigned NewOpc =
2275 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2276     if (NewOpc == I.getOpcode())
2277       return false;
2279 I.setDesc(TII.get(NewOpc));
2281 uint64_t Offset = 0;
2282 auto *PtrMI = MRI.getVRegDef(PtrReg);
2284 // Try to fold a GEP into our unsigned immediate addressing mode.
2285 if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
2286 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
2287 int64_t Imm = *COff;
2288 const unsigned Size = MemSizeInBits / 8;
2289 const unsigned Scale = Log2_32(Size);
2290 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
2291 Register Ptr2Reg = PtrMI->getOperand(1).getReg();
2292 I.getOperand(1).setReg(Ptr2Reg);
2293 PtrMI = MRI.getVRegDef(Ptr2Reg);
2294           Offset = Imm / Size;
2295         }
2296       }
2297     }
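// E.g. for an 8-byte access the immediate is scaled by 8 (a sketch):
//   %a = G_PTR_ADD %base, 16 ; %v(s64) = G_LOAD %a  ==>  LDRXui %base, 2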
2299 // If we haven't folded anything into our addressing mode yet, try to fold
2300 // a frame index into the base+offset.
2301 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
2302 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
2304 I.addOperand(MachineOperand::CreateImm(Offset));
2306 // If we're storing a 0, use WZR/XZR.
2307 if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
2308 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
2309 if (I.getOpcode() == AArch64::STRWui)
2310 I.getOperand(0).setReg(AArch64::WZR);
2311 else if (I.getOpcode() == AArch64::STRXui)
2312           I.getOperand(0).setReg(AArch64::XZR);
2313       }
2314     }
2316     if (IsZExtLoad) {
2317       // The zextload from a smaller type to i32 should be handled by the
2318       // importer.
2319       if (MRI.getType(ValReg).getSizeInBits() != 64)
2320         return false;
2321       // If we have a ZEXTLOAD then change the load's type to be a narrower
2322       // reg and zero_extend with SUBREG_TO_REG.
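// E.g. (a sketch):
//   %v(s64) = G_ZEXTLOAD %p :: (load 4)
//   ==>  %w = LDRWui %p, 0 ; %v = SUBREG_TO_REG 0, %w, sub_32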
2322 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2323 Register DstReg = I.getOperand(0).getReg();
2324 I.getOperand(0).setReg(LdReg);
2326 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2327 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2328           .addImm(0)
2329           .addUse(LdReg)
2330           .addImm(AArch64::sub_32);
2331 constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2332       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2333                                           MRI);
2334     }
2335     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2336   }
2338 case TargetOpcode::G_SMULH:
2339 case TargetOpcode::G_UMULH: {
2340 // Reject the various things we don't support yet.
2341     if (unsupportedBinOp(I, RBI, MRI, TRI))
2342       return false;
2344 const Register DefReg = I.getOperand(0).getReg();
2345 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2347 if (RB.getID() != AArch64::GPRRegBankID) {
2348 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2352 if (Ty != LLT::scalar(64)) {
2353 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2354 << ", expected: " << LLT::scalar(64) << '\n');
2358 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2360 I.setDesc(TII.get(NewOpc));
2362 // Now that we selected an opcode, we need to constrain the register
2363 // operands to use appropriate classes.
2364 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2366 case TargetOpcode::G_FADD:
2367 case TargetOpcode::G_FSUB:
2368 case TargetOpcode::G_FMUL:
2369 case TargetOpcode::G_FDIV:
2371 case TargetOpcode::G_ASHR:
2372 if (MRI.getType(I.getOperand(0).getReg()).isVector())
2373       return selectVectorASHR(I, MRI);
2374     LLVM_FALLTHROUGH;
2375 case TargetOpcode::G_SHL:
2376 if (Opcode == TargetOpcode::G_SHL &&
2377 MRI.getType(I.getOperand(0).getReg()).isVector())
2378       return selectVectorSHL(I, MRI);
2379     LLVM_FALLTHROUGH;
2380 case TargetOpcode::G_OR:
2381 case TargetOpcode::G_LSHR: {
2382 // Reject the various things we don't support yet.
2383     if (unsupportedBinOp(I, RBI, MRI, TRI))
2384       return false;
2386 const unsigned OpSize = Ty.getSizeInBits();
2388 const Register DefReg = I.getOperand(0).getReg();
2389 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2391 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2392     if (NewOpc == I.getOpcode())
2393       return false;
2395 I.setDesc(TII.get(NewOpc));
2396 // FIXME: Should the type be always reset in setDesc?
2398 // Now that we selected an opcode, we need to constrain the register
2399 // operands to use appropriate classes.
2400 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2403 case TargetOpcode::G_PTR_ADD: {
2404 MachineIRBuilder MIRBuilder(I);
2405     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2406             MIRBuilder);
2407     I.eraseFromParent();
2408     return true;
2409   }
2410 case TargetOpcode::G_UADDO: {
2411 // TODO: Support other types.
2412 unsigned OpSize = Ty.getSizeInBits();
2413     if (OpSize != 32 && OpSize != 64) {
2414       LLVM_DEBUG(
2415           dbgs()
2416           << "G_UADDO currently only supported for 32 and 64 b types.\n");
2417       return false;
2418     }
2420 // TODO: Support vectors.
2421 if (Ty.isVector()) {
2422 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
2426 // Add and set the set condition flag.
2427 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
2428 MachineIRBuilder MIRBuilder(I);
2429 auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
2430 {I.getOperand(2), I.getOperand(3)});
2431 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
2433 // Now, put the overflow result in the register given by the first operand
2434 // to the G_UADDO. CSINC increments the result when the predicate is false,
2435 // so to get the increment when it's true, we need to use the inverse. In
2436 // this case, we want to increment when carry is set.
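// I.e. "cset w0, hs" is (a sketch): CSINCWr %carry, $wzr, $wzr, lo, which
// yields wzr + 1 exactly when the carry (HS) condition holds.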
2437 auto CsetMI = MIRBuilder
2438 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2439 {Register(AArch64::WZR), Register(AArch64::WZR)})
2440 .addImm(getInvertedCondCode(AArch64CC::HS));
2441 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2442     I.eraseFromParent();
2443     return true;
2444   }
2446 case TargetOpcode::G_PTRMASK: {
2447 Register MaskReg = I.getOperand(2).getReg();
2448 Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
2449 // TODO: Implement arbitrary cases
2450     if (!MaskVal || !isShiftedMask_64(*MaskVal))
2451       return false;
2453 uint64_t Mask = *MaskVal;
2454 I.setDesc(TII.get(AArch64::ANDXri));
2455 I.getOperand(2).ChangeToImmediate(
2456 AArch64_AM::encodeLogicalImmediate(Mask, 64));
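// E.g. aligning a pointer down to 16 bytes (a sketch):
//   %p2 = G_PTRMASK %p, -16  ==>  ANDXri %p2, %p, <encoding of 0xffff...f0>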
2458 return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2460 case TargetOpcode::G_PTRTOINT:
2461 case TargetOpcode::G_TRUNC: {
2462 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2463 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2465 const Register DstReg = I.getOperand(0).getReg();
2466 const Register SrcReg = I.getOperand(1).getReg();
2468 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2469 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2471 if (DstRB.getID() != SrcRB.getID()) {
2473 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2477 if (DstRB.getID() == AArch64::GPRRegBankID) {
2478 const TargetRegisterClass *DstRC =
2479           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2480       if (!DstRC)
2481         return false;
2483 const TargetRegisterClass *SrcRC =
2484           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2485       if (!SrcRC)
2486         return false;
2488 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2489 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2490 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2494 if (DstRC == SrcRC) {
2495 // Nothing to be done
2496 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2497 SrcTy == LLT::scalar(64)) {
2498 llvm_unreachable("TableGen can import this case");
2500 } else if (DstRC == &AArch64::GPR32RegClass &&
2501 SrcRC == &AArch64::GPR64RegClass) {
2502 I.getOperand(1).setSubReg(AArch64::sub_32);
2505 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2509 I.setDesc(TII.get(TargetOpcode::COPY));
2511 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2512 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2513 I.setDesc(TII.get(AArch64::XTNv4i16));
2514         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2515         return true;
2516       }
2518 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2519 MachineIRBuilder MIB(I);
2520 MachineInstr *Extract = emitExtractVectorElt(
2521 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2522         if (!Extract)
2523           return false;
2524         I.eraseFromParent();
2525         return true;
2526       }
2528 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2529 if (Opcode == TargetOpcode::G_PTRTOINT) {
2530 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2531         I.setDesc(TII.get(TargetOpcode::COPY));
2532         return true;
2533       }
2534     }
2536     return false;
2537   }
2539 case TargetOpcode::G_ANYEXT: {
2540 const Register DstReg = I.getOperand(0).getReg();
2541 const Register SrcReg = I.getOperand(1).getReg();
2543 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2544 if (RBDst.getID() != AArch64::GPRRegBankID) {
2545 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2546 << ", expected: GPR\n");
2550 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2551 if (RBSrc.getID() != AArch64::GPRRegBankID) {
2552 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2553 << ", expected: GPR\n");
2557 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2560 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2564 if (DstSize != 64 && DstSize > 32) {
2565 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2566 << ", expected: 32 or 64\n");
2569 // At this point G_ANYEXT is just like a plain COPY, but we need
2570 // to explicitly form the 64-bit value if any.
2571     if (DstSize == 64) {
2572       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2573 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2574           .addDef(ExtSrc)
2575           .addImm(0)
2576           .addUse(SrcReg)
2577           .addImm(AArch64::sub_32);
2578       I.getOperand(1).setReg(ExtSrc);
2579     }
2580     return selectCopy(I, TII, MRI, TRI, RBI);
2581   }
2583 case TargetOpcode::G_ZEXT:
2584 case TargetOpcode::G_SEXT_INREG:
2585 case TargetOpcode::G_SEXT: {
2586 unsigned Opcode = I.getOpcode();
2587 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2588 const Register DefReg = I.getOperand(0).getReg();
2589 Register SrcReg = I.getOperand(1).getReg();
2590 const LLT DstTy = MRI.getType(DefReg);
2591 const LLT SrcTy = MRI.getType(SrcReg);
2592 unsigned DstSize = DstTy.getSizeInBits();
2593 unsigned SrcSize = SrcTy.getSizeInBits();
2595 // SEXT_INREG has the same src reg size as dst, the size of the value to be
2596 // extended is encoded in the imm.
2597 if (Opcode == TargetOpcode::G_SEXT_INREG)
2598 SrcSize = I.getOperand(2).getImm();
2600 if (DstTy.isVector())
2601 return false; // Should be handled by imported patterns.
2603 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2604 AArch64::GPRRegBankID &&
2605 "Unexpected ext regbank");
2607 MachineIRBuilder MIB(I);
2610 // First check if we're extending the result of a load which has a dest type
2611 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
2612 // GPR register on AArch64 and all loads which are smaller automatically
2613 // zero-extend the upper bits. E.g.
2614 // %v(s8) = G_LOAD %p, :: (load 1)
2615 // %v2(s32) = G_ZEXT %v(s8)
2617 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2618     bool IsGPR =
2619         RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2620 if (LoadMI && IsGPR) {
2621 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2622 unsigned BytesLoaded = MemOp->getSize();
2623 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2624         return selectCopy(I, TII, MRI, TRI, RBI);
2625     }
2627 // If we are zero extending from 32 bits to 64 bits, it's possible that
2628 // the instruction implicitly does the zero extend for us. In that case,
2629 // we can just emit a SUBREG_TO_REG.
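// E.g. (a sketch):
//   %w(s32) = G_ADD %a, %b ; %x(s64) = G_ZEXT %w
//   ==>  %x = SUBREG_TO_REG 0, %w, sub_32 ; ADDWrr already zeroed the top half.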
2630 if (IsGPR && SrcSize == 32 && DstSize == 64) {
2631 // Unlike with the G_LOAD case, we don't want to look through copies
2632       // here.
2633       MachineInstr *Def = MRI.getVRegDef(SrcReg);
2634 if (Def && isDef32(*Def)) {
2635         MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2636             .addImm(0)
2637             .addUse(SrcReg)
2638             .addImm(AArch64::sub_32);
2640         if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2641                                           MRI)) {
2642           LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2643           return false;
2644         }
2646         if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2647                                           MRI)) {
2648           LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2649           return false;
2650         }
2652         I.eraseFromParent();
2653         return true;
2654       }
2655     }
2657     MachineInstrBuilder ExtI;
2658 if (DstSize == 64) {
2659 if (Opcode != TargetOpcode::G_SEXT_INREG) {
2660 // FIXME: Can we avoid manually doing this?
2661       if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2662                                         MRI)) {
2663         LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2664                           << " operand\n");
2665         return false;
2666       }
2667       SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2668                               {&AArch64::GPR64RegClass}, {})
2669                    .addImm(0)
2670                    .addUse(SrcReg)
2671                    .addImm(AArch64::sub_32)
2672                    .getReg(0);
2673       }
2675       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2676                             {DefReg}, {SrcReg})
2677                  .addImm(0)
2678                  .addImm(SrcSize - 1);
2679 } else if (DstSize <= 32) {
2680 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2681                             {DefReg}, {SrcReg})
2682                  .addImm(0)
2683                  .addImm(SrcSize - 1);
2684     } else {
2685       return false;
2686     }
2688     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2689     I.eraseFromParent();
2690     return true;
2691   }
2693 case TargetOpcode::G_SITOFP:
2694 case TargetOpcode::G_UITOFP:
2695 case TargetOpcode::G_FPTOSI:
2696 case TargetOpcode::G_FPTOUI: {
2697 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2698 SrcTy = MRI.getType(I.getOperand(1).getReg());
2699 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2700     if (NewOpc == Opcode)
2701       return false;
2703     I.setDesc(TII.get(NewOpc));
2704     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2705     return true;
2706   }
2709 case TargetOpcode::G_FREEZE:
2710 return selectCopy(I, TII, MRI, TRI, RBI);
2712 case TargetOpcode::G_INTTOPTR:
2713 // The importer is currently unable to import pointer types since they
2714 // didn't exist in SelectionDAG.
2715 return selectCopy(I, TII, MRI, TRI, RBI);
2717 case TargetOpcode::G_BITCAST:
2718 // Imported SelectionDAG rules can handle every bitcast except those that
2719 // bitcast from a type to the same type. Ideally, these shouldn't occur
2720 // but we might not run an optimizer that deletes them. The other exception
2721 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2722     // of them.
2723     return selectCopy(I, TII, MRI, TRI, RBI);
2725 case TargetOpcode::G_SELECT: {
2726 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2727 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2728 << ", expected: " << LLT::scalar(1) << '\n');
2732 const Register CondReg = I.getOperand(1).getReg();
2733 const Register TReg = I.getOperand(2).getReg();
2734 const Register FReg = I.getOperand(3).getReg();
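// The select is emitted as a flag-setting test of the condition's low bit
// plus a conditional select, e.g. (a sketch):
//   ANDSWri $wzr, %cond, #1 ; CSELWr %dst, %t, %f, ne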
2736     if (tryOptSelect(I))
2737       return true;
2739     unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
2740 MachineInstr &TstMI =
2741 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2742 .addDef(AArch64::WZR)
2743              .addUse(CondReg)
2744              .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2746 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2747 .addDef(I.getOperand(0).getReg())
2748                                  .addUse(TReg)
2749                                  .addUse(FReg)
2750                                  .addImm(AArch64CC::NE);
2752 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2753 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2755     I.eraseFromParent();
2756     return true;
2757   }
2758 case TargetOpcode::G_ICMP: {
2759     if (Ty.isVector())
2760       return selectVectorICmp(I, MRI);
2762 if (Ty != LLT::scalar(32)) {
2763 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2764 << ", expected: " << LLT::scalar(32) << '\n');
2768 MachineIRBuilder MIRBuilder(I);
2769     MachineInstr *Cmp;
2770     CmpInst::Predicate Pred;
2771 std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3),
2772 I.getOperand(1), MIRBuilder);
2773     if (!Cmp)
2774       return false;
2775     emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
2776     I.eraseFromParent();
2777     return true;
2778   }
2780 case TargetOpcode::G_FCMP: {
2781 if (Ty != LLT::scalar(32)) {
2782 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2783 << ", expected: " << LLT::scalar(32) << '\n');
2787 unsigned CmpOpc = selectFCMPOpc(I, MRI);
2793 AArch64CC::CondCode CC1, CC2;
2794 changeFCMPPredToAArch64CC(
2795 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
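// Some FP predicates need two AArch64 conditions, producing two CSINCs
// joined by an ORR, e.g. roughly (a sketch):
//   csinc w8, wzr, wzr, <inv CC1> ; csinc w9, wzr, wzr, <inv CC2> ; orr w0, w8, w9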
2797 // Partially build the compare. Decide if we need to add a use for the
2798 // third operand based off whether or not we're comparing against 0.0.
2799 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2800 .addUse(I.getOperand(2).getReg());
2802 // If we don't have an immediate compare, then we need to add a use of the
2803 // register which wasn't used for the immediate.
2804 // Note that the immediate will always be the last operand.
2805 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2806 CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2808 const Register DefReg = I.getOperand(0).getReg();
2809 Register Def1Reg = DefReg;
2810 if (CC2 != AArch64CC::AL)
2811 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2813 MachineInstr &CSetMI =
2814 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2816 .addUse(AArch64::WZR)
2817 .addUse(AArch64::WZR)
2818 .addImm(getInvertedCondCode(CC1));
2820 if (CC2 != AArch64CC::AL) {
2821 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2822 MachineInstr &CSet2MI =
2823 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2824               .addDef(Def2Reg)
2825               .addUse(AArch64::WZR)
2826 .addUse(AArch64::WZR)
2827 .addImm(getInvertedCondCode(CC2));
2828 MachineInstr &OrMI =
2829 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2830             .addDef(DefReg)
2831             .addUse(Def1Reg)
2832             .addUse(Def2Reg);
2833     constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2834 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2836 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2837 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2839     I.eraseFromParent();
2840     return true;
2841   }
2842 case TargetOpcode::G_VASTART:
2843 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2844 : selectVaStartAAPCS(I, MF, MRI);
2845 case TargetOpcode::G_INTRINSIC:
2846 return selectIntrinsic(I, MRI);
2847 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2848 return selectIntrinsicWithSideEffects(I, MRI);
2849 case TargetOpcode::G_IMPLICIT_DEF: {
2850 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2851 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2852 const Register DstReg = I.getOperand(0).getReg();
2853 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2854 const TargetRegisterClass *DstRC =
2855 getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2856     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2857     return true;
2858   }
2859 case TargetOpcode::G_BLOCK_ADDR: {
2860 if (TM.getCodeModel() == CodeModel::Large) {
2861 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2862       I.eraseFromParent();
2863       return true;
2864     } else {
2865       I.setDesc(TII.get(AArch64::MOVaddrBA));
2866 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2867 I.getOperand(0).getReg())
2868 .addBlockAddress(I.getOperand(1).getBlockAddress(),
2869                                      /* Offset */ 0, AArch64II::MO_PAGE)
2870                      .addBlockAddress(
2871                          I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2872 AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2873 I.eraseFromParent();
2874       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2875     }
2876   }
2877 case TargetOpcode::G_INTRINSIC_TRUNC:
2878 return selectIntrinsicTrunc(I, MRI);
2879 case TargetOpcode::G_INTRINSIC_ROUND:
2880 return selectIntrinsicRound(I, MRI);
2881 case TargetOpcode::G_BUILD_VECTOR:
2882 return selectBuildVector(I, MRI);
2883 case TargetOpcode::G_MERGE_VALUES:
2884 return selectMergeValues(I, MRI);
2885 case TargetOpcode::G_UNMERGE_VALUES:
2886 return selectUnmergeValues(I, MRI);
2887 case TargetOpcode::G_SHUFFLE_VECTOR:
2888 return selectShuffleVector(I, MRI);
2889 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2890 return selectExtractElt(I, MRI);
2891 case TargetOpcode::G_INSERT_VECTOR_ELT:
2892 return selectInsertElt(I, MRI);
2893 case TargetOpcode::G_CONCAT_VECTORS:
2894 return selectConcatVectors(I, MRI);
2895 case TargetOpcode::G_JUMP_TABLE:
2896     return selectJumpTable(I, MRI);
2897   }
2899   return false;
2900 }
2902 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2903 MachineRegisterInfo &MRI) const {
2904 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2905 Register JTAddr = I.getOperand(0).getReg();
2906 unsigned JTI = I.getOperand(1).getIndex();
2907 Register Index = I.getOperand(2).getReg();
2908 MachineIRBuilder MIB(I);
2910 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2911 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
2912 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
2913 {TargetReg, ScratchReg}, {JTAddr, Index})
2914 .addJumpTableIndex(JTI);
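// JumpTableDest32 is a pseudo that (roughly) loads the sign-extended 32-bit
// entry at JTAddr + Index * 4 and adds it to JTAddr to form the target.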
2915 // Build the indirect branch.
2916 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2917 I.eraseFromParent();
2918   return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
2919 }
2921 bool AArch64InstructionSelector::selectJumpTable(
2922 MachineInstr &I, MachineRegisterInfo &MRI) const {
2923 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2924 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2926 Register DstReg = I.getOperand(0).getReg();
2927 unsigned JTI = I.getOperand(1).getIndex();
2928 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
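// E.g. (a sketch): adrp x8, .LJTI0_0 ; add x8, x8, :lo12:.LJTI0_0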
2929 MachineIRBuilder MIB(I);
2930   auto MovMI =
2931       MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2932 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2933 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2934 I.eraseFromParent();
2935   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2936 }
2938 bool AArch64InstructionSelector::selectTLSGlobalValue(
2939 MachineInstr &I, MachineRegisterInfo &MRI) const {
2940   if (!STI.isTargetMachO())
2941     return false;
2942 MachineFunction &MF = *I.getParent()->getParent();
2943 MF.getFrameInfo().setAdjustsStack(true);
2945 const GlobalValue &GV = *I.getOperand(1).getGlobal();
2946 MachineIRBuilder MIB(I);
2948 MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2949 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2951 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2952                              {Register(AArch64::X0)})
2953                   .addImm(0);
2955 // TLS calls preserve all registers except those that absolutely must be
2956 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2957   // silly).
2958   MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
2959 .addDef(AArch64::X0, RegState::Implicit)
2960 .addRegMask(TRI.getTLSCallPreservedMask());
2962 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2963 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2964                               MRI);
2965   I.eraseFromParent();
2966   return true;
2967 }
2969 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2970 MachineInstr &I, MachineRegisterInfo &MRI) const {
2971 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2973 // Select the correct opcode.
2974   unsigned Opc = 0;
2975   if (!SrcTy.isVector()) {
2976     switch (SrcTy.getSizeInBits()) {
2977     default:
2978     case 16:
2979       Opc = AArch64::FRINTZHr;
2980       break;
2981     case 32:
2982       Opc = AArch64::FRINTZSr;
2983       break;
2984     case 64:
2985       Opc = AArch64::FRINTZDr;
2986       break;
2987     }
2988   } else {
2989     unsigned NumElts = SrcTy.getNumElements();
2990     switch (SrcTy.getElementType().getSizeInBits()) {
2991     default:
2992       break;
2993     case 16:
2994       if (NumElts == 4)
2995         Opc = AArch64::FRINTZv4f16;
2996       else if (NumElts == 8)
2997         Opc = AArch64::FRINTZv8f16;
2998       break;
2999     case 32:
3000       if (NumElts == 2)
3001         Opc = AArch64::FRINTZv2f32;
3002       else if (NumElts == 4)
3003         Opc = AArch64::FRINTZv4f32;
3004       break;
3005     case 64:
3006       if (NumElts == 2)
3007         Opc = AArch64::FRINTZv2f64;
3008       break;
3009     }
3010   }
3012   if (!Opc) {
3013     // Didn't get an opcode above, bail.
3014     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3015     return false;
3016   }
3018 // Legalization would have set us up perfectly for this; we just need to
3019 // set the opcode and move on.
3020 I.setDesc(TII.get(Opc));
3021   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3022 }
3024 bool AArch64InstructionSelector::selectIntrinsicRound(
3025 MachineInstr &I, MachineRegisterInfo &MRI) const {
3026 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3028 // Select the correct opcode.
3029   unsigned Opc = 0;
3030   if (!SrcTy.isVector()) {
3031     switch (SrcTy.getSizeInBits()) {
3032     default:
3033     case 16:
3034       Opc = AArch64::FRINTAHr;
3035       break;
3036     case 32:
3037       Opc = AArch64::FRINTASr;
3038       break;
3039     case 64:
3040       Opc = AArch64::FRINTADr;
3041       break;
3042     }
3043   } else {
3044     unsigned NumElts = SrcTy.getNumElements();
3045     switch (SrcTy.getElementType().getSizeInBits()) {
3046     default:
3047       break;
3048     case 16:
3049       if (NumElts == 4)
3050         Opc = AArch64::FRINTAv4f16;
3051       else if (NumElts == 8)
3052         Opc = AArch64::FRINTAv8f16;
3053       break;
3054     case 32:
3055       if (NumElts == 2)
3056         Opc = AArch64::FRINTAv2f32;
3057       else if (NumElts == 4)
3058         Opc = AArch64::FRINTAv4f32;
3059       break;
3060     case 64:
3061       if (NumElts == 2)
3062         Opc = AArch64::FRINTAv2f64;
3063       break;
3064     }
3065   }
3067   if (!Opc) {
3068     // Didn't get an opcode above, bail.
3069     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3070     return false;
3071   }
3073 // Legalization would have set us up perfectly for this; we just need to
3074 // set the opcode and move on.
3075 I.setDesc(TII.get(Opc));
3076   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3077 }
3079 bool AArch64InstructionSelector::selectVectorICmp(
3080 MachineInstr &I, MachineRegisterInfo &MRI) const {
3081 Register DstReg = I.getOperand(0).getReg();
3082 LLT DstTy = MRI.getType(DstReg);
3083 Register SrcReg = I.getOperand(2).getReg();
3084 Register Src2Reg = I.getOperand(3).getReg();
3085 LLT SrcTy = MRI.getType(SrcReg);
3087 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3088 unsigned NumElts = DstTy.getNumElements();
3090 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3091 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3092   // Third index is cc opcode:
3093   // 0 == eq
3094   // 1 == ugt
3095   // 2 == uge
3096   // 3 == ult
3097   // 4 == ule
3098   // 5 == sgt
3099   // 6 == sge
3100   // 7 == slt
3101   // 8 == sle
3102 // ne is done by negating 'eq' result.
3104   // This table below assumes that for some comparisons the operands will be
3105   // commuted.
3106   // ult op == commute + ugt op
3107 // ule op == commute + uge op
3108 // slt op == commute + sgt op
3109 // sle op == commute + sge op
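// E.g. a v4s32 "ult" comparison swaps its operands and uses the "ugt"
// opcode (a sketch): G_ICMP ult, %a, %b  ==>  CMHIv4i32 %b, %a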
3110 unsigned PredIdx = 0;
3111 bool SwapOperands = false;
3112   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3113   switch (Pred) {
3114   case CmpInst::ICMP_NE:
3115   case CmpInst::ICMP_EQ:
3116     PredIdx = 0;
3117     break;
3118   case CmpInst::ICMP_UGT:
3119     PredIdx = 1;
3120     break;
3121   case CmpInst::ICMP_UGE:
3122     PredIdx = 2;
3123     break;
3124   case CmpInst::ICMP_ULT:
3125     PredIdx = 3;
3126     SwapOperands = true;
3127     break;
3128   case CmpInst::ICMP_ULE:
3129     PredIdx = 4;
3130     SwapOperands = true;
3131     break;
3132   case CmpInst::ICMP_SGT:
3133     PredIdx = 5;
3134     break;
3135   case CmpInst::ICMP_SGE:
3136     PredIdx = 6;
3137     break;
3138   case CmpInst::ICMP_SLT:
3139     PredIdx = 7;
3140     SwapOperands = true;
3141     break;
3142   case CmpInst::ICMP_SLE:
3143     PredIdx = 8;
3144     SwapOperands = true;
3145     break;
3146   default:
3147     llvm_unreachable("Unhandled icmp predicate");
3148   }
3151 // This table obviously should be tablegen'd when we have our GISel native
3152 // tablegen selector.
3154   static const unsigned OpcTable[4][4][9] = {
3155       {
3156        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3157         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3158         0 /* invalid */},
3159        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3160         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3161         0 /* invalid */},
3162        {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3163         AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3164         AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3165        {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3166         AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3167         AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3168       },
3169       {
3170        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3171         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3172         0 /* invalid */},
3173        {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3174         AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3175         AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3176        {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3177         AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3178         AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3179        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3180         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3181         0 /* invalid */}
3182       },
3183       {
3184        {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3185         AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3186         AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3187        {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3188         AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3189         AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3190        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3191         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3192         0 /* invalid */},
3193        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3194         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3195         0 /* invalid */}
3196       },
3197       {
3198        {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3199         AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3200         AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3201        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3202         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3203         0 /* invalid */},
3204        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3205         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3206         0 /* invalid */},
3207        {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3208         0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3209         0 /* invalid */}
3210       },
3211   };
3212 unsigned EltIdx = Log2_32(SrcEltSize / 8);
3213 unsigned NumEltsIdx = Log2_32(NumElts / 2);
3214 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3216 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3220 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3221 const TargetRegisterClass *SrcRC =
3222 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3224 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3228 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3229 if (SrcTy.getSizeInBits() == 128)
3230 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3232   if (SwapOperands)
3233     std::swap(SrcReg, Src2Reg);
3235 MachineIRBuilder MIB(I);
3236 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3237 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3239   // Invert if we had a 'ne' cc.
3240   if (NotOpc) {
3241     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3242     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3243   } else {
3244     MIB.buildCopy(DstReg, Cmp.getReg(0));
3245   }
3246   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3247   I.eraseFromParent();
3248   return true;
3249 }
3251 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3252 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3253 MachineIRBuilder &MIRBuilder) const {
3254 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3256 auto BuildFn = [&](unsigned SubregIndex) {
3257     auto Ins =
3258         MIRBuilder
3259             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3260 .addImm(SubregIndex);
3261 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3262 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3263     return &*Ins;
3264   };
3266   switch (EltSize) {
3267   case 16:
3268     return BuildFn(AArch64::hsub);
3269   case 32:
3270     return BuildFn(AArch64::ssub);
3271   case 64:
3272     return BuildFn(AArch64::dsub);
3273   default:
3274     return nullptr;
3275   }
3276 }
3278 bool AArch64InstructionSelector::selectMergeValues(
3279 MachineInstr &I, MachineRegisterInfo &MRI) const {
3280 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3281 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3282 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3283 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3284 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3286   if (I.getNumOperands() != 3)
3287     return false;
3289 // Merging 2 s64s into an s128.
3290 if (DstTy == LLT::scalar(128)) {
3291     if (SrcTy.getSizeInBits() != 64)
3292       return false;
3293 MachineIRBuilder MIB(I);
3294 Register DstReg = I.getOperand(0).getReg();
3295 Register Src1Reg = I.getOperand(1).getReg();
3296 Register Src2Reg = I.getOperand(2).getReg();
3297 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3298 MachineInstr *InsMI =
3299         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3300     if (!InsMI)
3301       return false;
3302 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3303                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
3304     if (!Ins2MI)
3305       return false;
3306 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3307 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3308     I.eraseFromParent();
3309     return true;
3310   }
3312   if (RB.getID() != AArch64::GPRRegBankID)
3313     return false;
3315   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3316     return false;
3318 auto *DstRC = &AArch64::GPR64RegClass;
3319 Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3320 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3321 TII.get(TargetOpcode::SUBREG_TO_REG))
3322 .addDef(SubToRegDef)
3323                                     .addImm(0)
3324                                     .addUse(I.getOperand(1).getReg())
3325 .addImm(AArch64::sub_32);
3326 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3327 // Need to anyext the second scalar before we can use bfm
3328 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3329 TII.get(TargetOpcode::SUBREG_TO_REG))
3330 .addDef(SubToRegDef2)
3331                                      .addImm(0)
3332                                      .addUse(I.getOperand(2).getReg())
3333                                      .addImm(AArch64::sub_32);
3334   MachineInstr &BFM =
3335       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3336 .addDef(I.getOperand(0).getReg())
3337 .addUse(SubToRegDef)
3338            .addUse(SubToRegDef2)
3339            .addImm(32)
3340            .addImm(31);
3341 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3342 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3343 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3344   I.eraseFromParent();
3345   return true;
3346 }
3348 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3349 const unsigned EltSize) {
3350 // Choose a lane copy opcode and subregister based off of the size of the
3351 // vector's elements.
3352   switch (EltSize) {
3353   case 16:
3354     CopyOpc = AArch64::CPYi16;
3355     ExtractSubReg = AArch64::hsub;
3356     break;
3357   case 32:
3358     CopyOpc = AArch64::CPYi32;
3359     ExtractSubReg = AArch64::ssub;
3360     break;
3361   case 64:
3362     CopyOpc = AArch64::CPYi64;
3363     ExtractSubReg = AArch64::dsub;
3364     break;
3365   default:
3366     // Unknown size, bail out.
3367     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3368     return false;
3369   }
3370   return true;
3371 }
3373 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3374 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3375 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3376 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3377 unsigned CopyOpc = 0;
3378 unsigned ExtractSubReg = 0;
3379 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3381 dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3385 const TargetRegisterClass *DstRC =
3386 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3388 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3392 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3393 const LLT &VecTy = MRI.getType(VecReg);
3394 const TargetRegisterClass *VecRC =
3395 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3397 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3401 // The register that we're going to copy into.
3402 Register InsertReg = VecReg;
3403   if (!DstReg)
3404     DstReg = MRI.createVirtualRegister(DstRC);
3405   // If the lane index is 0, we just use a subregister COPY.
3406   if (LaneIdx == 0) {
3407     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3408                     .addReg(VecReg, 0, ExtractSubReg);
3409     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3410     return &*Copy;
3411   }
3413 // Lane copies require 128-bit wide registers. If we're dealing with an
3414 // unpacked vector, then we need to move up to that width. Insert an implicit
3415 // def and a subregister insert to get us there.
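// E.g. for a 64-bit v2s32 source (a sketch):
//   %q = INSERT_SUBREG (IMPLICIT_DEF), %vec, dsub ; %dst = CPYi32 %q, 1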
3416 if (VecTy.getSizeInBits() != 128) {
3417 MachineInstr *ScalarToVector = emitScalarToVector(
3418 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3419     if (!ScalarToVector)
3420       return nullptr;
3421     InsertReg = ScalarToVector->getOperand(0).getReg();
3422   }
3424 MachineInstr *LaneCopyMI =
3425 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3426 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3428 // Make sure that we actually constrain the initial copy.
3429   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3430   return &*LaneCopyMI;
3431 }
3433 bool AArch64InstructionSelector::selectExtractElt(
3434 MachineInstr &I, MachineRegisterInfo &MRI) const {
3435 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3436 "unexpected opcode!");
3437 Register DstReg = I.getOperand(0).getReg();
3438 const LLT NarrowTy = MRI.getType(DstReg);
3439 const Register SrcReg = I.getOperand(1).getReg();
3440 const LLT WideTy = MRI.getType(SrcReg);
3442 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3443 "source register size too small!");
3444 assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
3446 // Need the lane index to determine the correct copy opcode.
3447 MachineOperand &LaneIdxOp = I.getOperand(2);
3448 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3450 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3451 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3455 // Find the index to extract from.
3456   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3457   if (!VRegAndVal)
3458     return false;
3459   unsigned LaneIdx = VRegAndVal->Value;
3461 MachineIRBuilder MIRBuilder(I);
3463 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3464 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3465                                                LaneIdx, MIRBuilder);
3466   if (!Extract)
3467     return false;
3469   I.eraseFromParent();
3470   return true;
3471 }
3473 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3474 MachineInstr &I, MachineRegisterInfo &MRI) const {
3475 unsigned NumElts = I.getNumOperands() - 1;
3476 Register SrcReg = I.getOperand(NumElts).getReg();
3477 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3478 const LLT SrcTy = MRI.getType(SrcReg);
3480 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3481   if (SrcTy.getSizeInBits() > 128) {
3482     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3483     return false;
3484   }
3486 MachineIRBuilder MIB(I);
3488 // We implement a split vector operation by treating the sub-vectors as
3489 // scalars and extracting them.
3490 const RegisterBank &DstRB =
3491 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3492 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3493 Register Dst = I.getOperand(OpIdx).getReg();
3494 MachineInstr *Extract =
3495         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3496     if (!Extract)
3497       return false;
3498   }
3499   I.eraseFromParent();
3500   return true;
3501 }
3503 bool AArch64InstructionSelector::selectUnmergeValues(
3504 MachineInstr &I, MachineRegisterInfo &MRI) const {
3505 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3506 "unexpected opcode");
3508 // TODO: Handle unmerging into GPRs and from scalars to scalars.
3509 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3510 AArch64::FPRRegBankID ||
3511 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3512 AArch64::FPRRegBankID) {
3513 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3514 "currently unsupported.\n");
3518 // The last operand is the vector source register, and every other operand is
3519 // a register to unpack into.
3520 unsigned NumElts = I.getNumOperands() - 1;
3521 Register SrcReg = I.getOperand(NumElts).getReg();
3522 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3523 const LLT WideTy = MRI.getType(SrcReg);
3525 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3526 "can only unmerge from vector or s128 types!");
3527 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3528 "source register size too small!");
3530 if (!NarrowTy.isScalar())
3531 return selectSplitVectorUnmerge(I, MRI);
3533 MachineIRBuilder MIB(I);
3535 // Choose a lane copy opcode and subregister based off of the size of the
3536 // vector's elements.
3537 unsigned CopyOpc = 0;
3538 unsigned ExtractSubReg = 0;
3539   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3540     return false;
3542 // Set up for the lane copies.
3543 MachineBasicBlock &MBB = *I.getParent();
3545 // Stores the registers we'll be copying from.
3546 SmallVector<Register, 4> InsertRegs;
3548 // We'll use the first register twice, so we only need NumElts-1 registers.
3549 unsigned NumInsertRegs = NumElts - 1;
3551 // If our elements fit into exactly 128 bits, then we can copy from the source
3552   // directly. Otherwise, we need to do a bit of setup with some subregister
3553   // inserts.
3554 if (NarrowTy.getSizeInBits() * NumElts == 128) {
3555 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3556   } else {
3557     // No. We have to perform subregister inserts. For each insert, create an
3558 // implicit def and a subregister insert, and save the register we create.
3559 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3560 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3561 MachineInstr &ImpDefMI =
3562 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3565 // Now, create the subregister insert from SrcReg.
3566 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3567 MachineInstr &InsMI =
3568 *BuildMI(MBB, I, I.getDebugLoc(),
3569 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3572 .addImm(AArch64::dsub);
3574 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3575 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3577 // Save the register so that we can copy from it after.
3578 InsertRegs.push_back(InsertReg);
3582 // Now that we've created any necessary subregister inserts, we can
3583 // create the copies.
3585 // Perform the first copy separately as a subregister copy.
3586 Register CopyTo = I.getOperand(0).getReg();
3587 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3588 .addReg(InsertRegs[0], 0, ExtractSubReg);
3589 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3591 // Now, perform the remaining copies as vector lane copies.
3592 unsigned LaneIdx = 1;
3593 for (Register InsReg : InsertRegs) {
3594 Register CopyTo = I.getOperand(LaneIdx).getReg();
3595 MachineInstr &CopyInst =
3596 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3599 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3603 // Separately constrain the first copy's destination. Because of the
3604 // limitation in constrainOperandRegClass, we can't guarantee that this will
3605 // actually be constrained. So, do it ourselves using the second operand.
3606 const TargetRegisterClass *RC =
3607 MRI.getRegClassOrNull(I.getOperand(1).getReg());
3609 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3613 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3614 I.eraseFromParent();
bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineIRBuilder MIRBuilder(I);
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}

unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Alignment);
}

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());

  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

  MachineInstr *LoadMI = nullptr;
  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
  case 16:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 8:
    LoadMI = &*MIRBuilder
                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
                  .addConstantPoolIndex(
                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType());
    return nullptr;
  }
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}

/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
  unsigned Opc, SubregIdx;
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (EltSize == 32) {
      Opc = AArch64::INSvi32gpr;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64gpr;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  } else {
    if (EltSize == 8) {
      Opc = AArch64::INSvi8lane;
      SubregIdx = AArch64::bsub;
    } else if (EltSize == 16) {
      Opc = AArch64::INSvi16lane;
      SubregIdx = AArch64::hsub;
    } else if (EltSize == 32) {
      Opc = AArch64::INSvi32lane;
      SubregIdx = AArch64::ssub;
    } else if (EltSize == 64) {
      Opc = AArch64::INSvi64lane;
      SubregIdx = AArch64::dsub;
    } else {
      llvm_unreachable("invalid elt size!");
    }
  }
  return std::make_pair(Opc, SubregIdx);
}

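// Note on getInsertVecEltOpInfo above, for illustration (derived from the
// cases it handles): a 32-bit element on the GPR bank maps to
// {INSvi32gpr, ssub} (e.g. "mov v0.s[1], w0"), while a 16-bit element on the
// FPR bank maps to {INSvi16lane, hsub} (e.g. "mov v0.h[1], v1.h[0]").
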
MachineInstr *
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                    MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
                                       {AArch64::ADDWrr, AArch64::ADDWri}};
  bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
  auto ImmFns = selectArithImmed(RHS);
  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
  auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});

  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(AddMI);
  } else {
    AddMI.addUse(RHS.getReg());
  }

  constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
  return &*AddMI;
}

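// Example of what emitADD above produces (illustrative): with a 32-bit LHS
// and an RHS defined by G_CONSTANT 16, selectArithImmed succeeds and we build
// "%def = ADDWri %lhs, 16, 0"; a non-immediate RHS falls back to
// "%def = ADDWrr %lhs, %rhs".
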
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
                                       {AArch64::ADDSWrr, AArch64::ADDSWri}};
  bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
  auto ImmFns = selectArithImmed(RHS);
  unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;

  auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});

  // If we matched a valid constant immediate, add those operands.
  if (ImmFns) {
    for (auto &RenderFn : *ImmFns)
      RenderFn(CmpMI);
  } else {
    CmpMI.addUse(RHS.getReg());
  }

  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

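// Note on emitCMN above: CMN is just ADDS writing the zero register, so
// callers that fold "x = G_SUB 0, y; G_ICMP z, x" end up with e.g.
// "cmn z, y" (adds wzr, z, y), which sets NZCV the way "cmp z, -y" would for
// the EQ/NE predicates the fold is restricted to.
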
MachineInstr *
AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
                                    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  unsigned RegSize = MRI.getType(LHS).getSizeInBits();
  bool Is32Bit = (RegSize == 32);
  static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
                                       {AArch64::ANDSWrr, AArch64::ANDSWri}};
  Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;

  // We might be able to fold in an immediate into the TST. We need to make
  // sure it's a logical immediate though, since ANDS requires that.
  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
  bool IsImmForm = ValAndVReg.hasValue() &&
                   AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
  unsigned Opc = OpcTable[Is32Bit][IsImmForm];
  auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});

  if (IsImmForm)
    TstMI.addImm(
        AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
  else
    TstMI.addUse(RHS);

  constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
  return &*TstMI;
}

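// Example of what emitTST above produces (illustrative): for a 64-bit LHS
// with RHS = G_CONSTANT 0xFF (a valid logical immediate), we build
// "ANDSXri xzr, %lhs, <encoded 0xff>", i.e. "tst %lhs, #0xff"; otherwise we
// fall back to the register form, ANDSXrr.
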
std::pair<MachineInstr *, CmpInst::Predicate>
AArch64InstructionSelector::emitIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
  assert(Predicate.isPredicate() && "Expected predicate?");
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();

  // Fold the compare if possible.
  MachineInstr *FoldCmp =
      tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
  if (FoldCmp)
    return {FoldCmp, P};

  // Can't fold into a CMN. Just emit a normal compare.
  unsigned CmpOpc = 0;
  Register ZReg;

  LLT CmpTy = MRI.getType(LHS.getReg());
  assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
         "Expected scalar or pointer");
  if (CmpTy == LLT::scalar(32)) {
    CmpOpc = AArch64::SUBSWrr;
    ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
    CmpOpc = AArch64::SUBSXrr;
    ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
  } else {
    return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE};
  }

  // Try to match immediate forms.
  MachineInstr *ImmedCmp =
      tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
  if (ImmedCmp)
    return {ImmedCmp, P};

  // If we don't have an immediate, we may have a shift which can be folded
  // into the compare.
  MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
  if (ShiftedCmp)
    return {ShiftedCmp, P};

  auto CmpMI =
      MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
  // Make sure that we can constrain the compare that we emitted.
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return {&*CmpMI, P};
}

MachineInstr *AArch64InstructionSelector::emitVectorConcat(
    Optional<Register> Dst, Register Op1, Register Op2,
    MachineIRBuilder &MIRBuilder) const {
  // We implement a vector concat by:
  // 1. Use scalar_to_vector to insert the lower vector into the larger dest
  // 2. Insert the upper vector into the destination's upper element
  // TODO: some of this code is common with G_BUILD_VECTOR handling.
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();

  const LLT Op1Ty = MRI.getType(Op1);
  const LLT Op2Ty = MRI.getType(Op2);

  if (Op1Ty != Op2Ty) {
    LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
    return nullptr;
  }
  assert(Op1Ty.isVector() && "Expected a vector for vector concat");

  if (Op1Ty.getSizeInBits() >= 128) {
    LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
    return nullptr;
  }

  // At the moment we just support 64 bit vector concats.
  if (Op1Ty.getSizeInBits() != 64) {
    LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
    return nullptr;
  }

  const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
  const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
  const TargetRegisterClass *DstRC =
      getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);

  MachineInstr *WidenedOp1 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
  MachineInstr *WidenedOp2 =
      emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
  if (!WidenedOp1 || !WidenedOp2) {
    LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
    return nullptr;
  }

  // Now do the insert of the upper element.
  unsigned InsertOpc, InsSubRegIdx;
  std::tie(InsertOpc, InsSubRegIdx) =
      getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());

  if (!Dst)
    Dst = MRI.createVirtualRegister(DstRC);
  auto InsElt =
      MIRBuilder
          .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
          .addImm(1) /* Lane index */
          .addUse(WidenedOp2->getOperand(0).getReg())
          .addImm(0);
  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return &*InsElt;
}

MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
         "Expected a G_FCONSTANT!");
  MachineOperand &ImmOp = I.getOperand(1);
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  // Only handle 32 and 64 bit defs for now.
  if (DefSize != 32 && DefSize != 64)
    return nullptr;

  // Don't handle null values using FMOV.
  if (ImmOp.getFPImm()->isNullValue())
    return nullptr;

  // Get the immediate representation for the FMOV.
  const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
  int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
                          : AArch64_AM::getFP64Imm(ImmValAPF);

  // If this is -1, it means the immediate can't be represented as the
  // requested floating point value. Bail.
  if (Imm == -1)
    return nullptr;

  // Update MI to represent the new FMOV instruction, constrain it, and return.
  ImmOp.ChangeToImmediate(Imm);
  unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
  I.setDesc(TII.get(MovOpc));
  constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  return &I;
}

MachineInstr *
AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
                                            MachineIRBuilder &MIRBuilder) const {
  // CSINC increments the result when the predicate is false. Invert it.
  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
  auto I =
      MIRBuilder
          .buildInstr(AArch64::CSINCWr, {DefReg},
                      {Register(AArch64::WZR), Register(AArch64::WZR)})
          .addImm(InvCC);
  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
  return &*I;
}

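// E.g. for Pred == ICMP_EQ this emits "csinc wN, wzr, wzr, ne", the expansion
// of "cset wN, eq": the result is 0 + 1 exactly when the inverted condition
// (ne) does not hold.
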
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
  MachineIRBuilder MIB(I);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();

  // We want to recognize this pattern:
  //
  // $z = G_FCMP pred, $x, $y
  // ...
  // $w = G_SELECT $z, $a, $b
  //
  // Where the value of $z is *only* ever used by the G_SELECT (possibly with
  // some copies/truncs in between.)
  //
  // If we see this, then we can emit something like this:
  //
  // fcmp $x, $y
  // fcsel $w, $a, $b, pred
  //
  // Rather than emitting both of the rather long sequences in the standard
  // G_FCMP/G_SELECT select methods.

  // First, check if the condition is defined by a compare.
  MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
  while (CondDef) {
    // We can only fold if all of the defs have one use.
    Register CondDefReg = CondDef->getOperand(0).getReg();
    if (!MRI.hasOneNonDBGUse(CondDefReg)) {
      // Unless it's another select.
      for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
        if (CondDef == &UI)
          continue;
        if (UI.getOpcode() != TargetOpcode::G_SELECT)
          return false;
      }
    }

    // We can skip over G_TRUNC since the condition is 1-bit.
    // Truncating/extending can have no impact on the value.
    unsigned Opc = CondDef->getOpcode();
    if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
      break;

    // Can't see past copies from physregs.
    if (Opc == TargetOpcode::COPY &&
        Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
      return false;

    CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
  }

  // Is the condition defined by a compare?
  if (!CondDef)
    return false;

  unsigned CondOpc = CondDef->getOpcode();
  if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
    return false;

  AArch64CC::CondCode CondCode;
  if (CondOpc == TargetOpcode::G_ICMP) {
    MachineInstr *Cmp;
    CmpInst::Predicate Pred;

    std::tie(Cmp, Pred) =
        emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
                           CondDef->getOperand(1), MIB);

    if (!Cmp) {
      LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
      return false;
    }

    // Have to collect the CondCode after emitIntegerCompare, since it can
    // update the predicate.
    CondCode = changeICMPPredToAArch64CC(Pred);
  } else {
    // Get the condition code for the select.
    AArch64CC::CondCode CondCode2;
    changeFCMPPredToAArch64CC(
        (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
        CondCode2);

    // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
    // instructions to emit the comparison.
    // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
    // unnecessary.
    if (CondCode2 != AArch64CC::AL)
      return false;

    // Make sure we'll be able to select the compare.
    unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
    if (!CmpOpc)
      return false;

    // Emit a new compare.
    auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
    if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
      Cmp.addUse(CondDef->getOperand(3).getReg());
    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
  }

  // Emit the select.
  unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
  auto CSel =
      MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
                     {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
          .addImm(CondCode);
  constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

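// Sketch of the fold performed by tryOptSelect above (illustrative MIR; names
// and the condition-code encoding are approximate):
//
//   %z:gpr(s32) = G_FCMP floatpred(ogt), %x, %y
//   %w:fpr(s32) = G_SELECT %z, %a, %b
//
// becomes roughly:
//
//   FCMPSrr %x, %y, implicit-def $nzcv
//   %w = FCSELSrrr %a, %b, 12 /* gt */, implicit $nzcv
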
MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
    MachineIRBuilder &MIRBuilder) const {
  assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
         "Unexpected MachineOperand");
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // We want to find this sort of thing:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
  // e.g:
  //
  // cmn z, y

  // Helper lambda to detect the subtract followed by the compare.
  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
    // We want to match against SUBs.
    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
      return false;

    // Need to make sure NZCV is the same at the end of the transformation.
    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
      return false;

    // Make sure that we're getting
    // x = G_SUB 0, y
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return false;

    // This can safely be represented as a CMN.
    return true;
  };

  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
  MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
  MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);

  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP x, z
  //
  // Produce this:
  //
  // cmn y, z
  if (IsCMN(LHSDef, CC))
    return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);

  // Same idea here, but with the RHS of the compare instead:
  //
  // Given this:
  //
  // x = G_SUB 0, y
  // G_ICMP z, x
  //
  // Produce this:
  //
  // cmn z, y
  if (IsCMN(RHSDef, CC))
    return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);

  // Given this:
  //
  // z = G_AND x, y
  // G_ICMP z, 0
  //
  // Produce this if the compare is signed:
  //
  // tst x, y
  if (!isUnsignedICMPPred(P) && LHSDef &&
      LHSDef->getOpcode() == TargetOpcode::G_AND) {
    // Make sure that the RHS is 0.
    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1).getReg(),
                   LHSDef->getOperand(2).getReg(), MIRBuilder);
  }

  return nullptr;
}

MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
    MachineIRBuilder &MIB) const {
  // Attempt to select the immediate form of an integer compare.
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected scalar or pointer only?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit compare only?");

  // Check if this is a case we can already handle.
  InstructionSelector::ComplexRendererFns ImmFns;
  ImmFns = selectArithImmed(RHS);

  if (!ImmFns) {
    // We didn't get a rendering function, but we may still have a constant.
    auto MaybeImmed = getImmedFromMO(RHS);
    if (!MaybeImmed)
      return nullptr;

    // We have a constant, but it doesn't fit. Try adjusting it by one and
    // updating the predicate if possible.
    uint64_t C = *MaybeImmed;
    CmpInst::Predicate NewP;
    switch (P) {
    default:
      return nullptr;
    case CmpInst::ICMP_SLT:
    case CmpInst::ICMP_SGE:
      // Check for
      //
      // x slt c => x sle c - 1
      // x sge c => x sgt c - 1
      //
      // When c is not the smallest possible negative number.
      if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
          (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
        return nullptr;
      NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
      C -= 1;
      break;
    case CmpInst::ICMP_ULT:
    case CmpInst::ICMP_UGE:
      // Check for
      //
      // x ult c => x ule c - 1
      // x uge c => x ugt c - 1
      //
      // When c is not zero.
      if (C == 0)
        return nullptr;
      NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
      C -= 1;
      break;
    case CmpInst::ICMP_SLE:
    case CmpInst::ICMP_SGT:
      // Check for
      //
      // x sle c => x slt c + 1
      // x sgt c => x sge c + 1
      //
      // When c is not the largest possible signed integer.
      if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
          (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
        return nullptr;
      NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
      C += 1;
      break;
    case CmpInst::ICMP_ULE:
    case CmpInst::ICMP_UGT:
      // Check for
      //
      // x ule c => x ult c + 1
      // x ugt c => x uge c + 1
      //
      // When c is not the largest possible unsigned integer.
      if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
          (Size == 64 && C == UINT64_MAX))
        return nullptr;
      NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
      C += 1;
      break;
    }

    // Check if the new constant is valid.
    if (Size == 32)
      C = static_cast<uint32_t>(C);
    ImmFns = select12BitValueWithLeftShift(C);
    if (!ImmFns)
      return nullptr;
    P = NewP;
  }

  // At this point, we know we can select an immediate form. Go ahead and do
  // that.
  Register ZReg;
  unsigned Opc;
  if (Size == 32) {
    ZReg = AArch64::WZR;
    Opc = AArch64::SUBSWri;
  } else {
    ZReg = AArch64::XZR;
    Opc = AArch64::SUBSXri;
  }

  auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
  for (auto &RenderFn : *ImmFns)
    RenderFn(CmpMI);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

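// Worked example of the constant adjustment above (illustrative):
// "x ult 0x1001" has no 12-bit encoding, but rewriting it as "x ule 0x1000"
// works, since 0x1000 encodes as 1 with LSL #12. The predicate reference P is
// updated so the caller emits the matching condition code.
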
MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
    MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
  // We are looking for the following pattern:
  //
  // shift = G_SHL/G_ASHR/G_LSHR y, c
  // ...
  // cmp = G_ICMP pred, something, shift
  //
  // Since we will select the G_ICMP to a SUBS, we can potentially fold the
  // shift into the subtract.
  static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
  static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
  auto ImmFns = selectShiftedRegister(RHS);
  if (!ImmFns)
    return nullptr;
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected scalar or pointer only?");
  unsigned Size = Ty.getSizeInBits();
  bool Idx = (Size == 64);
  Register ZReg = ZRegTable[Idx];
  unsigned Opc = OpcTable[Idx];
  auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
  for (auto &RenderFn : *ImmFns)
    RenderFn(CmpMI);
  constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
  return &*CmpMI;
}

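// E.g. (illustrative): for "%s = G_SHL %y, 3" feeding
// "G_ICMP pred, %x, %s" on 64-bit values, this emits
// "SUBSXrs xzr, %x, %y, lsl #3", folding the shift into the compare.
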
bool AArch64InstructionSelector::selectShuffleVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  Register Src1Reg = I.getOperand(1).getReg();
  const LLT Src1Ty = MRI.getType(Src1Reg);
  Register Src2Reg = I.getOperand(2).getReg();
  const LLT Src2Ty = MRI.getType(Src2Reg);
  ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  LLVMContext &Ctx = MF.getFunction().getContext();

  // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
  // it's originated from a <1 x T> type. Those should have been lowered into
  // G_BUILD_VECTOR earlier.
  if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
    LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
    return false;
  }

  unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;

  SmallVector<Constant *, 64> CstIdxs;
  for (int Val : Mask) {
    // For now, any undef indexes we'll just assume to be 0. This should be
    // optimized in future, e.g. to select DUP etc.
    Val = Val < 0 ? 0 : Val;
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
      unsigned Offset = Byte + Val * BytesPerElt;
      CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
    }
  }

  MachineIRBuilder MIRBuilder(I);

  // Use a constant pool to load the index vector for TBL.
  Constant *CPVal = ConstantVector::get(CstIdxs);
  MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
  if (!IndexLoad) {
    LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
    return false;
  }

  if (DstTy.getSizeInBits() != 128) {
    assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
    // This case can be done with TBL1.
    MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
    if (!Concat) {
      LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
      return false;
    }

    // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
    IndexLoad =
        emitScalarToVector(64, &AArch64::FPR128RegClass,
                           IndexLoad->getOperand(0).getReg(), MIRBuilder);

    auto TBL1 = MIRBuilder.buildInstr(
        AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
        {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
    constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);

    auto Copy =
        MIRBuilder
            .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
            .addReg(TBL1.getReg(0), 0, AArch64::dsub);
    RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
    I.eraseFromParent();
    return true;
  }

  // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
  // Q registers for regalloc.
  auto RegSeq = MIRBuilder
                    .buildInstr(TargetOpcode::REG_SEQUENCE,
                                {&AArch64::QQRegClass}, {Src1Reg})
                    .addImm(AArch64::qsub0)
                    .addUse(Src2Reg)
                    .addImm(AArch64::qsub1);

  auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
                                    {RegSeq, IndexLoad->getOperand(0)});
  constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

MachineInstr *AArch64InstructionSelector::emitLaneInsert(
    Optional<Register> DstReg, Register SrcReg, Register EltReg,
    unsigned LaneIdx, const RegisterBank &RB,
    MachineIRBuilder &MIRBuilder) const {
  MachineInstr *InsElt = nullptr;
  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  // Create a register to define with the insert if one wasn't passed in.
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);

  unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
  unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;

  if (RB.getID() == AArch64::FPRRegBankID) {
    auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(InsSub->getOperand(0).getReg())
                 .addImm(0);
  } else {
    InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
                 .addImm(LaneIdx)
                 .addUse(EltReg);
  }

  constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
  return InsElt;
}

bool AArch64InstructionSelector::selectInsertElt(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  // Get information on the destination.
  Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  unsigned VecSize = DstTy.getSizeInBits();

  // Get information on the element we want to insert into the destination.
  Register EltReg = I.getOperand(2).getReg();
  const LLT EltTy = MRI.getType(EltReg);
  unsigned EltSize = EltTy.getSizeInBits();
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.

  // Find the definition of the index. Bail out if it's not defined by a
  // G_CONSTANT.
  Register IdxReg = I.getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
  if (!VRegAndVal)
    return false;
  unsigned LaneIdx = VRegAndVal->Value;

  // Perform the lane insert.
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  if (VecSize < 128) {
    // If the vector we're inserting into is smaller than 128 bits, widen it
    // to 128 to do the insert.
    MachineInstr *ScalarToVec = emitScalarToVector(
        VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
    if (!ScalarToVec)
      return false;
    SrcReg = ScalarToVec->getOperand(0).getReg();
  }

  // Create an insert into a new FPR128 register.
  // Note that if our vector is already 128 bits, we end up emitting an extra
  // implicit def.
  MachineInstr *InsMI =
      emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);

  if (VecSize < 128) {
    // If we had to widen to perform the insert, then we have to demote back to
    // the original size to get the result we want.
    Register DemoteVec = InsMI->getOperand(0).getReg();
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }
    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
                        << "\n");
      return false;
    }
    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DemoteVec, 0, SubReg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // No widening needed.
    InsMI->getOperand(0).setReg(DstReg);
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::tryOptConstantBuildVec(
    MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!");
  if (DstTy.getSizeInBits() < 32)
    return false;
  // Check if we're building a constant vector, in which case we want to
  // generate a constant pool load instead of a vector insert sequence.
  SmallVector<Constant *, 16> Csts;
  for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
    // Try to find G_CONSTANT or G_FCONSTANT
    auto *OpMI =
        getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
    if (OpMI)
      Csts.emplace_back(
          const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
    else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
                                  I.getOperand(Idx).getReg(), MRI)))
      Csts.emplace_back(
          const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
    else
      return false;
  }
  Constant *CV = ConstantVector::get(Csts);
  MachineIRBuilder MIB(I);
  auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
  if (!CPLoad) {
    LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector");
    return false;
  }
  MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
  RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                               *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
                               MRI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectBuildVector(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  // Until we port more of the optimized selections, for now just use a vector
  // insert sequence.
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
  unsigned EltSize = EltTy.getSizeInBits();

  if (tryOptConstantBuildVec(I, DstTy, MRI))
    return true;
  if (EltSize < 16 || EltSize > 64)
    return false; // Don't support all element types yet.
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
  MachineIRBuilder MIRBuilder(I);

  const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
  MachineInstr *ScalarToVec =
      emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
                         I.getOperand(1).getReg(), MIRBuilder);
  if (!ScalarToVec)
    return false;

  Register DstVec = ScalarToVec->getOperand(0).getReg();
  unsigned DstSize = DstTy.getSizeInBits();

  // Keep track of the last MI we inserted. Later on, we might be able to save
  // a copy using it.
  MachineInstr *PrevMI = nullptr;
  for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
    // Note that if we don't do a subregister copy, we can end up making an
    // extra register.
    PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
                              MIRBuilder);
    DstVec = PrevMI->getOperand(0).getReg();
  }

  // If DstTy's size in bits is less than 128, then emit a subregister copy
  // from DstVec to the last register we've defined.
  if (DstSize < 128) {
    // Force this to be FPR using the destination vector.
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
    if (!RC)
      return false;
    if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
      LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
      return false;
    }

    unsigned SubReg = 0;
    if (!getSubRegForClass(RC, TRI, SubReg))
      return false;
    if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
      LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
                        << "\n");
      return false;
    }

    Register Reg = MRI.createVirtualRegister(RC);
    Register DstReg = I.getOperand(0).getReg();

    MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
        .addReg(DstVec, 0, SubReg);
    MachineOperand &RegOp = I.getOperand(1);
    RegOp.setReg(Reg);
    RBI.constrainGenericRegister(DstReg, *RC, MRI);
  } else {
    // We don't need a subregister copy. Save a copy by re-using the
    // destination register on the final insert.
    assert(PrevMI && "PrevMI was null?");
    PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
    constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
  }

  I.eraseFromParent();
  return true;
}

/// Helper function to find an intrinsic ID on a MachineInstr. Returns the
/// ID if it exists, and 0 otherwise.
static unsigned findIntrinsicID(MachineInstr &I) {
  auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
    return Op.isIntrinsicID();
  });
  if (IntrinOp == I.operands_end())
    return 0;
  return IntrinOp->getIntrinsicID();
}

bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  // Find the intrinsic ID.
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;
  MachineIRBuilder MIRBuilder(I);

  // Select the instruction.
  switch (IntrinID) {
  default:
    return false;
  case Intrinsic::trap:
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
    break;
  case Intrinsic::debugtrap:
    if (!STI.isTargetWindows())
      return false;
    MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
    break;
  }

  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  unsigned IntrinID = findIntrinsicID(I);
  if (!IntrinID)
    return false;
  MachineIRBuilder MIRBuilder(I);

  switch (IntrinID) {
  default:
    break;
  case Intrinsic::aarch64_crypto_sha1h: {
    Register DstReg = I.getOperand(0).getReg();
    Register SrcReg = I.getOperand(2).getReg();

    // FIXME: Should this be an assert?
    if (MRI.getType(DstReg).getSizeInBits() != 32 ||
        MRI.getType(SrcReg).getSizeInBits() != 32)
      return false;

    // The operation has to happen on FPRs. Set up some new FPR registers for
    // the source and destination if they are on GPRs.
    if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
      SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
      MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});

      // Make sure the copy ends up getting constrained properly.
      RBI.constrainGenericRegister(I.getOperand(2).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
      DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);

    // Actually insert the instruction.
    auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
    constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);

    // Did we create a new register for the destination?
    if (DstReg != I.getOperand(0).getReg()) {
      // Yep. Copy the result of the instruction back into the original
      // destination.
      MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
      RBI.constrainGenericRegister(I.getOperand(0).getReg(),
                                   AArch64::GPR32RegClass, MRI);
    }

    I.eraseFromParent();
    return true;
  }
  case Intrinsic::frameaddress:
  case Intrinsic::returnaddress: {
    MachineFunction &MF = *I.getParent()->getParent();
    MachineFrameInfo &MFI = MF.getFrameInfo();

    unsigned Depth = I.getOperand(2).getImm();
    Register DstReg = I.getOperand(0).getReg();
    RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);

    if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
      if (MFReturnAddr) {
        MIRBuilder.buildCopy({DstReg}, MFReturnAddr);
        I.eraseFromParent();
        return true;
      }
      MFI.setReturnAddressIsTaken(true);
      MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
      // Insert the copy from LR/X30 into the entry block, before it can be
      // clobbered by anything.
      MachineBasicBlock &EntryBlock = *MF.begin();
      if (!EntryBlock.isLiveIn(AArch64::LR))
        EntryBlock.addLiveIn(AArch64::LR);
      MachineIRBuilder EntryBuilder(MF);
      EntryBuilder.setInstr(*EntryBlock.begin());
      EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
      MFReturnAddr = DstReg;
      I.eraseFromParent();
      return true;
    }

    MFI.setFrameAddressIsTaken(true);
    Register FrameAddr(AArch64::FP);
    while (Depth--) {
      Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
      auto Ldr =
          MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
              .addImm(0);
      constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
      FrameAddr = NextFrame;
    }

    if (IntrinID == Intrinsic::frameaddress)
      MIRBuilder.buildCopy({DstReg}, {FrameAddr});
    else {
      MFI.setReturnAddressIsTaken(true);
      MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1);
    }

    I.eraseFromParent();
    return true;
  }
  }
  return false;
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 31)
    return None;
  uint64_t Enc = 31 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None || *MaybeImmed > 63)
    return None;
  uint64_t Enc = 63 - *MaybeImmed;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
}

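// Note on the four selectShift{A,B}_{32,64} renderers above (assuming the
// usual UBFM/SBFM expansion of immediate shifts): A corresponds to the immr
// field and B to the imms field of the bitfield-move form. E.g.
// "lsl w0, w1, #5" is "ubfm w0, w1, #27, #26", where 27 == (32 - 5) & 0x1f
// and 26 == 31 - 5.
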
/// Helper to select an immediate value that can be represented as a 12-bit
/// value shifted left by either 0 or 12. If it is possible to do so, return
/// the immediate and shift value. If not, return None.
///
/// Used by selectArithImmed and selectNegArithImmed.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::select12BitValueWithLeftShift(
    uint64_t Immed) const {
  unsigned ShiftAmt;
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return None;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
  }};
}

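// Examples: 0xFFF -> (0xFFF, LSL #0); 0x1000 -> (0x1, LSL #12); 0x1001 ->
// None, since its low 12 bits are nonzero and it doesn't fit in 12 bits.
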
/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return the rendering
/// functions for the 12-bit value and the shifter operand.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  return select12BitValueWithLeftShift(*MaybeImmed);
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
  // We need a register here, because we need to know if we have a 64 or 32
  // bit immediate.
  if (!Root.isReg())
    return None;
  auto MaybeImmed = getImmedFromMO(Root);
  if (MaybeImmed == None)
    return None;
  uint64_t Immed = *MaybeImmed;

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return None;

  // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
  // the root.
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;

  if (Immed & 0xFFFFFFFFFF000000ULL)
    return None;

  Immed &= 0xFFFFFFULL;
  return select12BitValueWithLeftShift(Immed);
}

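// E.g. (illustrative): "cmp w0, #-5" cannot encode -5 directly, but negating
// yields 5, which fits in 12 bits, so patterns using this renderer can select
// "cmn w0, #5" instead.
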
/// Return true if it is worth folding MI into an extended register. That is,
/// if it's safe to pull it into the addressing mode of a load or store as a
/// shift or extend.
bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
    MachineInstr &MI, const MachineRegisterInfo &MRI) const {
  // Always fold if there is one use, or if we're optimizing for size.
  Register DefReg = MI.getOperand(0).getReg();
  if (MRI.hasOneNonDBGUse(DefReg) ||
      MI.getParent()->getParent()->getFunction().hasMinSize())
    return true;

  // It's better to avoid folding and recomputing shifts when we don't have a
  // fastpath.
  if (!STI.hasLSLFast())
    return false;

  // We have a fastpath, so folding a shift in and potentially computing it
  // many times may be beneficial. Check if this is only used in memory ops.
  // If it is, then we should fold.
  return all_of(MRI.use_nodbg_instructions(DefReg),
                [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}

static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
  switch (Type) {
  case AArch64_AM::SXTB:
  case AArch64_AM::SXTH:
  case AArch64_AM::SXTW:
    return true;
  default:
    return false;
  }
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
  if (!OffsetInst)
    return None;

  unsigned OffsetOpc = OffsetInst->getOpcode();
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
    return None;

  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return None;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return None;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value;

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!isPowerOf2_32(ImmVal))
      return None;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return None;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return None;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend.
    MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
    auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;

    SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
    // We only support SXTW for signed extension here.
    if (SignExtend && Ext != AArch64_AM::SXTW)
      return None;

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = ExtInst->getOperand(1).getReg();
    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded.)
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           OffsetInst->getOperand(0), SizeInBytes,
                           /*WantsExt=*/false);
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
///
/// When it is possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return None.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return None;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
    return None;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}

/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // If we have a constant offset, then we probably don't want to match a
  // register offset.
  if (isBaseWithConstantOffset(Root, MRI))
    return None;

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}

/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return None;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return None;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return None;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg =
      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return None;

  if (!isBaseWithConstantOffset(Root, MRI))
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return None;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return None;

  int64_t RHSC;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return None;
  RHSC = RHSOp1.getCImm()->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
    return None;
  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return None;
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
                                                 unsigned Size,
                                                 MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return None;
  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
  if (Adrp.getOpcode() != AArch64::ADRP)
    return None;

  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  // TODO: Need to check GV's offset % size if doing offset folding into
  // globals.
  assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
  auto GV = Adrp.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return None;

  auto &MF = *RootDef.getParent()->getParent();
  if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
    return None;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  MachineIRBuilder MIRBuilder(RootDef);
  Register AdrpReg = Adrp.getOperand(0).getReg();
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addGlobalAddress(GV, /* Offset */ 0,
                                  OpFlags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
           }}};
}

/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!Root.isReg())
    return None;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (!RootDef)
    return None;

  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  CodeModel::Model CM = MF.getTarget().getCodeModel();
  // Check if we can fold in the ADD of small code model ADRP + ADD address.
  if (CM == CodeModel::Small) {
    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
    if (OpFns)
      return OpFns;
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
    if (LHSDef && RHSDef) {
      int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
          return {{
              [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
              [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
          }};

        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size).hasValue())
    return None;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  // TODO: Handle AArch64_AM::ROR
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  }
}

5318 /// Select a "shifted register" operand. If the value is not shifted, set the
5319 /// shift operand to a default value of "lsl 0".
5321 /// TODO: Allow shifted register to be rotated in logical instructions.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  //
  // TODO: Handle AArch64_AM::ROR for logical instructions.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  if (!ShiftInst)
    return None;
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
    return None;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return None;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}
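
/// Return the extend type implied by \p MI, e.g. SXTB for a G_SEXT from 8
/// bits or UXTH for a G_ZEXT from 16 bits. A G_AND with mask 0xFF, 0xFFFF, or
/// 0xFFFFFFFF is treated as UXTB, UXTH, or UXTW respectively; \p IsLoadStore
/// rejects the byte and halfword mask forms, which have no load/store
/// addressing equivalent.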
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return AArch64_AM::SXTB;
    case 16:
      return AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return AArch64_AM::UXTB;
    case 16:
      return AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // which implies an extend.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}
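
/// The arithmetic extended-register forms read their source from a 32-bit
/// register, so if \p ExtReg is not already 32 bits wide, move it onto GPR32
/// with a subregister copy.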
Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
    Register ExtReg, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  if (MRI.getType(ExtReg).getSizeInBits() == 32)
    return ExtReg;

  // Insert a copy to move ExtReg to GPR32.
  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});

  // Select the copy into a subregister copy.
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}
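
/// Ensure \p Reg is at least \p WideSize bits on its register bank, emitting
/// a SUBREG_TO_REG only when the narrow and wide minimal register classes
/// actually differ.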
Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
    Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
  assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
  MachineRegisterInfo &MRI = *MIB.getMRI();
  unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
  assert(WideSize >= NarrowSize &&
         "WideSize cannot be smaller than NarrowSize!");

  // If the sizes match, just return the register.
  //
  // If NarrowSize is an s1, then we can select it to any size, so we'll treat
  // it as a don't care.
  if (NarrowSize == WideSize || NarrowSize == 1)
    return Reg;

  // Now check the register classes.
  const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
  const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
  const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
  assert(OrigRC && "Could not determine narrow RC?");
  assert(WideRC && "Could not determine wide RC?");

  // If the sizes differ, but the register classes are the same, there is no
  // need to insert a SUBREG_TO_REG.
  //
  // For example, an s8 that's supposed to be a GPR will be selected to either
  // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
  // always end up on a GPR32.
  if (OrigRC == WideRC)
    return Reg;

  // We have two different register classes. Insert a SUBREG_TO_REG.
  unsigned SubReg = 0;
  getSubRegForClass(OrigRC, TRI, SubReg);
  assert(SubReg && "Couldn't determine subregister?");

  // Build the SUBREG_TO_REG and return the new, widened register.
  auto SubRegToReg =
      MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
          .addImm(0)
          .addUse(Reg)
          .addImm(SubReg);
  constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
  return SubRegToReg.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
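///
/// E.g. a G_SHL of a G_SEXT from 32 bits can be folded into a single
/// extended-register operand, as in "add x0, x1, w2, sxtw #2".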
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return None;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return None;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return None;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return None;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return None;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // one.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (ExtInst && isDef32(*ExtInst))
        return None;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}
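
/// Custom renderer that adds the value of a G_CONSTANT as an immediate
/// operand. Imported TableGen patterns invoke it through
/// GICustomOperandRenderer when they need the raw constant rather than a
/// register.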
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}
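
/// The two renderers below emit a G_CONSTANT in the packed encoding produced
/// by AArch64_AM::encodeLogicalImmediate; the pattern that invokes them is
/// responsible for having checked that the value is a valid logical
/// immediate.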
void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}
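
/// Return true when \p MI defines a 32-bit value that is guaranteed to zero
/// the upper half of its 64-bit register when selected, making an explicit
/// zero-extend unnecessary.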
bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
    MachineOperand &MO = MI.getOperand(OpIdx);
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(OpReg);
      const LLT &Ty = MRI.getType(OpReg);
      MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
      auto Copy = MIB.buildCopy(Ty, OpReg);
      MRI.setRegBank(Copy.getReg(0), *DstRB);
      MO.setReg(Copy.getReg(0));
    }
  }
}

void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs, build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 32 bit and they
    // are split across fpr/gpr banks. Since all types < 32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogeneous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    //  =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
      const auto &MO = MI->getOperand(OpIdx);
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet, don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogeneous regbanks, so we need to fix them up.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm