//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
namespace {
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  const AArch64InstrInfo *TII;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
};

char AArch64ExpandPseudo::ID = 0;
} // end anonymous namespace

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)
/// \brief Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}
/// \brief Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
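/// For example, getChunk(0x123456789ABCDEF0, 2) returns 0x5678 (bits [47:32]).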
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}
/// \brief Helper function which replicates a 16-bit chunk within a 64-bit
/// value. Indices correspond to element numbers in a v4i16.
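/// For example, replicateChunk(0xAAAABBBBCCCCDDDD, 0, 2) copies chunk 0 into
/// chunk 2 and returns 0xAAAADDDDCCCCDDDD.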
static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
  assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ToIdx * 16;

  // Replicate the source chunk to the destination position.
  const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
  // Clear the destination chunk.
  Imm &= ~(0xFFFFLL << ShiftAmt);
  // Insert the replicated chunk.
  return Imm | Chunk;
}
/// \brief Helper function which tries to materialize a 64-bit value with an
/// ORR + MOVK instruction sequence.
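///
/// For example, with UImm = 0x00FF00FF123400FF, OrrImm = 0x00FF00FF00FF00FF
/// (the odd chunk replicated away) and ChunkIdx = 1, this emits (xD is an
/// illustrative destination register):
///
///   orr  xD, xzr, #0x00ff00ff00ff00ff
///   movk xD, #0x1234, lsl #16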
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
                       MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator &MBBI,
                       const AArch64InstrInfo *TII, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
  const unsigned ShiftAmt = ChunkIdx * 16;

  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .add(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    // Create the MOVK instruction.
    const unsigned Imm16 = getChunk(UImm, ChunkIdx);
    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  return false;
}
/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
/// can be materialized with an ORR instruction.
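/// For example, 0x3FFC replicated gives 0x3FFC3FFC3FFC3FFC, a rotated run of
/// twelve ones in every 16-bit element and therefore a valid logical
/// immediate, whereas a chunk such as 0x1234 is not encodable.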
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;

  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
}
/// \brief Check for identical 16-bit chunks within the constant and if so
/// materialize them with a single ORR instruction. The remaining one or two
/// 16-bit chunks will be materialized with MOVK instructions.
///
/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
/// an ORR instruction.
///
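/// For example, 0x3FFC12343FFC3FFC (an |A|B|A|A| pattern with A = 0x3FFC)
/// expands to (xD illustrative):
///
///   orr  xD, xzr, #0x3ffc3ffc3ffc3ffc
///   movk xD, #0x1234, lsl #32
///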
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
                                 MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator &MBBI,
                                 const AArch64InstrInfo *TII) {
  typedef DenseMap<uint64_t, unsigned> CountMap;
  CountMap Counts;

  // Scan the constant and count how often every chunk occurs.
  for (unsigned Idx = 0; Idx < 4; ++Idx)
    ++Counts[getChunk(UImm, Idx)];

  // Traverse the chunks to find one which occurs more than once.
  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
       Chunk != End; ++Chunk) {
    const uint64_t ChunkVal = Chunk->first;
    const unsigned Count = Chunk->second;

    uint64_t Encoding = 0;

    // We are looking for chunks which have two or three instances and can be
    // materialized with an ORR instruction.
    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
      continue;

    const bool CountThree = Count == 3;
    // Create the ORR-immediate instruction.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
            .add(MI.getOperand(0))
            .addReg(AArch64::XZR)
            .addImm(Encoding);

    const unsigned DstReg = MI.getOperand(0).getReg();
    const bool DstIsDead = MI.getOperand(0).isDead();

    unsigned ShiftAmt = 0;
    uint64_t Imm16 = 0;
    // Find the first chunk not materialized with the ORR instruction.
    for (; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the first MOVK instruction.
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg,
                    RegState::Define | getDeadRegState(DstIsDead && CountThree))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    // In case we have three instances the whole constant is now materialized
    // and we can exit.
    if (CountThree) {
      transferImpOps(MI, MIB, MIB1);
      MI.eraseFromParent();
      return true;
    }

    // Find the remaining chunk which needs to be materialized.
    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;

      if (Imm16 != ChunkVal)
        break;
    }

    // Create the second MOVK instruction.
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
            .addReg(DstReg)
            .addImm(Imm16)
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));

    transferImpOps(MI, MIB, MIB2);
    MI.eraseFromParent();
    return true;
  }

  return false;
}
/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern
/// starts a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
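/// For example, 0xFF00 (after sign extension) is a start chunk, while 0x00FF
/// and 0x0F00 are not.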
static bool isStartChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return isMask_64(~Chunk);
}
/// \brief Check whether this chunk matches the pattern '0...1...'. This pattern
/// ends a contiguous sequence of ones if we look at the bits from the LSB
/// towards the MSB.
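/// For example, 0x00FF and 0x0001 are end chunks, while 0xFF00 is not.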
static bool isEndChunk(uint64_t Chunk) {
  if (Chunk == 0 || Chunk == UINT64_MAX)
    return false;

  return isMask_64(Chunk);
}
/// \brief Clear or set all bits in the chunk at the given index.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}
/// \brief Check whether the constant contains a sequence of contiguous ones,
/// which might be interrupted by one or two chunks. If so, materialize the
/// sequence of contiguous ones with an ORR instruction.
/// Materialize the chunks which are either interrupting the sequence or outside
/// of the sequence with a MOVK instruction.
///
/// Assuming S is a chunk which starts the sequence (1...0...) and E is a chunk
/// which ends the sequence (0...1...), we are looking for constants which
/// contain at least one S and one E chunk.
/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
///
/// We are also looking for constants like |S|A|B|E| where the contiguous
/// sequence of ones wraps around the MSB into the LSB.
///
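/// For example, 0x1234000F5678F000 has a start chunk (0xF000, index 0) and an
/// end chunk (0x000F, index 2); patching chunks 1 and 3 yields the
/// ORR-encodable value 0x0000000FFFFFF000, so the expansion is (xD
/// illustrative):
///
///   orr  xD, xzr, #0x0000000ffffff000
///   movk xD, #0x5678, lsl #16
///   movk xD, #0x1234, lsl #48
///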
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MBBI,
                              const AArch64InstrInfo *TII) {
  const int NotSet = -1;
  const uint64_t Mask = 0xFFFF;

  int StartIdx = NotSet;
  int EndIdx = NotSet;
  // Try to find the chunks which start/end a contiguous sequence of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    int64_t Chunk = getChunk(UImm, Idx);
    // Sign extend the 16-bit chunk to 64-bit.
    Chunk = (Chunk << 48) >> 48;

    if (isStartChunk(Chunk))
      StartIdx = Idx;
    else if (isEndChunk(Chunk))
      EndIdx = Idx;
  }

  // Early exit in case we can't find a start/end chunk.
  if (StartIdx == NotSet || EndIdx == NotSet)
    return false;

  // Outside of the contiguous sequence of ones everything needs to be zero.
  uint64_t Outside = 0;
  // Chunks between the start and end chunk need to have all their bits set.
  uint64_t Inside = Mask;

  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
  // just swap indices and pretend we are materializing a contiguous sequence
  // of zeros surrounded by a contiguous sequence of ones.
  if (StartIdx > EndIdx) {
    std::swap(StartIdx, EndIdx);
    std::swap(Outside, Inside);
  }

  uint64_t OrrImm = UImm;
  int FirstMovkIdx = NotSet;
  int SecondMovkIdx = NotSet;

  // Find out which chunks we need to patch up to obtain a contiguous sequence
  // of ones.
  for (int Idx = 0; Idx < 4; ++Idx) {
    const uint64_t Chunk = getChunk(UImm, Idx);

    // Check whether we are looking at a chunk which is not part of the
    // contiguous sequence of ones.
    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
      OrrImm = updateImm(OrrImm, Idx, Outside == 0);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;

      // Check whether we are looking at a chunk which is part of the contiguous
      // sequence of ones.
    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);

      // Remember the index we need to patch.
      if (FirstMovkIdx == NotSet)
        FirstMovkIdx = Idx;
      else
        SecondMovkIdx = Idx;
    }
  }
  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");

  // Create the ORR-immediate instruction.
  uint64_t Encoding = 0;
  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
          .add(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .addImm(Encoding);

  const unsigned DstReg = MI.getOperand(0).getReg();
  const bool DstIsDead = MI.getOperand(0).isDead();

  const bool SingleMovk = SecondMovkIdx == NotSet;
  // Create the first MOVK instruction.
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg,
                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
          .addReg(DstReg)
          .addImm(getChunk(UImm, FirstMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));

  // Early exit in case we only need to emit a single MOVK instruction.
  if (SingleMovk) {
    transferImpOps(MI, MIB, MIB1);
    MI.eraseFromParent();
    return true;
  }

  // Create the second MOVK instruction.
  MachineInstrBuilder MIB2 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
          .addReg(DstReg)
          .addImm(getChunk(UImm, SecondMovkIdx))
          .addImm(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));

  transferImpOps(MI, MIB, MIB2);
  MI.eraseFromParent();
  return true;
}
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  unsigned DstReg = MI.getOperand(0).getReg();
  uint64_t Imm = MI.getOperand(1).getImm();
  const unsigned Mask = 0xFFFF;

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  // Try a MOVI instruction (aka ORR-immediate with the zero register).
  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
  uint64_t Encoding;
  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
            .add(MI.getOperand(0))
            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
            .addImm(Encoding);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }

  // Scan the immediate and count the number of 16-bit chunks which are either
  // all ones or all zeros.
  unsigned OneChunks = 0;
  unsigned ZeroChunks = 0;
  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
    const unsigned Chunk = (Imm >> Shift) & Mask;
    if (Chunk == Mask)
      OneChunks++;
    else if (Chunk == 0)
      ZeroChunks++;
  }

  // Since we can't materialize the constant with a single ORR instruction,
  // let's see whether we can materialize 3/4 of the constant with an ORR
  // instruction and use an additional MOVK instruction to materialize the
  // remaining 1/4.
  //
  // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
  //
  // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
  // we would create the following instruction sequence:
  //
  // ORR x0, xzr, |A|X|A|X|
  // MOVK x0, |B|, LSL #16
  //
  // Only look at 64-bit constants which can't be materialized with a single
  // instruction, i.e. which have fewer than three all-zero or all-one chunks.
  //
  // Ignore 32-bit constants here; they can always be materialized with a
  // MOVZ/MOVN + MOVK pair. Since a 32-bit constant that reaches this point
  // can't be materialized with a single ORR, the best sequence we can achieve
  // is an ORR + MOVK pair. Thus we fall back to the default code below, which
  // in the best case creates a single MOVZ/MOVN instruction (when one chunk
  // is all zero or all one).
  //
  if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
    // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
    // identical?
    if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 3 into element 1.
      uint64_t OrrImm = replicateChunk(UImm, 3, 1);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 1 into element 3.
      OrrImm = replicateChunk(UImm, 1, 3);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
        return true;

      // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
      // identical?
    } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 2 into element 0.
      uint64_t OrrImm = replicateChunk(UImm, 2, 0);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
        return true;

      // See if we can come up with a constant which can be materialized with
      // ORR-immediate by replicating element 0 into element 2.
      OrrImm = replicateChunk(UImm, 0, 2);
      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
        return true;
    }
  }

  // Check for identical 16-bit chunks within the constant and if so materialize
  // them with a single ORR instruction. The remaining one or two 16-bit chunks
  // will be materialized with MOVK instructions.
  if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
    return true;

  // Check whether the constant contains a sequence of contiguous ones, which
  // might be interrupted by one or two chunks. If so, materialize the sequence
  // of contiguous ones with an ORR instruction. Materialize the chunks which
  // are either interrupting the sequence or outside of the sequence with a
  // MOVK instruction.
  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
    return true;

  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
  // more MOVK instructions to insert additional 16-bit portions into the
  // lower bits.
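  //
  // For example, the constant 0x0000123456780000 is synthesized as (x0 used
  // for illustration):
  //
  //   movz x0, #0x5678, lsl #16
  //   movk x0, #0x1234, lsl #32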
  bool isNeg = false;

  // Use MOVN to materialize the high bits if we have more all one chunks
  // than all zero chunks.
  if (OneChunks > ZeroChunks) {
    isNeg = true;
    Imm = ~Imm;
  }

  unsigned FirstOpc;
  if (BitSize == 32) {
    Imm &= (1LL << 32) - 1;
    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
  } else {
    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
  }
  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
  unsigned LastShift = 0; // LSL amount for last MOVK
  if (Imm != 0) {
    unsigned LZ = countLeadingZeros(Imm);
    unsigned TZ = countTrailingZeros(Imm);
    Shift = (TZ / 16) * 16;
    LastShift = ((63 - LZ) / 16) * 16;
  }
  unsigned Imm16 = (Imm >> Shift) & Mask;
  bool DstIsDead = MI.getOperand(0).isDead();
  MachineInstrBuilder MIB1 =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
          .addReg(DstReg, RegState::Define |
                              getDeadRegState(DstIsDead && Shift == LastShift))
          .addImm(Imm16)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));

  // If a MOVN was used for the high bits of a negative value, flip the rest
  // of the bits back for use with MOVK.
  if (isNeg)
    Imm = ~Imm;

  if (Shift == LastShift) {
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  MachineInstrBuilder MIB2;
  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
  while (Shift < LastShift) {
    Shift += 16;
    Imm16 = (Imm >> Shift) & Mask;
    if (Imm16 == (isNeg ? Mask : 0))
      continue; // This 16-bit portion is already set correctly.
    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
               .addReg(DstReg,
                       RegState::Define |
                           getDeadRegState(DstIsDead && Shift == LastShift))
               .addReg(DstReg)
               .addImm(Imm16)
               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
  }

  transferImpOps(MI, MIB1, MIB2);
  MI.eraseFromParent();
  return true;
}
static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
    MBB->addLiveIn(*I);
}
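
/// \brief Expand a CMP_SWAP pseudo into an explicit load-linked /
/// store-conditional retry loop spread across three new basic blocks
/// (load-and-compare, store-conditional, and done), as sketched in the
/// block comments below.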
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &Dest = MI.getOperand(0);
  unsigned StatusReg = MI.getOperand(1).getReg();
  MachineOperand &Addr = MI.getOperand(2);
  MachineOperand &Desired = MI.getOperand(3);
  MachineOperand &New = MI.getOperand(4);

  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
  LiveRegs.addLiveOuts(MBB);
  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
    LiveRegs.stepBackward(*I);

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  LoadCmpBB->addLiveIn(Addr.getReg());
  LoadCmpBB->addLiveIn(Dest.getReg());
  LoadCmpBB->addLiveIn(Desired.getReg());
  addPostLoopLiveIns(LoadCmpBB, LiveRegs);

  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(Addr.getReg());
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .add(Desired)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  StoreBB->addLiveIn(Addr.getReg());
  StoreBB->addLiveIn(New.getReg());
  addPostLoopLiveIns(StoreBB, LiveRegs);

  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, RegState::Kill)
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);
  addPostLoopLiveIns(DoneBB, LiveRegs);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  return true;
}
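
/// \brief Expand a 128-bit CMP_SWAP pseudo. The retry-loop structure matches
/// expandCMP_SWAP, but the load and store use LDAXP/STLXP pairs and the
/// double-word comparison accumulates an "equal" result in wStatus via two
/// CMP + CSINC steps (see the block comment below).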
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  unsigned StatusReg = MI.getOperand(2).getReg();
  MachineOperand &Addr = MI.getOperand(3);
  MachineOperand &DesiredLo = MI.getOperand(4);
  MachineOperand &DesiredHi = MI.getOperand(5);
  MachineOperand &NewLo = MI.getOperand(6);
  MachineOperand &NewHi = MI.getOperand(7);

  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
  LiveRegs.addLiveOuts(MBB);
  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
    LiveRegs.stepBackward(*I);

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp  xDestLo, xDestHi, [xAddr]
  //     cmp    xDestLo, xDesiredLo
  //     csinc  wStatus, wzr, wzr, eq
  //     cmp    xDestHi, xDesiredHi
  //     csinc  wStatus, wStatus, wStatus, eq
  //     cbnz   wStatus, .Ldone
  LoadCmpBB->addLiveIn(Addr.getReg());
  LoadCmpBB->addLiveIn(DestLo.getReg());
  LoadCmpBB->addLiveIn(DestHi.getReg());
  LoadCmpBB->addLiveIn(DesiredLo.getReg());
  LoadCmpBB->addLiveIn(DesiredHi.getReg());
  addPostLoopLiveIns(LoadCmpBB, LiveRegs);

  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(Addr.getReg());
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLo.getReg())
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHi.getReg())
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, RegState::Kill)
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp  wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz   wStatus, .Lloadcmp
  StoreBB->addLiveIn(Addr.getReg());
  StoreBB->addLiveIn(NewLo.getReg());
  StoreBB->addLiveIn(NewHi.getReg());
  addPostLoopLiveIns(StoreBB, LiveRegs);
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addReg(NewLo.getReg())
      .addReg(NewHi.getReg())
      .addReg(Addr.getReg());
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, RegState::Kill)
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);
  addPostLoopLiveIns(DoneBB, LiveRegs);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  return true;
}
/// \brief If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs;  break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs;  break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs;  break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs;  break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs;  break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs;  break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs;  break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs;  break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs;  break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs;  break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs;  break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs;  break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs;  break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs;  break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs;  break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs;  break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    // Expand into ADRP + LDR.
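    // For a global this is typically a GOT-indirect load, e.g. (ELF syntax,
    // x0 illustrative):
    //
    //   adrp x0, :got:var
    //   ldr  x0, [x0, :got_lo12:var]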
    unsigned DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
            .add(MI.getOperand(0))
            .addReg(DstReg);

    if (MO1.isGlobal()) {
      MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
      MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                            Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else if (MO1.isSymbol()) {
      MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
      MIB2.addExternalSymbol(MO1.getSymbolName(),
                             Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    } else {
      assert(MO1.isCPI() &&
             "Only expect globals, externalsymbols, or constant pools");
      MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGE);
      MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                Flags | AArch64II::MO_PAGEOFF |
                                    AArch64II::MO_NC);
    }

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
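    // This yields the usual page / page-offset pair, e.g. (x0 illustrative):
    //
    //   adrp x0, sym
    //   add  x0, x0, :lo12:sym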
    unsigned DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVbaseTLS: {
    unsigned DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
        MF->getTarget().getCodeModel() == CodeModel::Kernel)
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
  }
  return false;
}
/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}
bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// \brief Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}