1 //==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains a pass that expands pseudo instructions into target
11 // instructions to allow proper scheduling and other late optimizations. This
12 // pass should be run after register allocation but before the post-regalloc
15 //===----------------------------------------------------------------------===//
17 #include "AArch64InstrInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/CodeGen/LivePhysRegs.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/Support/MathExtras.h"
27 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
// Pass that rewrites AArch64 pseudo instructions (MOVi32imm/MOVi64imm,
// CMP_SWAP_*, LOADgot, MOVaddr*, ...) into real target instructions after
// register allocation.
// NOTE(review): this listing's embedded numbering skips lines here — the
// access-specifier labels, `static char ID;`, and the closing `};` of the
// class are not visible; do not treat this declaration as complete.
30 class AArch64ExpandPseudo : public MachineFunctionPass {
33 AArch64ExpandPseudo() : MachineFunctionPass(ID) {
34 initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
// Cached target instruction info, set in runOnMachineFunction.
37 const AArch64InstrInfo *TII;
39 bool runOnMachineFunction(MachineFunction &Fn) override;
41 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
// Expansion helpers; each returns true if it changed the code.
44 bool expandMBB(MachineBasicBlock &MBB);
45 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
46 MachineBasicBlock::iterator &NextMBBI);
// NOTE(review): the BitSize parameter of expandMOVImm (line 48/49 of the
// original) is elided from this listing.
47 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
50 bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
51 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
52 unsigned ExtendImm, unsigned ZeroReg,
53 MachineBasicBlock::iterator &NextMBBI);
54 bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
55 MachineBasicBlock::iterator MBBI,
56 MachineBasicBlock::iterator &NextMBBI);
// Storage for the pass's unique ID (its address identifies the pass) and
// registration with the LLVM pass registry under "aarch64-expand-pseudo".
58 char AArch64ExpandPseudo::ID = 0;
61 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
62 AARCH64_EXPAND_PSEUDO_NAME, false, false)
64 /// \brief Transfer implicit operands on the pseudo instruction to the
65 /// instructions created from the expansion.
66 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
67 MachineInstrBuilder &DefMI) {
68 const MCInstrDesc &Desc = OldMI.getDesc();
// Operands beyond Desc.getNumOperands() are the implicit ones appended to
// the pseudo (e.g. by register allocation); walk only those.
69 for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
71 const MachineOperand &MO = OldMI.getOperand(i);
72 assert(MO.isReg() && MO.getReg());
// NOTE(review): the loop increment and the statements that route each
// implicit operand to DefMI (defs) or UseMI (uses), plus the closing
// braces, are elided from this listing (numbering gap 73-79).
80 /// \brief Helper function which extracts the specified 16-bit chunk from a
// 64-bit value. Chunk 0 is the least-significant 16 bits.
82 static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
83 assert(ChunkIdx < 4 && "Out of range chunk index specified!");
85 return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
// NOTE(review): closing brace elided from this listing (line 86).
88 /// \brief Helper function which replicates a 16-bit chunk within a 64-bit
89 /// value. Indices correspond to element numbers in a v4i16.
90 static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
91 assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!");
92 const unsigned ShiftAmt = ToIdx * 16;
94 // Replicate the source chunk to the destination position.
95 const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt;
96 // Clear the destination chunk.
97 Imm &= ~(0xFFFFLL << ShiftAmt);
98 // Insert the replicated chunk.
// NOTE(review): the final `Imm |= Chunk; return Imm;` (lines 99-100) and the
// closing brace are elided from this listing.
102 /// \brief Helper function which tries to materialize a 64-bit value with an
103 /// ORR + MOVK instruction sequence.
// Returns true (and erases MI) on success; the pseudo's dst operand 0 is the
// destination register of both emitted instructions.
104 static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
105 MachineBasicBlock &MBB,
106 MachineBasicBlock::iterator &MBBI,
107 const AArch64InstrInfo *TII, unsigned ChunkIdx) {
108 assert(ChunkIdx < 4 && "Out of range chunk index specified!");
109 const unsigned ShiftAmt = ChunkIdx * 16;
// Only proceed if OrrImm is encodable as an AArch64 logical immediate.
// NOTE(review): the declaration of `Encoding` (numbering gap 110-111) is
// elided from this listing.
112 if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
113 // Create the ORR-immediate instruction.
114 MachineInstrBuilder MIB =
115 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
116 .add(MI.getOperand(0))
117 .addReg(AArch64::XZR)
// NOTE(review): the `.addImm(Encoding)` operand (line 118) is elided here.
120 // Create the MOVK instruction.
121 const unsigned Imm16 = getChunk(UImm, ChunkIdx);
122 const unsigned DstReg = MI.getOperand(0).getReg();
123 const bool DstIsDead = MI.getOperand(0).isDead();
124 MachineInstrBuilder MIB1 =
125 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
126 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
// NOTE(review): the MOVK's tied-use and `.addImm(Imm16)` operands
// (numbering gap 127-128) are elided from this listing.
129 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
131 transferImpOps(MI, MIB, MIB1);
132 MI.eraseFromParent();
// NOTE(review): the `return true;` / `return false;` paths and closing
// braces (lines 133-137) are elided from this listing.
139 /// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
140 /// can be materialized with an ORR instruction.
// On success, Encoding receives the logical-immediate encoding for the
// replicated value.
141 static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
// Broadcast the 16-bit chunk into all four 16-bit lanes of a 64-bit value.
142 Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
144 return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
// NOTE(review): closing brace elided from this listing.
147 /// \brief Check for identical 16-bit chunks within the constant and if so
148 /// materialize them with a single ORR instruction. The remaining one or two
149 /// 16-bit chunks will be materialized with MOVK instructions.
151 /// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
152 /// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
153 /// an ORR instruction.
// Returns true (and erases MI) on success.
155 static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
156 MachineBasicBlock &MBB,
157 MachineBasicBlock::iterator &MBBI,
158 const AArch64InstrInfo *TII) {
159 typedef DenseMap<uint64_t, unsigned> CountMap;
// NOTE(review): the `CountMap Counts;` declaration (numbering gap 160-161)
// is elided from this listing.
162 // Scan the constant and count how often every chunk occurs.
163 for (unsigned Idx = 0; Idx < 4; ++Idx)
164 ++Counts[getChunk(UImm, Idx)];
166 // Traverse the chunks to find one which occurs more than once.
167 for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
168 Chunk != End; ++Chunk) {
169 const uint64_t ChunkVal = Chunk->first;
170 const unsigned Count = Chunk->second;
172 uint64_t Encoding = 0;
174 // We are looking for chunks which have two or three instances and can be
175 // materialized with an ORR instruction.
176 if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
// NOTE(review): the `continue;` for this guard (line 177) is elided here.
179 const bool CountThree = Count == 3;
180 // Create the ORR-immediate instruction.
181 MachineInstrBuilder MIB =
182 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
183 .add(MI.getOperand(0))
184 .addReg(AArch64::XZR)
// NOTE(review): `.addImm(Encoding)` (line 185) is elided here.
187 const unsigned DstReg = MI.getOperand(0).getReg();
188 const bool DstIsDead = MI.getOperand(0).isDead();
// NOTE(review): the `unsigned Imm16;` declaration (line 189) is elided here.
190 unsigned ShiftAmt = 0;
192 // Find the first chunk not materialized with the ORR instruction.
193 for (; ShiftAmt < 64; ShiftAmt += 16) {
194 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
196 if (Imm16 != ChunkVal)
// NOTE(review): the `break;` and closing brace (lines 197-198) are elided.
200 // Create the first MOVK instruction.
201 MachineInstrBuilder MIB1 =
202 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
// The first MOVK's def is only dead if it is also the last instruction,
// i.e. the ORR already covered three chunks.
204 RegState::Define | getDeadRegState(DstIsDead && CountThree))
207 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
209 // In case we have three instances the whole constant is now materialized
// and we can exit (the `if (CountThree)` guard, line 211, is elided here).
212 transferImpOps(MI, MIB, MIB1);
213 MI.eraseFromParent();
// NOTE(review): a `return true;` (line ~214) is elided here.
217 // Find the remaining chunk which needs to be materialized.
218 for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
219 Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
221 if (Imm16 != ChunkVal)
// NOTE(review): `break;` and closing brace (lines 222-223) are elided.
225 // Create the second MOVK instruction.
226 MachineInstrBuilder MIB2 =
227 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
228 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
231 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
233 transferImpOps(MI, MIB, MIB2);
234 MI.eraseFromParent();
// NOTE(review): the final `return true;` / `return false;` and closing
// braces (lines 235-238) are elided from this listing.
241 /// \brief Check whether this chunk matches the pattern '1...0...'. This pattern
242 /// starts a contiguous sequence of ones if we look at the bits from the LSB
// towards the MSB. All-zero and all-one chunks are excluded.
244 static bool isStartChunk(uint64_t Chunk) {
245 if (Chunk == 0 || Chunk == UINT64_MAX)
// NOTE(review): the `return false;` for this guard (line 246) is elided.
// '1...0...' holds iff the complement is a low-bit mask.
248 return isMask_64(~Chunk);
251 /// \brief Check whether this chunk matches the pattern '0...1...' This pattern
252 /// ends a contiguous sequence of ones if we look at the bits from the LSB
// towards the MSB. All-zero and all-one chunks are excluded.
254 static bool isEndChunk(uint64_t Chunk) {
255 if (Chunk == 0 || Chunk == UINT64_MAX)
// NOTE(review): the `return false;` for this guard (line 256) is elided.
// '0...1...' holds iff the chunk itself is a low-bit mask.
258 return isMask_64(Chunk);
261 /// \brief Clear or set all bits in the chunk at the given index.
// When Clear is true the 16-bit chunk at Idx becomes 0x0000, otherwise
// 0xFFFF. Returns the updated immediate.
262 static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
263 const uint64_t Mask = 0xFFFF;
// NOTE(review): the `if (Clear)` / `else` structure around the two branches
// below (numbering gaps 264-265, 268) is elided from this listing, as is the
// final `return Imm;`.
266 // Clear chunk in the immediate.
267 Imm &= ~(Mask << (Idx * 16));
269 // Set all bits in the immediate for the particular chunk.
270 Imm |= Mask << (Idx * 16);
275 /// \brief Check whether the constant contains a sequence of contiguous ones,
276 /// which might be interrupted by one or two chunks. If so, materialize the
277 /// sequence of contiguous ones with an ORR instruction.
278 /// Materialize the chunks which are either interrupting the sequence or outside
279 /// of the sequence with a MOVK instruction.
281 /// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
282 /// which ends the sequence (0...1...). Then we are looking for constants which
283 /// contain at least one S and E chunk.
284 /// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
286 /// We are also looking for constants like |S|A|B|E| where the contiguous
287 /// sequence of ones wraps around the MSB into the LSB.
// Returns true (and erases MI) on success.
289 static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
290 MachineBasicBlock &MBB,
291 MachineBasicBlock::iterator &MBBI,
292 const AArch64InstrInfo *TII) {
293 const int NotSet = -1;
294 const uint64_t Mask = 0xFFFF;
296 int StartIdx = NotSet;
// NOTE(review): the `int EndIdx = NotSet;` declaration (line 297) is elided
// from this listing.
298 // Try to find the chunks which start/end a contiguous sequence of ones.
299 for (int Idx = 0; Idx < 4; ++Idx) {
300 int64_t Chunk = getChunk(UImm, Idx);
301 // Sign extend the 16-bit chunk to 64-bit.
302 Chunk = (Chunk << 48) >> 48;
304 if (isStartChunk(Chunk))
// NOTE(review): `StartIdx = Idx;` (line 305) is elided here.
306 else if (isEndChunk(Chunk))
// NOTE(review): `EndIdx = Idx;` and closing brace (lines 307-308) elided.
310 // Early exit in case we can't find a start/end chunk.
311 if (StartIdx == NotSet || EndIdx == NotSet)
// NOTE(review): the `return false;` for this guard (line 312) is elided.
314 // Outside of the contiguous sequence of ones everything needs to be zero.
315 uint64_t Outside = 0;
316 // Chunks between the start and end chunk need to have all their bits set.
317 uint64_t Inside = Mask;
319 // If our contiguous sequence of ones wraps around from the MSB into the LSB,
320 // just swap indices and pretend we are materializing a contiguous sequence
321 // of zeros surrounded by a contiguous sequence of ones.
322 if (StartIdx > EndIdx) {
323 std::swap(StartIdx, EndIdx);
324 std::swap(Outside, Inside);
327 uint64_t OrrImm = UImm;
328 int FirstMovkIdx = NotSet;
329 int SecondMovkIdx = NotSet;
331 // Find out which chunks we need to patch up to obtain a contiguous sequence
// of ones (continuation comment line 332 elided).
333 for (int Idx = 0; Idx < 4; ++Idx) {
334 const uint64_t Chunk = getChunk(UImm, Idx);
336 // Check whether we are looking at a chunk which is not part of the
337 // contiguous sequence of ones.
338 if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
339 OrrImm = updateImm(OrrImm, Idx, Outside == 0);
341 // Remember the index we need to patch.
342 if (FirstMovkIdx == NotSet)
// NOTE(review): the assignments to FirstMovkIdx/SecondMovkIdx for this
// branch (lines 343-345) are elided from this listing.
347 // Check whether we are looking a chunk which is part of the contiguous
// sequence of ones but deviates from the all-ones pattern.
349 } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
350 OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
352 // Remember the index we need to patch.
353 if (FirstMovkIdx == NotSet)
// NOTE(review): the corresponding assignments and closing braces
// (lines 354-358) are elided from this listing.
359 assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
361 // Create the ORR-immediate instruction.
362 uint64_t Encoding = 0;
363 AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
364 MachineInstrBuilder MIB =
365 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
366 .add(MI.getOperand(0))
367 .addReg(AArch64::XZR)
// NOTE(review): `.addImm(Encoding)` (line 368) is elided here.
370 const unsigned DstReg = MI.getOperand(0).getReg();
371 const bool DstIsDead = MI.getOperand(0).isDead();
373 const bool SingleMovk = SecondMovkIdx == NotSet;
374 // Create the first MOVK instruction.
375 MachineInstrBuilder MIB1 =
376 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
// Def is only dead here if no second MOVK follows.
378 RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
380 .addImm(getChunk(UImm, FirstMovkIdx))
382 AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));
384 // Early exit in case we only need to emit a single MOVK instruction.
// NOTE(review): the `if (SingleMovk)` guard (line 385) is elided here.
386 transferImpOps(MI, MIB, MIB1);
387 MI.eraseFromParent();
// NOTE(review): a `return true;` and closing brace (lines 388-389) elided.
391 // Create the second MOVK instruction.
392 MachineInstrBuilder MIB2 =
393 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
394 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
396 .addImm(getChunk(UImm, SecondMovkIdx))
398 AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));
400 transferImpOps(MI, MIB, MIB2);
401 MI.eraseFromParent();
// NOTE(review): the final `return true;` and closing brace (lines 402-403)
// are elided from this listing.
405 /// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
406 /// real move-immediate instructions to synthesize the immediate.
// Strategy order: single ORR-immediate; (64-bit only) ORR+MOVK via chunk
// replication or a contiguous-ones sequence; otherwise MOVZ/MOVN + MOVKs.
// NOTE(review): the `unsigned BitSize` parameter (line 409) is elided from
// this listing.
407 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
408 MachineBasicBlock::iterator MBBI,
410 MachineInstr &MI = *MBBI;
411 unsigned DstReg = MI.getOperand(0).getReg();
412 uint64_t Imm = MI.getOperand(1).getImm();
413 const unsigned Mask = 0xFFFF;
415 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
416 // Useless def, and we don't want to risk creating an invalid ORR (which
417 // would really write to sp).
418 MI.eraseFromParent();
// NOTE(review): a `return true;` and closing brace (lines 419-420) elided.
422 // Try a MOVI instruction (aka ORR-immediate with the zero register).
423 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
// NOTE(review): the `uint64_t Encoding;` declaration (line 424) is elided.
425 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
426 unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
427 MachineInstrBuilder MIB =
428 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
429 .add(MI.getOperand(0))
430 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
// NOTE(review): `.addImm(Encoding)` (line 431) is elided here.
432 transferImpOps(MI, MIB, MIB);
433 MI.eraseFromParent();
// NOTE(review): `return true;` and closing brace (lines 434-435) elided.
437 // Scan the immediate and count the number of 16-bit chunks which are either
438 // all ones or all zeros.
439 unsigned OneChunks = 0;
440 unsigned ZeroChunks = 0;
441 for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
442 const unsigned Chunk = (Imm >> Shift) & Mask;
// NOTE(review): the increments of OneChunks/ZeroChunks and closing brace
// (lines 443-448) are elided from this listing.
449 // Since we can't materialize the constant with a single ORR instruction,
450 // let's see whether we can materialize 3/4 of the constant with an ORR
451 // instruction and use an additional MOVK instruction to materialize the
// remaining chunk (continuation line 452 elided).
454 // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
456 // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
457 // we would create the following instruction sequence:
459 // ORR x0, xzr, |A|X|A|X|
460 // MOVK x0, |B|, LSL #16
462 // Only look at 64-bit constants which can't be materialized with a single
463 // instruction e.g. which have less than either three all zero or all one
// chunks (continuation line 464 elided).
466 // Ignore 32-bit constants here, they always can be materialized with a
467 // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
468 // with a single ORR, the best sequence we can achieve is a ORR + MOVK pair.
469 // Thus we fall back to the default code below which in the best case creates
470 // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
472 if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
473 // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
// equal? (continuation line 474 elided)
475 if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
476 // See if we can come up with a constant which can be materialized with
477 // ORR-immediate by replicating element 3 into element 1.
478 uint64_t OrrImm = replicateChunk(UImm, 3, 1);
479 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
// NOTE(review): `return true;` (line 480) is elided here (and after each
// tryOrrMovk call below).
482 // See if we can come up with a constant which can be materialized with
483 // ORR-immediate by replicating element 1 into element 3.
484 OrrImm = replicateChunk(UImm, 1, 3);
485 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
488 // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
// equal? (continuation line 489 elided)
490 } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
491 // See if we can come up with a constant which can be materialized with
492 // ORR-immediate by replicating element 2 into element 0.
493 uint64_t OrrImm = replicateChunk(UImm, 2, 0);
494 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
497 // See if we can come up with a constant which can be materialized with
// ORR-immediate by replicating element 0 into element 2 — NOTE(review):
// the original comment (line 498/499) says "element 1 into element 3",
// which does not match the replicateChunk(UImm, 0, 2) call below; likely
// a stale copy-paste in the original. Confirm against upstream.
499 OrrImm = replicateChunk(UImm, 0, 2);
500 if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
505 // Check for identical 16-bit chunks within the constant and if so materialize
506 // them with a single ORR instruction. The remaining one or two 16-bit chunks
507 // will be materialized with MOVK instructions.
508 if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
// NOTE(review): `return true;` (line 509) is elided here.
511 // Check whether the constant contains a sequence of contiguous ones, which
512 // might be interrupted by one or two chunks. If so, materialize the sequence
513 // of contiguous ones with an ORR instruction. Materialize the chunks which
514 // are either interrupting the sequence or outside of the sequence with a
// MOVK instruction (continuation line 515 elided).
516 if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
// NOTE(review): `return true;` (line 517) is elided here.
519 // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
520 // more MOVK instructions to insert additional 16-bit portions into the
// lower bits (continuation line 521 elided).
// NOTE(review): declarations of `isNeg` and `FirstOpc` (lines 522-523) are
// elided from this listing.
524 // Use MOVN to materialize the high bits if we have more all one chunks
525 // than all zero chunks.
526 if (OneChunks > ZeroChunks) {
// NOTE(review): the `isNeg = true; Imm = ~Imm;` body and the 32-bit
// truncation branch structure (lines 527-532) are elided from this listing.
533 Imm &= (1LL << 32) - 1;
534 FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
// NOTE(review): the `else` for the 64-bit case (line 535) is elided.
536 FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
538 unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
539 unsigned LastShift = 0; // LSL amount for last MOVK
// NOTE(review): the `if (Imm != 0)` guard (line 540) is elided; the
// leading/trailing-zero counts below are only meaningful for Imm != 0.
541 unsigned LZ = countLeadingZeros(Imm);
542 unsigned TZ = countTrailingZeros(Imm);
543 Shift = (TZ / 16) * 16;
544 LastShift = ((63 - LZ) / 16) * 16;
546 unsigned Imm16 = (Imm >> Shift) & Mask;
547 bool DstIsDead = MI.getOperand(0).isDead();
548 MachineInstrBuilder MIB1 =
549 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
550 .addReg(DstReg, RegState::Define |
551 getDeadRegState(DstIsDead && Shift == LastShift))
// NOTE(review): `.addImm(Imm16)` (line 552) is elided here.
553 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
555 // If a MOVN was used for the high bits of a negative value, flip the rest
556 // of the bits back for use with MOVK.
// NOTE(review): the `if (isNeg) Imm = ~Imm;` statements (lines 557-558)
// are elided from this listing.
560 if (Shift == LastShift) {
561 transferImpOps(MI, MIB1, MIB1);
562 MI.eraseFromParent();
// NOTE(review): `return true;` and closing brace (lines 563-564) elided.
566 MachineInstrBuilder MIB2;
567 unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
568 while (Shift < LastShift) {
// NOTE(review): the `Shift += 16;` increment (line 569) is elided here.
570 Imm16 = (Imm >> Shift) & Mask;
571 if (Imm16 == (isNeg ? Mask : 0))
572 continue; // This 16-bit portion is already set correctly.
573 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
// NOTE(review): the Define reg operand (lines 574-575) is elided here.
576 getDeadRegState(DstIsDead && Shift == LastShift))
// NOTE(review): tied-use and `.addImm(Imm16)` (lines 577-578) elided.
579 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
// NOTE(review): the loop's closing brace (lines 580-581) is elided.
582 transferImpOps(MI, MIB1, MIB2);
583 MI.eraseFromParent();
// NOTE(review): the final `return true;` and closing brace (lines 584-585)
// are elided from this listing.
// Expand a CMP_SWAP_{8,16,32,64} pseudo into an LL/SC loop:
//   .Lloadcmp: ldaxr Dest, [Addr]; cmp Dest, Desired; b.ne .Ldone
//   .Lstore:   stlxr Status, New, [Addr]; cbnz Status, .Lloadcmp
// The opcode parameters select the size-appropriate load/store/compare.
587 bool AArch64ExpandPseudo::expandCMP_SWAP(
588 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
589 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
590 MachineBasicBlock::iterator &NextMBBI) {
591 MachineInstr &MI = *MBBI;
592 DebugLoc DL = MI.getDebugLoc();
593 const MachineOperand &Dest = MI.getOperand(0);
594 unsigned StatusReg = MI.getOperand(1).getReg();
595 bool StatusDead = MI.getOperand(1).isDead();
596 // Duplicating undef operands into 2 instructions does not guarantee the same
597 // value on both; However undef should be replaced by xzr anyway.
598 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
599 unsigned AddrReg = MI.getOperand(2).getReg();
600 unsigned DesiredReg = MI.getOperand(3).getReg();
601 unsigned NewReg = MI.getOperand(4).getReg();
603 MachineFunction *MF = MBB.getParent();
604 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
605 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
606 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
608 MF->insert(++MBB.getIterator(), LoadCmpBB);
609 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
610 MF->insert(++StoreBB->getIterator(), DoneBB);
// LoadCmpBB:
614 // ldaxr xDest, [xAddr]
615 // cmp xDest, xDesired
// b.ne .Ldone (comment lines 612-617 partially elided)
// Status is zeroed first so a failed comparison reports 0.
618 BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
619 .addImm(0).addImm(0);
620 BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
// NOTE(review): the `.addReg(AddrReg)` operand (line 621) is elided here.
622 BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
623 .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
// NOTE(review): the DesiredReg and ExtendImm operands (lines 624-625) are
// elided from this listing.
626 BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
627 .addImm(AArch64CC::NE)
// NOTE(review): the `.addMBB(DoneBB)` operand (line 628) is elided here.
629 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
630 LoadCmpBB->addSuccessor(DoneBB);
631 LoadCmpBB->addSuccessor(StoreBB);
// StoreBB:
634 // stlxr wStatus, xNew, [xAddr]
635 // cbnz wStatus, .Lloadcmp
636 BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
// NOTE(review): the NewReg/AddrReg operands (lines 637-638) are elided.
639 BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
640 .addReg(StatusReg, getKillRegState(StatusDead))
// NOTE(review): the `.addMBB(LoadCmpBB)` operand (line 641) is elided.
642 StoreBB->addSuccessor(LoadCmpBB);
643 StoreBB->addSuccessor(DoneBB);
// Move everything after the pseudo into DoneBB and rewire successors.
645 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
646 DoneBB->transferSuccessors(&MBB);
648 MBB.addSuccessor(LoadCmpBB);
650 NextMBBI = MBB.end();
651 MI.eraseFromParent();
653 // Recompute livein lists.
654 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
655 LivePhysRegs LiveRegs;
656 computeLiveIns(LiveRegs, MRI, *DoneBB);
657 computeLiveIns(LiveRegs, MRI, *StoreBB);
658 computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
659 // Do an extra pass around the loop to get loop carried registers right.
660 StoreBB->clearLiveIns();
661 computeLiveIns(LiveRegs, MRI, *StoreBB);
662 LoadCmpBB->clearLiveIns();
663 computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
// NOTE(review): the final `return true;` and closing brace (lines 664-666)
// are elided from this listing.
// Expand CMP_SWAP_128 into an LDAXP/STLXP loop. The 128-bit compare is done
// as two 64-bit SUBS, folding the per-half EQ results into StatusReg via
// CSINC so a single CBNZ decides success.
668 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
669 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
670 MachineBasicBlock::iterator &NextMBBI) {
672 MachineInstr &MI = *MBBI;
673 DebugLoc DL = MI.getDebugLoc();
674 MachineOperand &DestLo = MI.getOperand(0);
675 MachineOperand &DestHi = MI.getOperand(1);
676 unsigned StatusReg = MI.getOperand(2).getReg();
677 bool StatusDead = MI.getOperand(2).isDead();
678 // Duplicating undef operands into 2 instructions does not guarantee the same
679 // value on both; However undef should be replaced by xzr anyway.
680 assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
681 unsigned AddrReg = MI.getOperand(3).getReg();
682 unsigned DesiredLoReg = MI.getOperand(4).getReg();
683 unsigned DesiredHiReg = MI.getOperand(5).getReg();
684 unsigned NewLoReg = MI.getOperand(6).getReg();
685 unsigned NewHiReg = MI.getOperand(7).getReg();
687 MachineFunction *MF = MBB.getParent();
688 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
689 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
690 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
692 MF->insert(++MBB.getIterator(), LoadCmpBB);
693 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
694 MF->insert(++StoreBB->getIterator(), DoneBB);
// LoadCmpBB:
697 // ldaxp xDestLo, xDestHi, [xAddr]
698 // cmp xDestLo, xDesiredLo
699 // sbcs xDestHi, xDesiredHi
// b.ne .Ldone (comment continuation elided)
701 BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
702 .addReg(DestLo.getReg(), RegState::Define)
703 .addReg(DestHi.getReg(), RegState::Define)
// NOTE(review): the `.addReg(AddrReg)` operand (line 704) is elided here.
705 BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
706 .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
707 .addReg(DesiredLoReg)
// NOTE(review): the shift-immediate operand (line 708) is elided here.
// StatusReg := (lo halves equal) ? 1 : 0, via CSINC wzr/wzr.
709 BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
710 .addUse(AArch64::WZR)
711 .addUse(AArch64::WZR)
712 .addImm(AArch64CC::EQ)
713 BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
714 .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
715 .addReg(DesiredHiReg)
// NOTE(review): the shift-immediate operand (line 716) is elided here.
// StatusReg := (hi halves equal) ? StatusReg+1 : StatusReg.
717 BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
718 .addUse(StatusReg, RegState::Kill)
719 .addUse(StatusReg, RegState::Kill)
720 .addImm(AArch64CC::EQ)
721 BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
722 .addUse(StatusReg, getKillRegState(StatusDead))
// NOTE(review): the `.addMBB(DoneBB)` operand (line 723) is elided here.
724 LoadCmpBB->addSuccessor(DoneBB);
725 LoadCmpBB->addSuccessor(StoreBB);
// StoreBB:
728 // stlxp wStatus, xNewLo, xNewHi, [xAddr]
729 // cbnz wStatus, .Lloadcmp
730 BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
// NOTE(review): the NewLoReg/NewHiReg/AddrReg operands (lines 731-733) are
// elided from this listing.
734 BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
735 .addReg(StatusReg, getKillRegState(StatusDead))
// NOTE(review): the `.addMBB(LoadCmpBB)` operand (line 736) is elided.
737 StoreBB->addSuccessor(LoadCmpBB);
738 StoreBB->addSuccessor(DoneBB);
// Move everything after the pseudo into DoneBB and rewire successors.
740 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
741 DoneBB->transferSuccessors(&MBB);
743 MBB.addSuccessor(LoadCmpBB);
745 NextMBBI = MBB.end();
746 MI.eraseFromParent();
748 // Recompute liveness bottom up.
749 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
750 LivePhysRegs LiveRegs;
751 computeLiveIns(LiveRegs, MRI, *DoneBB);
752 computeLiveIns(LiveRegs, MRI, *StoreBB);
753 computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
754 // Do an extra pass in the loop to get the loop carried dependencies right.
755 StoreBB->clearLiveIns();
756 computeLiveIns(LiveRegs, MRI, *StoreBB);
757 LoadCmpBB->clearLiveIns();
758 computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
// NOTE(review): the final `return true;` and closing brace (lines 759-761)
// are elided from this listing.
763 /// \brief If MBBI references a pseudo instruction that should be expanded here,
764 /// do the expansion and return true. Otherwise return false.
765 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
766 MachineBasicBlock::iterator MBBI,
767 MachineBasicBlock::iterator &NextMBBI) {
768 MachineInstr &MI = *MBBI;
769 unsigned Opcode = MI.getOpcode();
// NOTE(review): the `switch (Opcode)` header and its `default: return
// false;` (numbering gap 770-773) are elided from this listing.
// Register-register ALU pseudos: rewritten to the shifted-register form
// with an LSL #0 shift operand.
774 case AArch64::ADDWrr:
775 case AArch64::SUBWrr:
776 case AArch64::ADDXrr:
777 case AArch64::SUBXrr:
778 case AArch64::ADDSWrr:
779 case AArch64::SUBSWrr:
780 case AArch64::ADDSXrr:
781 case AArch64::SUBSXrr:
782 case AArch64::ANDWrr:
783 case AArch64::ANDXrr:
784 case AArch64::BICWrr:
785 case AArch64::BICXrr:
786 case AArch64::ANDSWrr:
787 case AArch64::ANDSXrr:
788 case AArch64::BICSWrr:
789 case AArch64::BICSXrr:
790 case AArch64::EONWrr:
791 case AArch64::EONXrr:
792 case AArch64::EORWrr:
793 case AArch64::EORXrr:
794 case AArch64::ORNWrr:
795 case AArch64::ORNXrr:
796 case AArch64::ORRWrr:
797 case AArch64::ORRXrr: {
// Map each rr pseudo to its rs (shifted-register) counterpart.
799 switch (MI.getOpcode()) {
// NOTE(review): the inner `default:` arm (lines 800-801) is elided here.
802 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
803 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
804 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
805 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
806 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
807 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
808 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
809 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
810 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
811 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
812 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
813 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
814 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
815 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
816 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
817 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
818 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
819 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
820 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
821 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
822 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
823 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
824 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
825 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
// NOTE(review): the inner switch's closing brace (line 826) is elided.
827 MachineInstrBuilder MIB1 =
828 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
829 MI.getOperand(0).getReg())
830 .add(MI.getOperand(1))
831 .add(MI.getOperand(2))
832 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
833 transferImpOps(MI, MIB1, MIB1);
834 MI.eraseFromParent();
// NOTE(review): `return true;` and closing brace (lines 835-836) elided.
838 case AArch64::LOADgot: {
839 // Expand into ADRP + LDR.
840 unsigned DstReg = MI.getOperand(0).getReg();
841 const MachineOperand &MO1 = MI.getOperand(1);
842 unsigned Flags = MO1.getTargetFlags();
843 MachineInstrBuilder MIB1 =
844 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
845 MachineInstrBuilder MIB2 =
846 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
847 .add(MI.getOperand(0))
// NOTE(review): the base-register use operand (lines 848-849) is elided.
// Attach page / page-offset relocations matching the operand kind.
850 if (MO1.isGlobal()) {
851 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
852 MIB2.addGlobalAddress(MO1.getGlobal(), 0,
853 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
854 } else if (MO1.isSymbol()) {
855 MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
856 MIB2.addExternalSymbol(MO1.getSymbolName(),
857 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
// NOTE(review): the `} else {` line (858) is elided here.
859 assert(MO1.isCPI() &&
860 "Only expect globals, externalsymbols, or constant pools");
861 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
862 Flags | AArch64II::MO_PAGE);
863 MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
864 Flags | AArch64II::MO_PAGEOFF |
// NOTE(review): the trailing `AArch64II::MO_NC);` and closing braces
// (lines 865-867) are elided from this listing.
868 transferImpOps(MI, MIB1, MIB2);
869 MI.eraseFromParent();
// NOTE(review): `return true;` and closing brace (lines 870-871) elided.
873 case AArch64::MOVaddr:
874 case AArch64::MOVaddrJT:
875 case AArch64::MOVaddrCP:
876 case AArch64::MOVaddrBA:
877 case AArch64::MOVaddrTLS:
878 case AArch64::MOVaddrEXT: {
879 // Expand into ADRP + ADD.
880 unsigned DstReg = MI.getOperand(0).getReg();
881 MachineInstrBuilder MIB1 =
882 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
883 .add(MI.getOperand(1));
885 MachineInstrBuilder MIB2 =
886 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
887 .add(MI.getOperand(0))
// NOTE(review): the `.addReg(DstReg)` use operand (line 888) is elided.
889 .add(MI.getOperand(2))
// NOTE(review): the shift-immediate operand (lines 890-891) is elided.
892 transferImpOps(MI, MIB1, MIB2);
893 MI.eraseFromParent();
// NOTE(review): `return true;` and closing brace (lines 894-895) elided.
896 case AArch64::MOVbaseTLS: {
897 unsigned DstReg = MI.getOperand(0).getReg();
898 auto SysReg = AArch64SysReg::TPIDR_EL0;
899 MachineFunction *MF = MBB.getParent();
// Fuchsia's kernel code model reads the TLS base from TPIDR_EL1 instead
// of the usual EL0 register.
900 if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
901 MF->getTarget().getCodeModel() == CodeModel::Kernel)
902 SysReg = AArch64SysReg::TPIDR_EL1;
903 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
// NOTE(review): the `.addImm(SysReg)` operand (line 904) is elided here.
905 MI.eraseFromParent();
// NOTE(review): `return true;` and closing brace (lines 906-908) elided.
909 case AArch64::MOVi32imm:
910 return expandMOVImm(MBB, MBBI, 32);
911 case AArch64::MOVi64imm:
912 return expandMOVImm(MBB, MBBI, 64);
913 case AArch64::RET_ReallyLR: {
914 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
915 // function and missing live-ins. We are fine in practice because callee
916 // saved register handling ensures the register value is restored before
917 // RET, but we need the undef flag here to appease the MachineVerifier
// (continuation of comment, line 918, elided).
919 MachineInstrBuilder MIB =
920 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
921 .addReg(AArch64::LR, RegState::Undef);
922 transferImpOps(MI, MIB, MIB);
923 MI.eraseFromParent();
// NOTE(review): `return true;` and closing brace (lines 924-925) elided.
// Compare-and-swap pseudos: delegate to the LL/SC loop expanders with the
// size-appropriate exclusive load/store, compare, and extend/shift forms.
926 case AArch64::CMP_SWAP_8:
927 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
// NOTE(review): the CmpOp argument (line 928, SUBSWrx) is elided here.
929 AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
930 AArch64::WZR, NextMBBI);
931 case AArch64::CMP_SWAP_16:
932 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
// NOTE(review): the CmpOp argument (line 933) is elided here.
934 AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
935 AArch64::WZR, NextMBBI);
936 case AArch64::CMP_SWAP_32:
937 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
// NOTE(review): the CmpOp argument (line 938) is elided here.
939 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
940 AArch64::WZR, NextMBBI);
941 case AArch64::CMP_SWAP_64:
942 return expandCMP_SWAP(MBB, MBBI,
943 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
944 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
945 AArch64::XZR, NextMBBI);
946 case AArch64::CMP_SWAP_128:
947 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
// NOTE(review): the switch's closing brace, a `return false;`, and the
// function's closing brace (lines 948-951) are elided from this listing.
953 /// \brief Iterate over the instructions in basic block MBB and expand any
954 /// pseudo instructions. Return true if anything was modified.
955 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
956 bool Modified = false;
// NMBBI is captured before each expansion because expandMI may erase the
// current instruction (and, for CMP_SWAP, may update NMBBI itself).
958 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
// NOTE(review): the `while (MBBI != E)` loop header (line 959) and the
// `MBBI = NMBBI;` advance plus closing braces / `return Modified;`
// (lines 962-965) are elided from this listing.
960 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
961 Modified |= expandMI(MBB, MBBI, NMBBI);
// Pass entry point: cache the target's instruction info, then expand
// pseudos in every basic block of the function.
968 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
969 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
971 bool Modified = false;
// NOTE(review): the `for (auto &MBB : MF)` loop header (line 972) and the
// `return Modified;` / closing brace (lines 974-975) are elided here.
973 Modified |= expandMBB(MBB);
977 /// \brief Returns an instance of the pseudo instruction expansion pass.
// Factory used by the AArch64 target machine when building its pass pipeline.
978 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
979 return new AArch64ExpandPseudo();
// NOTE(review): the closing brace (line 980) is elided from this listing.