1 //===-- X86MCInstLower.cpp - Convert X86 MachineInstr to an MCInst --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains code to lower X86 MachineInstrs to their corresponding
13 //===----------------------------------------------------------------------===//
15 #include "InstPrinter/X86ATTInstPrinter.h"
16 #include "InstPrinter/X86InstComments.h"
17 #include "MCTargetDesc/X86BaseInfo.h"
18 #include "MCTargetDesc/X86TargetStreamer.h"
19 #include "Utils/X86ShuffleDecode.h"
20 #include "X86AsmPrinter.h"
21 #include "X86RegisterInfo.h"
22 #include "X86ShuffleDecodeConstantPool.h"
23 #include "llvm/ADT/Optional.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/iterator_range.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/GlobalValue.h"
33 #include "llvm/IR/Mangler.h"
34 #include "llvm/MC/MCAsmInfo.h"
35 #include "llvm/MC/MCCodeEmitter.h"
36 #include "llvm/MC/MCContext.h"
37 #include "llvm/MC/MCExpr.h"
38 #include "llvm/MC/MCFixup.h"
39 #include "llvm/MC/MCInst.h"
40 #include "llvm/MC/MCInstBuilder.h"
41 #include "llvm/MC/MCSection.h"
42 #include "llvm/MC/MCSectionELF.h"
43 #include "llvm/MC/MCStreamer.h"
44 #include "llvm/MC/MCSymbol.h"
45 #include "llvm/MC/MCSymbolELF.h"
46 #include "llvm/Target/TargetLoweringObjectFile.h"
52 /// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
53 class X86MCInstLower {
// The machine function being lowered; bound from the constructor argument.
55 const MachineFunction &MF;
// Initialized from MF.getTarget() by the constructor.
56 const TargetMachine &TM;
// The asm printer supplied to the constructor; the lowering routines use it
// for symbol lookup and name mangling.
58 X86AsmPrinter &AsmPrinter;
61 X86MCInstLower(const MachineFunction &MF, X86AsmPrinter &asmprinter);
// Lower a single operand of MI. The result is optional: the definition
// documents operands that are deliberately ignored.
63 Optional<MCOperand> LowerMachineOperand(const MachineInstr *MI,
64 const MachineOperand &MO) const;
// Lower MI (opcode and operands) into OutMI.
65 void Lower(const MachineInstr *MI, MCInst &OutMI) const;
// Map a symbolic operand to its MCSymbol.
67 MCSymbol *GetSymbolFromOperand(const MachineOperand &MO) const;
// Wrap Sym in the expression operand selected by MO's target flags.
68 MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
// Accessor for the Mach-O specific module info of MF.
71 MachineModuleInfoMachO &getMachOMMI() const;
74 } // end anonymous namespace
76 // Emit a minimal sequence of nops spanning NumBytes bytes.
// Forward declaration; the definition appears later in this file.
77 static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
78 const MCSubtargetInfo &STI);
// Encode Inst into a scratch buffer to measure its size, add that size to
// CurrentShadowSize, and clear InShadow once RequiredShadowSize is reached.
// NOTE(review): part of this body is not visible in this listing, so any guard
// around the counting cannot be confirmed from here.
80 void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
81 const MCSubtargetInfo &STI,
82 MCCodeEmitter *CodeEmitter) {
// Scratch output for the encoder; only Code.size() is read afterwards.
84 SmallString<256> Code;
85 SmallVector<MCFixup, 4> Fixups;
86 raw_svector_ostream VecOS(Code);
87 CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
88 CurrentShadowSize += Code.size();
89 if (CurrentShadowSize >= RequiredShadowSize)
90 InShadow = false; // The shadow is big enough. Stop counting.
// If a shadow is still being tracked and fewer than RequiredShadowSize bytes
// have been counted, emit nops for the remaining bytes.
94 void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
95 MCStreamer &OutStreamer, const MCSubtargetInfo &STI) {
96 if (InShadow && CurrentShadowSize < RequiredShadowSize) {
// Pad with exactly the number of bytes still missing from the shadow.
98 EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize,
99 MF->getSubtarget<X86Subtarget>().is64Bit(), STI);
// Emit Inst to the output streamer and pass it to the stackmap shadow tracker
// so that its encoded size is counted.
103 void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
// The last argument is true only when EnablePrintSchedInfo is set and the
// instruction does not carry the X86::NO_SCHED_INFO flag.
104 OutStreamer->EmitInstruction(Inst, getSubtargetInfo(),
105 EnablePrintSchedInfo &&
106 !(Inst.getFlags() & X86::NO_SCHED_INFO));
107 SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
// Bind the lowering helper to a machine function and its asm printer. The MC
// context, target machine and MCAsmInfo are all derived from the function.
110 X86MCInstLower::X86MCInstLower(const MachineFunction &mf,
111 X86AsmPrinter &asmprinter)
112 : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), MAI(*TM.getMCAsmInfo()),
113 AsmPrinter(asmprinter) {}
// Return the MachineModuleInfoMachO object-file info obtained from MF's
// MachineModuleInfo.
115 MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
116 return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
119 /// GetSymbolFromOperand - Lower an MO_GlobalAddress, MO_ExternalSymbol or
120 /// MO_MachineBasicBlock operand to an MCSymbol.
121 MCSymbol *X86MCInstLower::GetSymbolFromOperand(const MachineOperand &MO) const {
122 const DataLayout &DL = MF.getDataLayout();
123 assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
124 "Isn't a symbol reference");
126 MCSymbol *Sym = nullptr;
127 SmallString<128> Name;
// First pass over the target flags: choose how the symbol name is decorated.
130 switch (MO.getTargetFlags()) {
131 case X86II::MO_DLLIMPORT:
132 // Handle dllimport linkage.
135 case X86II::MO_COFFSTUB:
138 case X86II::MO_DARWIN_NONLAZY:
139 case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
// Darwin non-lazy pointer references use a "$non_lazy_ptr" suffix.
140 Suffix = "$non_lazy_ptr";
145 Name += DL.getPrivateGlobalPrefix();
// Build the base name from whichever kind of operand this is.
148 const GlobalValue *GV = MO.getGlobal();
149 AsmPrinter.getNameWithPrefix(Name, GV);
150 } else if (MO.isSymbol()) {
151 Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
152 } else if (MO.isMBB()) {
// Basic blocks use their own symbol directly and must not carry a suffix.
153 assert(Suffix.empty());
154 Sym = MO.getMBB()->getSymbol();
159 Sym = Ctx.getOrCreateSymbol(Name);
161 // If the target flags on the operand changes the name of the symbol, do that
162 // before we return the symbol.
163 switch (MO.getTargetFlags()) {
166 case X86II::MO_COFFSTUB: {
167 MachineModuleInfoCOFF &MMICOFF =
168 MF.getMMI().getObjFileInfo<MachineModuleInfoCOFF>();
169 MachineModuleInfoImpl::StubValueTy &StubSym = MMICOFF.getGVStubEntry(Sym);
// Fill in the stub entry only the first time this stub symbol is seen.
170 if (!StubSym.getPointer()) {
171 assert(MO.isGlobal() && "Extern symbol not handled yet");
172 StubSym = MachineModuleInfoImpl::StubValueTy(
173 AsmPrinter.getSymbol(MO.getGlobal()), true);
177 case X86II::MO_DARWIN_NONLAZY:
178 case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
179 MachineModuleInfoImpl::StubValueTy &StubSym =
180 getMachOMMI().getGVStubEntry(Sym);
// As above, but the flag stored with the stub is false for globals with
// internal linkage.
181 if (!StubSym.getPointer()) {
182 assert(MO.isGlobal() && "Extern symbol not handled yet");
183 StubSym = MachineModuleInfoImpl::StubValueTy(
184 AsmPrinter.getSymbol(MO.getGlobal()),
185 !MO.getGlobal()->hasInternalLinkage());
// Build the expression operand that refers to Sym, as selected by MO's target
// flags. Most flags only pick an MCSymbolRefExpr variant kind; the PIC-base
// flags build "Sym - <PIC base symbol>" directly. A nonzero operand offset is
// added at the end for operands that are neither jump-table indices nor basic
// blocks.
194 MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
195 MCSymbol *Sym) const {
196 // FIXME: We would like an efficient form for this, so we don't have to do a
197 // lot of extra uniquing.
// Expr stays null when a case only selects RefKind; the reference is then
// created from Sym and RefKind after the switch.
198 const MCExpr *Expr = nullptr;
199 MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
201 switch (MO.getTargetFlags()) {
203 llvm_unreachable("Unknown target flag on GV operand");
204 case X86II::MO_NO_FLAG: // No flag.
205 // These affect the name of the symbol, not any suffix.
206 case X86II::MO_DARWIN_NONLAZY:
207 case X86II::MO_DLLIMPORT:
208 case X86II::MO_COFFSTUB:
212 RefKind = MCSymbolRefExpr::VK_TLVP;
214 case X86II::MO_TLVP_PIC_BASE:
215 Expr = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
216 // Subtract the pic base.
217 Expr = MCBinaryExpr::createSub(
218 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
220 case X86II::MO_SECREL:
221 RefKind = MCSymbolRefExpr::VK_SECREL;
223 case X86II::MO_TLSGD:
224 RefKind = MCSymbolRefExpr::VK_TLSGD;
226 case X86II::MO_TLSLD:
227 RefKind = MCSymbolRefExpr::VK_TLSLD;
229 case X86II::MO_TLSLDM:
230 RefKind = MCSymbolRefExpr::VK_TLSLDM;
232 case X86II::MO_GOTTPOFF:
233 RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
235 case X86II::MO_INDNTPOFF:
236 RefKind = MCSymbolRefExpr::VK_INDNTPOFF;
238 case X86II::MO_TPOFF:
239 RefKind = MCSymbolRefExpr::VK_TPOFF;
241 case X86II::MO_DTPOFF:
242 RefKind = MCSymbolRefExpr::VK_DTPOFF;
244 case X86II::MO_NTPOFF:
245 RefKind = MCSymbolRefExpr::VK_NTPOFF;
247 case X86II::MO_GOTNTPOFF:
248 RefKind = MCSymbolRefExpr::VK_GOTNTPOFF;
250 case X86II::MO_GOTPCREL:
251 RefKind = MCSymbolRefExpr::VK_GOTPCREL;
254 RefKind = MCSymbolRefExpr::VK_GOT;
256 case X86II::MO_GOTOFF:
257 RefKind = MCSymbolRefExpr::VK_GOTOFF;
260 RefKind = MCSymbolRefExpr::VK_PLT;
263 RefKind = MCSymbolRefExpr::VK_X86_ABS8;
265 case X86II::MO_PIC_BASE_OFFSET:
266 case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
267 Expr = MCSymbolRefExpr::create(Sym, Ctx);
268 // Subtract the pic base.
269 Expr = MCBinaryExpr::createSub(
270 Expr, MCSymbolRefExpr::create(MF.getPICBaseSymbol(), Ctx), Ctx);
272 assert(MAI.doesSetDirectiveSuppressReloc());
273 // If .set directive is supported, use it to reduce the number of
274 // relocations the assembler will generate for differences between
275 // local labels. This is only safe when the symbols are in the same
276 // section so we are restricting it to jumptable references.
277 MCSymbol *Label = Ctx.createTempSymbol();
278 AsmPrinter.OutStreamer->EmitAssignment(Label, Expr);
279 Expr = MCSymbolRefExpr::create(Label, Ctx);
285 Expr = MCSymbolRefExpr::create(Sym, RefKind, Ctx);
// Fold in the operand's constant offset; jump-table and basic-block operands
// are excluded by the condition.
287 if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
288 Expr = MCBinaryExpr::createAdd(
289 Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
290 return MCOperand::createExpr(Expr);
293 /// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
294 /// a short fixed-register form.
///
/// \p Inst is expected to have either two operands (reg, imm/expr) or three
/// operands whose first two are the same register. \p Opcode is the short-form
/// opcode to switch to.
295 static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
// The immediate (or expression) is always the last operand.
296 unsigned ImmOp = Inst.getNumOperands() - 1;
297 assert(Inst.getOperand(0).isReg() &&
298 (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
299 ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
300 Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
301 Inst.getNumOperands() == 2) &&
302 "Unexpected instruction!");
304 // Check whether the destination register can be fixed.
305 unsigned Reg = Inst.getOperand(0).getReg();
306 if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
309 // If so, rewrite the instruction.
// Save the immediate so it can be re-added as an operand of the new opcode.
310 MCOperand Saved = Inst.getOperand(ImmOp);
312 Inst.setOpcode(Opcode);
313 Inst.addOperand(Saved);
316 /// If a movsx instruction has a shorter encoding for the used register
317 /// simplify the instruction to use it instead.
318 static void SimplifyMOVSX(MCInst &Inst) {
// Zero means "no shorter form applies"; it is tested after the switch.
319 unsigned NewOpcode = 0;
320 unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
321 switch (Inst.getOpcode()) {
323 llvm_unreachable("Unexpected instruction!");
324 case X86::MOVSX16rr8: // movsbw %al, %ax --> cbtw
325 if (Op0 == X86::AX && Op1 == X86::AL)
326 NewOpcode = X86::CBW;
328 case X86::MOVSX32rr16: // movswl %ax, %eax --> cwtl
329 if (Op0 == X86::EAX && Op1 == X86::AX)
330 NewOpcode = X86::CWDE;
332 case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
333 if (Op0 == X86::RAX && Op1 == X86::EAX)
334 NewOpcode = X86::CDQE;
// Switch to the short opcode only when one of the cases above matched.
338 if (NewOpcode != 0) {
340 Inst.setOpcode(NewOpcode);
344 /// Simplify things like MOV32rm to MOV32o32a.
///
/// Applies only when the register operand is AL/AX/EAX/RAX and the memory
/// operand passes the addressing checks below; the instruction is then given
/// the new opcode with the address and segment operands re-added.
345 static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
347 // Don't make these simplifications in 64-bit mode; other assemblers don't
348 // perform them because they make the code larger.
349 if (Printer.getSubtarget().is64Bit())
// Loads have the register first, followed by the memory operands; in that
// case the memory operands start at index 1. Otherwise they start at index 0
// and the register is operand 5.
// NOTE(review): despite its name, IsStore is true when operands 0 and 1 are
// both registers, which is the load layout - confirm the naming.
352 bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
353 unsigned AddrBase = IsStore;
354 unsigned RegOp = IsStore ? 0 : 5;
355 unsigned AddrOp = AddrBase + 3;
357 Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
358 Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
359 Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
360 Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
361 Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
362 (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
363 "Unexpected instruction!");
365 // Check whether the destination register can be fixed.
366 unsigned Reg = Inst.getOperand(RegOp).getReg();
367 if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
370 // Check whether this is an absolute address.
371 // FIXME: We know TLVP symbol refs aren't, but there should be a better way
373 bool Absolute = true;
374 if (Inst.getOperand(AddrOp).isExpr()) {
375 const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
376 if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
377 if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
// The checks below test for a base register, a scale other than 1, or an
// index register.
382 (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
383 Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
384 Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
387 // If so, rewrite the instruction.
388 MCOperand Saved = Inst.getOperand(AddrOp);
389 MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
391 Inst.setOpcode(Opcode);
392 Inst.addOperand(Saved);
393 Inst.addOperand(Seg);
// Return the return-instruction opcode for the subtarget: RETQ when it is
// 64-bit, RETL otherwise.
396 static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
397 return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
// Lower one MachineOperand of MI to an MCOperand, dispatching on the operand
// type. Registers and immediates are converted directly; every symbolic kind
// is resolved to an MCSymbol and passed through LowerSymbolOperand. Implicit
// registers and register masks are ignored, per the comments below.
401 X86MCInstLower::LowerMachineOperand(const MachineInstr *MI,
402 const MachineOperand &MO) const {
403 switch (MO.getType()) {
406 llvm_unreachable("unknown operand type");
407 case MachineOperand::MO_Register:
408 // Ignore all implicit register operands.
411 return MCOperand::createReg(MO.getReg());
412 case MachineOperand::MO_Immediate:
413 return MCOperand::createImm(MO.getImm());
414 case MachineOperand::MO_MachineBasicBlock:
415 case MachineOperand::MO_GlobalAddress:
416 case MachineOperand::MO_ExternalSymbol:
417 return LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
418 case MachineOperand::MO_MCSymbol:
419 return LowerSymbolOperand(MO, MO.getMCSymbol());
420 case MachineOperand::MO_JumpTableIndex:
421 return LowerSymbolOperand(MO, AsmPrinter.GetJTISymbol(MO.getIndex()));
422 case MachineOperand::MO_ConstantPoolIndex:
423 return LowerSymbolOperand(MO, AsmPrinter.GetCPISymbol(MO.getIndex()));
424 case MachineOperand::MO_BlockAddress:
425 return LowerSymbolOperand(
426 MO, AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
427 case MachineOperand::MO_RegisterMask:
428 // Ignore call clobbers.
// Lower MI into OutMI: copy the opcode, append every operand for which
// LowerMachineOperand produces a value, then adjust the opcode and/or operands
// for the special cases handled in the switch below (pseudo instructions and
// instructions that have a shorter encoding).
433 void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
434 OutMI.setOpcode(MI->getOpcode());
// Operands for which the lowering yields no value are skipped.
436 for (const MachineOperand &MO : MI->operands())
437 if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
438 OutMI.addOperand(MaybeMCOp.getValue());
440 // Handle a few special cases to eliminate operand modifiers.
442 switch (OutMI.getOpcode()) {
447 // LEA should have a segment register, but it must be empty.
448 assert(OutMI.getNumOperands() == 1 + X86::AddrNumOperands &&
449 "Unexpected # of LEA operands");
450 assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
451 "LEA has segment specified!");
454 // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
455 // if one of the registers is extended, but the other isn't.
456 case X86::VMOVZPQILo2PQIrr:
458 case X86::VMOVAPDYrr:
460 case X86::VMOVAPSYrr:
462 case X86::VMOVDQAYrr:
464 case X86::VMOVDQUYrr:
466 case X86::VMOVUPDYrr:
468 case X86::VMOVUPSYrr: {
// Only switch to the alternate opcode when the destination is not an
// extended register but the source is.
469 if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
470 X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
472 switch (OutMI.getOpcode()) {
473 default: llvm_unreachable("Invalid opcode");
474 case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
475 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
476 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
477 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
478 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
479 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
480 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
481 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
482 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
483 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
484 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
485 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
486 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
488 OutMI.setOpcode(NewOpc);
493 case X86::VMOVSSrr: {
// Same idea as above, but the register compared against operand 0 is
// operand 2.
494 if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
495 X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
497 switch (OutMI.getOpcode()) {
498 default: llvm_unreachable("Invalid opcode");
499 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
500 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
502 OutMI.setOpcode(NewOpc);
507 // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
508 // inputs modeled as normal uses instead of implicit uses. As such, truncate
509 // off all but the first operand (the callee). FIXME: Change isel.
510 case X86::TAILJMPr64:
511 case X86::TAILJMPr64_REX:
513 case X86::CALL64pcrel32: {
514 unsigned Opcode = OutMI.getOpcode();
515 MCOperand Saved = OutMI.getOperand(0);
517 OutMI.setOpcode(Opcode);
518 OutMI.addOperand(Saved);
523 case X86::EH_RETURN64: {
524 // NOTE(review): not all statements of this case are visible in this listing.
525 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
529 case X86::CLEANUPRET: {
530 // Replace CLEANUPRET with the appropriate RET.
532 OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
536 case X86::CATCHRET: {
537 // Replace CATCHRET with the appropriate RET.
538 const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
// The RET is given RAX (64-bit) or EAX (32-bit) as a register operand.
539 unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
541 OutMI.setOpcode(getRetOpcode(Subtarget));
542 OutMI.addOperand(MCOperand::createReg(ReturnReg));
546 // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump
551 Opcode = X86::JMP32r;
552 goto SetTailJmpOpcode;
554 case X86::TAILJMPd64:
556 goto SetTailJmpOpcode;
557 case X86::TAILJMPd_CC:
558 case X86::TAILJMPd64_CC:
// The condition code is read from operand 1 of the original MachineInstr.
559 Opcode = X86::GetCondBranchFromCond(
560 static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
561 goto SetTailJmpOpcode;
// Shared tail for the tail-jump cases: set the chosen opcode and re-add the
// saved first operand.
564 MCOperand Saved = OutMI.getOperand(0);
566 OutMI.setOpcode(Opcode);
567 OutMI.addOperand(Saved);
575 // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
576 if (!AsmPrinter.getSubtarget().is64Bit()) {
578 switch (OutMI.getOpcode()) {
579 default: llvm_unreachable("Invalid opcode");
580 case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
581 case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
582 case X86::INC16r: Opcode = X86::INC16r_alt; break;
583 case X86::INC32r: Opcode = X86::INC32r_alt; break;
585 OutMI.setOpcode(Opcode);
589 // These are pseudo-ops for OR to help with the OR->ADD transformation. We do
590 // this with an ugly goto in case the resultant OR uses EAX and needs the
592 case X86::ADD16rr_DB: OutMI.setOpcode(X86::OR16rr); goto ReSimplify;
593 case X86::ADD32rr_DB: OutMI.setOpcode(X86::OR32rr); goto ReSimplify;
594 case X86::ADD64rr_DB: OutMI.setOpcode(X86::OR64rr); goto ReSimplify;
595 case X86::ADD16ri_DB: OutMI.setOpcode(X86::OR16ri); goto ReSimplify;
596 case X86::ADD32ri_DB: OutMI.setOpcode(X86::OR32ri); goto ReSimplify;
597 case X86::ADD64ri32_DB: OutMI.setOpcode(X86::OR64ri32); goto ReSimplify;
598 case X86::ADD16ri8_DB: OutMI.setOpcode(X86::OR16ri8); goto ReSimplify;
599 case X86::ADD32ri8_DB: OutMI.setOpcode(X86::OR32ri8); goto ReSimplify;
600 case X86::ADD64ri8_DB: OutMI.setOpcode(X86::OR64ri8); goto ReSimplify;
602 // We don't currently select the correct instruction form for instructions
603 // which have a short %eax, etc. form. Handle this by custom lowering, for
606 // Note, we are currently not handling the following instructions:
607 // MOV64ao8, MOV64o8a
608 // XCHG16ar, XCHG32ar, XCHG64ar
609 case X86::MOV8mr_NOREX:
611 case X86::MOV8rm_NOREX:
// Pick the short-form opcode; SimplifyShortMoveForm decides whether it can
// actually be applied to this instruction.
618 switch (OutMI.getOpcode()) {
619 default: llvm_unreachable("Invalid opcode");
620 case X86::MOV8mr_NOREX:
621 case X86::MOV8mr: NewOpc = X86::MOV8o32a; break;
622 case X86::MOV8rm_NOREX:
623 case X86::MOV8rm: NewOpc = X86::MOV8ao32; break;
624 case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
625 case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
626 case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
627 case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
629 SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
633 case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
634 case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
635 case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
636 case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
637 case X86::OR8ri: case X86::OR16ri: case X86::OR32ri: case X86::OR64ri32:
638 case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
639 case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
640 case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
641 case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
// Map each reg/imm opcode to its fixed-accumulator form; SimplifyShortImmForm
// decides whether it can be applied.
643 switch (OutMI.getOpcode()) {
644 default: llvm_unreachable("Invalid opcode");
645 case X86::ADC8ri: NewOpc = X86::ADC8i8; break;
646 case X86::ADC16ri: NewOpc = X86::ADC16i16; break;
647 case X86::ADC32ri: NewOpc = X86::ADC32i32; break;
648 case X86::ADC64ri32: NewOpc = X86::ADC64i32; break;
649 case X86::ADD8ri: NewOpc = X86::ADD8i8; break;
650 case X86::ADD16ri: NewOpc = X86::ADD16i16; break;
651 case X86::ADD32ri: NewOpc = X86::ADD32i32; break;
652 case X86::ADD64ri32: NewOpc = X86::ADD64i32; break;
653 case X86::AND8ri: NewOpc = X86::AND8i8; break;
654 case X86::AND16ri: NewOpc = X86::AND16i16; break;
655 case X86::AND32ri: NewOpc = X86::AND32i32; break;
656 case X86::AND64ri32: NewOpc = X86::AND64i32; break;
657 case X86::CMP8ri: NewOpc = X86::CMP8i8; break;
658 case X86::CMP16ri: NewOpc = X86::CMP16i16; break;
659 case X86::CMP32ri: NewOpc = X86::CMP32i32; break;
660 case X86::CMP64ri32: NewOpc = X86::CMP64i32; break;
661 case X86::OR8ri: NewOpc = X86::OR8i8; break;
662 case X86::OR16ri: NewOpc = X86::OR16i16; break;
663 case X86::OR32ri: NewOpc = X86::OR32i32; break;
664 case X86::OR64ri32: NewOpc = X86::OR64i32; break;
665 case X86::SBB8ri: NewOpc = X86::SBB8i8; break;
666 case X86::SBB16ri: NewOpc = X86::SBB16i16; break;
667 case X86::SBB32ri: NewOpc = X86::SBB32i32; break;
668 case X86::SBB64ri32: NewOpc = X86::SBB64i32; break;
669 case X86::SUB8ri: NewOpc = X86::SUB8i8; break;
670 case X86::SUB16ri: NewOpc = X86::SUB16i16; break;
671 case X86::SUB32ri: NewOpc = X86::SUB32i32; break;
672 case X86::SUB64ri32: NewOpc = X86::SUB64i32; break;
673 case X86::TEST8ri: NewOpc = X86::TEST8i8; break;
674 case X86::TEST16ri: NewOpc = X86::TEST16i16; break;
675 case X86::TEST32ri: NewOpc = X86::TEST32i32; break;
676 case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
677 case X86::XOR8ri: NewOpc = X86::XOR8i8; break;
678 case X86::XOR16ri: NewOpc = X86::XOR16i16; break;
679 case X86::XOR32ri: NewOpc = X86::XOR32i32; break;
680 case X86::XOR64ri32: NewOpc = X86::XOR64i32; break;
682 SimplifyShortImmForm(OutMI, NewOpc);
686 // Try to shrink some forms of movsx.
687 case X86::MOVSX16rr8:
688 case X86::MOVSX32rr16:
689 case X86::MOVSX64rr32:
690 SimplifyMOVSX(OutMI);
// Lower the TLS_addr32/64 and TLS_base_addr32/64 pseudos: emit an LEA of the
// TLS symbol reference (its form depends on the opcode) followed by a call to
// "__tls_get_addr" (64-bit) or "___tls_get_addr" (32-bit) through the PLT.
// The symbol is taken from operand 3 of MI.
695 void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering,
696 const MachineInstr &MI) {
698 bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
699 MI.getOpcode() == X86::TLS_base_addr64;
// Only TLS_addr64 requests the extra prefix padding.
701 bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
703 MCContext &context = OutStreamer->getContext();
706 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
// Select the symbol-reference variant kind from the pseudo opcode.
708 MCSymbolRefExpr::VariantKind SRVK;
709 switch (MI.getOpcode()) {
710 case X86::TLS_addr32:
711 case X86::TLS_addr64:
712 SRVK = MCSymbolRefExpr::VK_TLSGD;
714 case X86::TLS_base_addr32:
715 SRVK = MCSymbolRefExpr::VK_TLSLDM;
717 case X86::TLS_base_addr64:
718 SRVK = MCSymbolRefExpr::VK_TLSLD;
721 llvm_unreachable("unexpected opcode");
724 MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
725 const MCSymbolRefExpr *symRef = MCSymbolRefExpr::create(sym, SRVK, context);
// Three LEA forms follow: RIP-relative into RDI, EBX as base into EAX, and
// EBX as index into EAX.
729 LEA.setOpcode(X86::LEA64r);
730 LEA.addOperand(MCOperand::createReg(X86::RDI)); // dest
731 LEA.addOperand(MCOperand::createReg(X86::RIP)); // base
732 LEA.addOperand(MCOperand::createImm(1)); // scale
733 LEA.addOperand(MCOperand::createReg(0)); // index
734 LEA.addOperand(MCOperand::createExpr(symRef)); // disp
735 LEA.addOperand(MCOperand::createReg(0)); // seg
736 } else if (SRVK == MCSymbolRefExpr::VK_TLSLDM) {
737 LEA.setOpcode(X86::LEA32r);
738 LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
739 LEA.addOperand(MCOperand::createReg(X86::EBX)); // base
740 LEA.addOperand(MCOperand::createImm(1)); // scale
741 LEA.addOperand(MCOperand::createReg(0)); // index
742 LEA.addOperand(MCOperand::createExpr(symRef)); // disp
743 LEA.addOperand(MCOperand::createReg(0)); // seg
745 LEA.setOpcode(X86::LEA32r);
746 LEA.addOperand(MCOperand::createReg(X86::EAX)); // dest
747 LEA.addOperand(MCOperand::createReg(0)); // base
748 LEA.addOperand(MCOperand::createImm(1)); // scale
749 LEA.addOperand(MCOperand::createReg(X86::EBX)); // index
750 LEA.addOperand(MCOperand::createExpr(symRef)); // disp
751 LEA.addOperand(MCOperand::createReg(0)); // seg
753 EmitAndCountInstruction(LEA);
// Prefixes emitted ahead of the call.
// NOTE(review): the condition guarding these is not visible here; presumably
// needsPadding - confirm.
756 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
757 EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX));
758 EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX));
// The 32-bit runtime entry point has three leading underscores, the 64-bit
// one has two.
761 StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr";
762 MCSymbol *tlsGetAddr = context.getOrCreateSymbol(name);
763 const MCSymbolRefExpr *tlsRef =
764 MCSymbolRefExpr::create(tlsGetAddr, MCSymbolRefExpr::VK_PLT, context);
766 EmitAndCountInstruction(
767 MCInstBuilder(is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
771 /// Emit the largest nop instruction smaller than or equal to \p NumBytes
772 /// bytes. Return the size of nop emitted.
///
/// Asserts that \p Is64Bit is true and that the emitted size does not exceed
/// \p NumBytes.
773 static unsigned EmitNop(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
774 const MCSubtargetInfo &STI) {
775 // This works only for 64bit. For 32bit we have to do additional checking if
776 // the CPU supports multi-byte nops.
777 assert(Is64Bit && "EmitNops only supports X86-64");
// Opcode and memory-operand fields of the nop to emit; they are filled in by
// the selection logic that follows.
780 unsigned Opc, BaseReg, ScaleVal, IndexReg, Displacement, SegmentReg;
781 Opc = IndexReg = Displacement = SegmentReg = 0;
786 llvm_unreachable("Zero nops?");
839 SegmentReg = X86::CS;
// Use up to five 0x66 prefix bytes to cover bytes the chosen nop does not
// already account for.
843 unsigned NumPrefixes = std::min(NumBytes - NopSize, 5U);
844 NopSize += NumPrefixes;
845 for (unsigned i = 0; i != NumPrefixes; ++i)
846 OS.EmitBytes("\x66");
849 default: llvm_unreachable("Unexpected opcode");
851 OS.EmitInstruction(MCInstBuilder(Opc), STI);
854 OS.EmitInstruction(MCInstBuilder(Opc).addReg(X86::AX).addReg(X86::AX), STI);
858 OS.EmitInstruction(MCInstBuilder(Opc)
862 .addImm(Displacement)
867 assert(NopSize <= NumBytes && "We overemitted?");
871 /// Emit the optimal amount of multi-byte nops on X86.
///
/// Each call to EmitNop reduces \p NumBytes by the size it emitted.
872 static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
873 const MCSubtargetInfo &STI) {
// Remember the requested total so the assert below can check that the
// remaining count never exceeds it.
874 unsigned NopsToEmit = NumBytes;
877 NumBytes -= EmitNop(OS, NumBytes, Is64Bit, STI);
878 assert(NopsToEmit >= NumBytes && "Emitted more than I asked for!");
// Lower a STATEPOINT. When the statepoint requests patch bytes, that many
// bytes of nops are emitted. The call-lowering code picks CALL64pcrel32 for
// symbol and immediate targets and CALL64r for register targets. Finally the
// statepoint is recorded with SM. Only 64-bit subtargets are supported
// (asserted).
882 void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
883 X86MCInstLower &MCIL) {
884 assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
886 StatepointOpers SOpers(&MI);
887 if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
888 EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
891 // Lower call target and choose correct opcode
892 const MachineOperand &CallTarget = SOpers.getCallTarget();
893 MCOperand CallTargetMCOp;
895 switch (CallTarget.getType()) {
896 case MachineOperand::MO_GlobalAddress:
897 case MachineOperand::MO_ExternalSymbol:
898 CallTargetMCOp = MCIL.LowerSymbolOperand(
899 CallTarget, MCIL.GetSymbolFromOperand(CallTarget));
900 CallOpcode = X86::CALL64pcrel32;
901 // Currently, we only support relative addressing with statepoints.
902 // Otherwise, we'll need a scratch register to hold the target
903 // address. You'll fail asserts during load & relocation if this
904 // symbol is too far away. (TODO: support non-relative addressing)
906 case MachineOperand::MO_Immediate:
907 CallTargetMCOp = MCOperand::createImm(CallTarget.getImm());
908 CallOpcode = X86::CALL64pcrel32;
909 // Currently, we only support relative addressing with statepoints.
910 // Otherwise, we'll need a scratch register to hold the target
911 // immediate. You'll fail asserts during load & relocation if this
912 // address is too far away. (TODO: support non-relative addressing)
914 case MachineOperand::MO_Register:
915 // FIXME: Add retpoline support and remove this.
916 if (Subtarget->useRetpolineIndirectCalls())
917 report_fatal_error("Lowering register statepoints with retpoline not "
919 CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
920 CallOpcode = X86::CALL64r;
923 llvm_unreachable("Unsupported operand type in statepoint call target");
// Emit the call directly through OutStreamer.
929 CallInst.setOpcode(CallOpcode);
930 CallInst.addOperand(CallTargetMCOp);
931 OutStreamer->EmitInstruction(CallInst, getSubtargetInfo());
934 // Record our statepoint node in the same section used by STACKMAP
936 SM.recordStatepoint(MI);
// Lower a FAULTING_OP pseudo: record the (fault kind, handler label) pair in
// the fault map, then emit the wrapped instruction, which is built from the
// opcode in operand 3 and the operands from index 4 onwards.
939 void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
940 X86MCInstLower &MCIL) {
941 // FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
942 // <opcode>, <operands>
944 unsigned DefRegister = FaultingMI.getOperand(0).getReg();
945 FaultMaps::FaultKind FK =
946 static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
947 MCSymbol *HandlerLabel = FaultingMI.getOperand(2).getMBB()->getSymbol();
948 unsigned Opcode = FaultingMI.getOperand(3).getImm();
// The wrapped instruction's own operands start after the four fixed ones.
949 unsigned OperandsBeginIdx = 4;
951 assert(FK < FaultMaps::FaultKindMax && "Invalid Faulting Kind!");
952 FM.recordFaultingOp(FK, HandlerLabel);
955 MI.setOpcode(Opcode);
// The def register is only added when one is present.
957 if (DefRegister != X86::NoRegister)
958 MI.addOperand(MCOperand::createReg(DefRegister));
960 for (auto I = FaultingMI.operands_begin() + OperandsBeginIdx,
961 E = FaultingMI.operands_end();
963 if (auto MaybeOperand = MCIL.LowerMachineOperand(&FaultingMI, *I))
964 MI.addOperand(MaybeOperand.getValue());
966 OutStreamer->EmitInstruction(MI, getSubtargetInfo());
// Lower FENTRY_CALL to a pc-relative call to the "__fentry__" symbol, using
// CALL64pcrel32 on 64-bit subtargets and CALLpcrel32 otherwise.
969 void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
970 X86MCInstLower &MCIL) {
971 bool Is64Bits = Subtarget->is64Bit();
972 MCContext &Ctx = OutStreamer->getContext();
973 MCSymbol *fentry = Ctx.getOrCreateSymbol("__fentry__");
// Plain symbol reference with no variant kind.
974 const MCSymbolRefExpr *Op =
975 MCSymbolRefExpr::create(fentry, MCSymbolRefExpr::VK_None, Ctx);
977 EmitAndCountInstruction(
978 MCInstBuilder(Is64Bits ? X86::CALL64pcrel32 : X86::CALLpcrel32)
// Lower PATCHABLE_OP: build the wrapped instruction from operand 1 (opcode)
// and the operands from index 2 onwards, then encode it to measure its size.
// If the encoding is shorter than MinSize (operand 0), either switch PUSH64r
// to PUSH64rmr (when MinSize is 2) or call EmitNop for MinSize bytes, before
// emitting the instruction.
// NOTE(review): the branch structure between the two alternatives is not
// fully visible in this listing.
982 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
983 X86MCInstLower &MCIL) {
984 // PATCHABLE_OP minsize, opcode, operands
986 unsigned MinSize = MI.getOperand(0).getImm();
987 unsigned Opcode = MI.getOperand(1).getImm();
990 MCI.setOpcode(Opcode);
991 for (auto &MO : make_range(MI.operands_begin() + 2, MI.operands_end()))
992 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
993 MCI.addOperand(MaybeOperand.getValue());
// Encode into a scratch buffer; only the resulting size is used.
995 SmallString<256> Code;
996 SmallVector<MCFixup, 4> Fixups;
997 raw_svector_ostream VecOS(Code);
998 CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
1000 if (Code.size() < MinSize) {
1001 if (MinSize == 2 && Opcode == X86::PUSH64r) {
1002 // This is an optimization that lets us get away without emitting a nop in
1005 // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
1006 // bytes too, so the check on MinSize is important.
1007 MCI.setOpcode(X86::PUSH64rmr);
1009 unsigned NopSize = EmitNop(*OutStreamer, MinSize, Subtarget->is64Bit(),
1010 getSubtargetInfo());
1011 assert(NopSize == MinSize && "Could not implement MinSize!");
1016 OutStreamer->EmitInstruction(MCI, getSubtargetInfo());
1019 // Lower a stackmap of the form:
1020 // <id>, <shadowBytes>, ...
//
// Any padding still owed to the previous shadow is emitted first; then the
// stackmap is recorded and the shadow tracker is reset to the shadow size read
// from operand 1.
1021 void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) {
1022 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1023 SM.recordStackMap(MI);
1024 unsigned NumShadowBytes = MI.getOperand(1).getImm();
1025 SMShadowTracker.reset(NumShadowBytes);
1028 // Lower a patchpoint of the form:
1029 // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ...
//
// Pending shadow padding is emitted and the patchpoint recorded first. For a
// non-null call target, a MOV64ri of the target into the scratch register and
// a CALL64r through that register are emitted. The rest of the requested
// patch area (NumBytes - EncodedBytes) is filled with nops. Only 64-bit
// subtargets are supported (asserted).
1030 void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
1031 X86MCInstLower &MCIL) {
1032 assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64");
1034 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
1036 SM.recordPatchPoint(MI);
1038 PatchPointOpers opers(&MI);
1039 unsigned ScratchIdx = opers.getNextScratchIdx();
// Bytes already taken up by the emitted call sequence; stays 0 when the
// target is null.
1040 unsigned EncodedBytes = 0;
1041 const MachineOperand &CalleeMO = opers.getCallTarget();
1043 // Check for null target. If target is non-null (i.e. is non-zero or is
1044 // symbolic) then emit a call.
1045 if (!(CalleeMO.isImm() && !CalleeMO.getImm())) {
1046 MCOperand CalleeMCOp;
1047 switch (CalleeMO.getType()) {
1049 /// FIXME: Add a verifier check for bad callee types.
1050 llvm_unreachable("Unrecognized callee operand type.");
1051 case MachineOperand::MO_Immediate:
1052 if (CalleeMO.getImm())
1053 CalleeMCOp = MCOperand::createImm(CalleeMO.getImm());
1055 case MachineOperand::MO_ExternalSymbol:
1056 case MachineOperand::MO_GlobalAddress:
1057 CalleeMCOp = MCIL.LowerSymbolOperand(CalleeMO,
1058 MCIL.GetSymbolFromOperand(CalleeMO));
1062 // Emit MOV to materialize the target address and the CALL to target.
1063 // This is encoded with 12-13 bytes, depending on which register is used.
1064 unsigned ScratchReg = MI.getOperand(ScratchIdx).getReg();
1065 if (X86II::isX86_64ExtendedReg(ScratchReg))
1070 EmitAndCountInstruction(
1071 MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
1072 // FIXME: Add retpoline support and remove this.
1073 if (Subtarget->useRetpolineIndirectCalls())
1075 "Lowering patchpoint with retpoline not yet implemented.");
1076 EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
// Fill whatever is left of the requested patch area with nops.
1080 unsigned NumBytes = opers.getNumPatchBytes();
1081 assert(NumBytes >= EncodedBytes &&
1082 "Patchpoint can't request size less than the length of a call.");
1084 EmitNops(*OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(),
1085 getSubtargetInfo());
// Emits the XRay custom-event sled: a labelled two-byte jump, pushes and moves
// that place the event arguments into DestRegs, a call to
// __xray_CustomEvent, pops of the registers pushed earlier, and finally a
// sled record for the label.
// NOTE(review): the lines that set and test UsedMask, plus the else/brace
// lines, are not visible in this listing.
1088 void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
1089 X86MCInstLower &MCIL) {
1090 assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
1092 // We want to emit the following pattern, which follows the x86 calling
1093 // convention to prepare for the trampoline call to be patched in.
1096 // .Lxray_event_sled_N:
1097 // jmp +N // jump across the instrumentation sled
1098 // ... // set up arguments in register
1099 // callq __xray_CustomEvent@plt // force dependency to symbol
1103 // After patching, it would look something like:
1105 // nopw (2-byte nop)
1107 // callq __xrayCustomEvent // already lowered
1111 // First we emit the label and the jump.
1112 auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
1113 OutStreamer->AddComment("# XRay Custom Event Log");
1114 OutStreamer->EmitCodeAlignment(2);
1115 OutStreamer->EmitLabel(CurSled);
1117 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1118 // an operand (computed as an offset from the jmp instruction).
1119 // FIXME: Find another less hacky way to force the relative jump.
1120 OutStreamer->EmitBinaryData("\xeb\x0f");
1122 // The default C calling convention will place two arguments into %rcx and
1123 // %rdx -- so we only work with those.
// NOTE(review): the registers named in the comment above (%rcx/%rdx) differ
// from the destination registers actually listed in DestRegs (%rdi/%rsi);
// confirm which wording is intended.
1124 unsigned DestRegs[] = {X86::RDI, X86::RSI};
1125 bool UsedMask[] = {false, false};
1126 // Filled out in loop.
1127 unsigned SrcRegs[] = {0, 0};
1129 // Then we put the operands in the %rdi and %rsi registers. We spill the
1130 // values in the register before we clobber them, and mark them as used in
1131 // UsedMask. In case the arguments are already in the correct register, we
1132 // emit nops appropriately sized to keep the sled the same size in every
// NOTE(review): I ranges over MI.getNumOperands() while SrcRegs/DestRegs have
// two elements each — presumably the instruction never carries more than two
// operands; verify against the instruction definition.
1134 for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1135 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1136 assert(Op->isReg() && "Only support arguments in registers");
1137 SrcRegs[I] = Op->getReg();
1138 if (SrcRegs[I] != DestRegs[I]) {
1140 EmitAndCountInstruction(
1141 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1143 EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
1147 // Now that the register values are stashed, mov arguments into place.
1148 for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1149 if (SrcRegs[I] != DestRegs[I])
1150 EmitAndCountInstruction(
1151 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1153 // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
1154 // name of the trampoline to be implemented by the XRay runtime.
1155 auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
1156 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
// Position-independent code references the trampoline with the MO_PLT flag.
1157 if (isPositionIndependent())
1158 TOp.setTargetFlags(X86II::MO_PLT);
1160 // Emit the call instruction.
1161 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1162 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1164 // Restore caller-saved and used registers.
// The loop walks the registers in reverse order. NOTE(review): `sizeof
// UsedMask` is a byte count; it equals the element count only when
// sizeof(bool) is 1.
1165 for (unsigned I = sizeof UsedMask; I-- > 0;)
1167 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1169 EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
1171 OutStreamer->AddComment("xray custom event end.");
1173 // Record the sled version. Older versions of this sled were spelled
1174 // differently, so we let the runtime handle the different offsets we're
1176 recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
// Emits the XRay typed-event sled: a labelled two-byte jump, pushes and moves
// that place up to three arguments into DestRegs, a call to
// __xray_TypedEvent, pops of the registers pushed earlier, and finally a sled
// record for the label.
// NOTE(review): the lines that set and test UsedMask, plus the else/brace
// lines, are not visible in this listing.
1179 void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
1180 X86MCInstLower &MCIL) {
1181 assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
1183 // We want to emit the following pattern, which follows the x86 calling
1184 // convention to prepare for the trampoline call to be patched in.
1187 // .Lxray_event_sled_N:
1188 // jmp +N // jump across the instrumentation sled
1189 // ... // set up arguments in register
1190 // callq __xray_TypedEvent@plt // force dependency to symbol
1194 // After patching, it would look something like:
1196 // nopw (2-byte nop)
1198 // callq __xrayTypedEvent // already lowered
1202 // First we emit the label and the jump.
1203 auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
1204 OutStreamer->AddComment("# XRay Typed Event Log");
1205 OutStreamer->EmitCodeAlignment(2);
1206 OutStreamer->EmitLabel(CurSled);
1208 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1209 // an operand (computed as an offset from the jmp instruction).
1210 // FIXME: Find another less hacky way to force the relative jump.
1211 OutStreamer->EmitBinaryData("\xeb\x14");
1213 // An x86-64 convention may place three arguments into %rcx, %rdx, and R8,
1214 // so we'll work with those. Or we may be called via SystemV, in which case
1215 // we don't have to do any translation.
1216 unsigned DestRegs[] = {X86::RDI, X86::RSI, X86::RDX};
1217 bool UsedMask[] = {false, false, false};
1219 // Will fill out src regs in the loop.
1220 unsigned SrcRegs[] = {0, 0, 0};
1222 // Then we put the operands in the SystemV registers. We spill the values in
1223 // the registers before we clobber them, and mark them as used in UsedMask.
1224 // In case the arguments are already in the correct register, we emit nops
1225 // appropriately sized to keep the sled the same size in every situation.
// NOTE(review): I ranges over MI.getNumOperands() while SrcRegs/DestRegs have
// three elements each — presumably the instruction never carries more than
// three operands; verify against the instruction definition.
1226 for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1227 if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
1228 // TODO: Is register only support adequate?
1229 assert(Op->isReg() && "Only supports arguments in registers");
1230 SrcRegs[I] = Op->getReg();
1231 if (SrcRegs[I] != DestRegs[I]) {
1233 EmitAndCountInstruction(
1234 MCInstBuilder(X86::PUSH64r).addReg(DestRegs[I]));
1236 EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
1240 // In the above loop we only stash all of the destination registers or emit
1241 // nops if the arguments are already in the right place. Doing the actual
1242 // moving is postponed until after all the registers are stashed so nothing
1243 // is clobbered. We've already added nops to account for the size of mov and
1244 // push if the register is in the right place, so we only have to worry about
1246 for (unsigned I = 0; I < MI.getNumOperands(); ++I)
1248 EmitAndCountInstruction(
1249 MCInstBuilder(X86::MOV64rr).addReg(DestRegs[I]).addReg(SrcRegs[I]));
1251 // We emit a hard dependency on the __xray_TypedEvent symbol, which is the
1252 // name of the trampoline to be implemented by the XRay runtime.
1253 auto TSym = OutContext.getOrCreateSymbol("__xray_TypedEvent");
1254 MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
// Position-independent code references the trampoline with the MO_PLT flag.
1255 if (isPositionIndependent())
1256 TOp.setTargetFlags(X86II::MO_PLT);
1258 // Emit the call instruction.
1259 EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
1260 .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
1262 // Restore caller-saved and used registers.
// The loop walks the registers in reverse order. NOTE(review): `sizeof
// UsedMask` is a byte count; it equals the element count only when
// sizeof(bool) is 1.
1263 for (unsigned I = sizeof UsedMask; I-- > 0;)
1265 EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(DestRegs[I]));
1267 EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
1269 OutStreamer->AddComment("xray typed event end.");
1271 // Record the sled version.
1272 recordSled(CurSled, MI, SledKind::TYPED_EVENT, 0);
// Emits the XRay function-entry sled: a 2-aligned label, the two bytes
// 0xEB 0x09 (a short jump over the padding), nine bytes of nops, and a
// FUNCTION_ENTER sled record for the label. MCIL is not used by the visible
// code.
1275 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
1276 X86MCInstLower &MCIL) {
1277 // We want to emit the following pattern:
1282 // # 9 bytes worth of noops
1284 // We need the 9 bytes because at runtime, we'd be patching over the full 11
1285 // bytes with the following pattern:
1287 // mov %r10, <function id, 32-bit> // 6 bytes
1288 // call <relative offset, 32-bits> // 5 bytes
1290 auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1291 OutStreamer->EmitCodeAlignment(2);
1292 OutStreamer->EmitLabel(CurSled);
1294 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1295 // an operand (computed as an offset from the jmp instruction).
1296 // FIXME: Find another less hacky way to force the relative jump.
1297 OutStreamer->EmitBytes("\xeb\x09");
1298 EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
1299 recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
// Emits the XRay function-exit sled: a 2-aligned label, the return
// instruction rebuilt from MI's operands, ten bytes of nops, and a
// FUNCTION_EXIT sled record for the label.
// NOTE(review): the declaration of Ret is not visible in this listing.
1302 void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
1303 X86MCInstLower &MCIL) {
1304 // Since PATCHABLE_RET takes the opcode of the return statement as an
1305 // argument, we use that to emit the correct form of the RET that we want.
1306 // i.e. when we see this:
1308 // PATCHABLE_RET X86::RET ...
1310 // We should emit the RET followed by sleds.
1314 // ret # or equivalent instruction
1315 // # 10 bytes worth of noops
1317 // This just makes sure that the alignment for the next instruction is 2.
1318 auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1319 OutStreamer->EmitCodeAlignment(2);
1320 OutStreamer->EmitLabel(CurSled);
// Operand 0 holds the opcode of the real return instruction; the remaining
// operands are lowered and attached to it (operands that lower to nothing are
// skipped).
1321 unsigned OpCode = MI.getOperand(0).getImm();
1323 Ret.setOpcode(OpCode);
1324 for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
1325 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1326 Ret.addOperand(MaybeOperand.getValue());
1327 OutStreamer->EmitInstruction(Ret, getSubtargetInfo());
1328 EmitNops(*OutStreamer, 10, Subtarget->is64Bit(), getSubtargetInfo());
1329 recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
// Emits the XRay tail-call sled (label, short jump, nine bytes of nops,
// TAIL_CALL sled record) and then the actual tail-call instruction rebuilt
// from MI's operands.
// NOTE(review): the declaration of TC is not visible in this listing.
1332 void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
1333 X86MCInstLower &MCIL) {
1334 // Like PATCHABLE_RET, we have the actual instruction in the operands to this
1335 // instruction so we lower that particular instruction and its operands.
1336 // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
1337 // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
1338 // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
1339 // tail call much like how we have it in PATCHABLE_RET.
1340 auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
1341 OutStreamer->EmitCodeAlignment(2);
1342 OutStreamer->EmitLabel(CurSled);
// Target is a fresh temporary symbol; its label is emitted right after the
// nop padding below.
1343 auto Target = OutContext.createTempSymbol();
1345 // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
1346 // an operand (computed as an offset from the jmp instruction).
1347 // FIXME: Find another less hacky way to force the relative jump.
1348 OutStreamer->EmitBytes("\xeb\x09");
1349 EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
1350 OutStreamer->EmitLabel(Target);
1351 recordSled(CurSled, MI, SledKind::TAIL_CALL);
// Operand 0 holds the opcode of the real tail-call instruction.
1353 unsigned OpCode = MI.getOperand(0).getImm();
1355 TC.setOpcode(OpCode);
1357 // Before emitting the instruction, add a comment to indicate that this is
1358 // indeed a tail call.
1359 OutStreamer->AddComment("TAILCALL");
1360 for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
1361 if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
1362 TC.addOperand(MaybeOperand.getValue());
1363 OutStreamer->EmitInstruction(TC, getSubtargetInfo());
1366 // Returns instruction preceding MBBI in MachineFunction.
1367 // If MBBI is the first instruction of the first basic block, returns null.
// NOTE(review): the end of this function (the rest of the loop body and the
// final return) is not visible in this listing.
1368 static MachineBasicBlock::const_iterator
1369 PrevCrossBBInst(MachineBasicBlock::const_iterator MBBI) {
1370 const MachineBasicBlock *MBB = MBBI->getParent();
// While MBBI sits at the start of its block, step to the previous block; a
// default-constructed iterator is returned once the function's first block is
// reached.
1371 while (MBBI == MBB->begin()) {
1372 if (MBB == &MBB->getParent()->front())
1373 return MachineBasicBlock::const_iterator();
1374 MBB = MBB->getPrevNode();
// Looks up the IR Constant behind a constant-pool operand of MI.
// NOTE(review): the return statements are not visible in this listing;
// presumably the early exits return nullptr and the function ends by
// returning C — confirm against the full file.
1380 static const Constant *getConstantFromPool(const MachineInstr &MI,
1381 const MachineOperand &Op) {
// Only constant-pool-index operands with a zero offset are handled.
1382 if (!Op.isCPI() || Op.getOffset() != 0)
// Fetch the constant-pool entries of the function containing MI and select
// the entry named by the operand's index.
1385 ArrayRef<MachineConstantPoolEntry> Constants =
1386 MI.getParent()->getParent()->getConstantPool()->getConstants();
1387 const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
1389 // Bail if this is a machine constant pool entry, we won't be able to dig out
1391 if (ConstantEntry.isMachineConstantPoolEntry())
1394 const Constant *C = ConstantEntry.Val.ConstVal;
1395 assert((!C || ConstantEntry.getType() == C->getType()) &&
1396 "Expected a constant of the same type!");
// Builds a textual comment describing a shuffle: the destination name, an
// optional AVX512 write-mask annotation, and, per mask element, which source
// (and which element of it) the value comes from.
//   MI         - instruction being described; operand 0 is the destination.
//   SrcOp1Idx  - operand index of the first source.
//   SrcOp2Idx  - operand index of the second source (may equal SrcOp1Idx).
//   Mask       - decoded shuffle mask.
// NOTE(review): many short lines of this function (stream output statements,
// continue/else lines, braces, the final return) are not visible in this
// listing.
1400 static std::string getShuffleComment(const MachineInstr *MI, unsigned SrcOp1Idx,
1401 unsigned SrcOp2Idx, ArrayRef<int> Mask) {
1402 std::string Comment;
1404 // Compute the name for a register. This is really goofy because we have
1405 // multiple instruction printers that could (in theory) use different
1406 // names. Fortunately most people use the ATT style (outside of Windows)
1407 // and they actually agree on register naming here. Ultimately, this is
1408 // a comment, and so its OK if it isn't perfect.
1409 auto GetRegisterName = [](unsigned RegNum) -> StringRef {
1410 return X86ATTInstPrinter::getRegisterName(RegNum);
1413 const MachineOperand &DstOp = MI->getOperand(0);
1414 const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
1415 const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
// Non-register operands are shown as "mem".
1417 StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
1418 StringRef Src1Name =
1419 SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
1420 StringRef Src2Name =
1421 SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
1423 // One source operand, fix the mask to print all elements in one span.
// Work on a local copy of the mask; when both sources have the same name,
// indices >= the mask size are folded back into the first-source range.
1424 SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
1425 if (Src1Name == Src2Name)
1426 for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
1427 if (ShuffleMask[i] >= e)
1428 ShuffleMask[i] -= e;
1430 raw_string_ostream CS(Comment);
1433 // Handle AVX512 MASK/MASKZ write mask comments.
1435 // MASKZ: zmmX {%kY} {z}
// A first-source index above 1 means a write-mask operand sits immediately
// before the first source operand.
1436 if (SrcOp1Idx > 1) {
1437 assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
1439 const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
1440 if (WriteMaskOp.isReg()) {
1441 CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
1443 if (SrcOp1Idx == 2) {
1451 for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
1454 if (ShuffleMask[i] == SM_SentinelZero) {
1459 // Otherwise, it must come from src1 or src2. Print the span of elements
1460 // that comes from this src.
1461 bool isSrc1 = ShuffleMask[i] < (int)e;
1462 CS << (isSrc1 ? Src1Name : Src2Name) << '[';
1464 bool IsFirst = true;
// Consume consecutive non-zero-sentinel elements that come from the same
// source as the first element of the span.
1465 while (i != e && ShuffleMask[i] != SM_SentinelZero &&
1466 (ShuffleMask[i] < (int)e) == isSrc1) {
1471 if (ShuffleMask[i] == SM_SentinelUndef)
1474 CS << ShuffleMask[i] % (int)e;
1478 --i; // For loop increments element #.
// Writes an integer constant to CS. Values of at most 64 bits are written as
// their zero-extended value; wider values are written word by word from the
// raw data.
// NOTE(review): the separator/bracket output statements and the else/brace
// lines are not visible in this listing.
1485 static void printConstant(const APInt &Val, raw_ostream &CS) {
1486 if (Val.getBitWidth() <= 64) {
1487 CS << Val.getZExtValue();
1489 // print multi-word constant as (w0,w1)
1491 for (int i = 0, N = Val.getNumWords(); i < N; ++i) {
1494 CS << Val.getRawData()[i];
// Formats a floating-point constant into a small string buffer.
// NOTE(review): the statement that writes Str to CS is not visible in this
// listing.
1500 static void printConstant(const APFloat &Flt, raw_ostream &CS) {
1501 SmallString<32> Str;
1502 // Force scientific notation to distinguish from integers.
1503 Flt.toString(Str, 0, 0);
// Writes an IR constant to CS by dispatching on its kind: undef values,
// ConstantInt (forwarded to the APInt overload) and ConstantFP (forwarded to
// the APFloat overload).
// NOTE(review): the text written for undef values and any fallback branch are
// not visible in this listing.
1507 static void printConstant(const Constant *COp, raw_ostream &CS) {
1508 if (isa<UndefValue>(COp)) {
1510 } else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
1511 printConstant(CI->getValue(), CS);
1512 } else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
1513 printConstant(CF->getValueAPF(), CS);
// Translates an SEH_* pseudo-instruction into unwind directives. Two switches
// are present: the first forwards to the X86 target streamer's FPO emitters,
// the second to the generic streamer's WinCFI emitters.
// NOTE(review): the condition that selects the first (FPO) switch, and the
// break/return/default lines, are not visible in this listing.
1519 void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
1520 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1521 assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
1522 const X86RegisterInfo *RI =
1523 MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1525 // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
1527 X86TargetStreamer *XTS =
1528 static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer());
// FPO path: each opcode passes its immediate operand(s) straight to the
// matching emitFPO* call.
1529 switch (MI->getOpcode()) {
1530 case X86::SEH_PushReg:
1531 XTS->emitFPOPushReg(MI->getOperand(0).getImm());
1533 case X86::SEH_StackAlloc:
1534 XTS->emitFPOStackAlloc(MI->getOperand(0).getImm());
1536 case X86::SEH_StackAlign:
1537 XTS->emitFPOStackAlign(MI->getOperand(0).getImm());
1539 case X86::SEH_SetFrame:
1540 assert(MI->getOperand(1).getImm() == 0 &&
1541 ".cv_fpo_setframe takes no offset");
1542 XTS->emitFPOSetFrame(MI->getOperand(0).getImm());
1544 case X86::SEH_EndPrologue:
1545 XTS->emitFPOEndPrologue();
1547 case X86::SEH_SaveReg:
1548 case X86::SEH_SaveXMM:
1549 case X86::SEH_PushFrame:
1550 llvm_unreachable("SEH_ directive incompatible with FPO");
1553 llvm_unreachable("expected SEH_ instruction");
1558 // Otherwise, use the .seh_ directives for all other Windows platforms.
// WinCFI path: register operands are translated with RI->getSEHRegNum before
// being passed to the streamer.
1559 switch (MI->getOpcode()) {
1560 case X86::SEH_PushReg:
1561 OutStreamer->EmitWinCFIPushReg(
1562 RI->getSEHRegNum(MI->getOperand(0).getImm()));
1565 case X86::SEH_SaveReg:
1566 OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
1567 MI->getOperand(1).getImm());
1570 case X86::SEH_SaveXMM:
1571 OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
1572 MI->getOperand(1).getImm());
1575 case X86::SEH_StackAlloc:
1576 OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
1579 case X86::SEH_SetFrame:
1580 OutStreamer->EmitWinCFISetFrame(
1581 RI->getSEHRegNum(MI->getOperand(0).getImm()),
1582 MI->getOperand(1).getImm());
1585 case X86::SEH_PushFrame:
1586 OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
1589 case X86::SEH_EndPrologue:
1590 OutStreamer->EmitWinCFIEndProlog();
1594 llvm_unreachable("expected SEH_ instruction");
// Maps the register class recorded in an operand description to a register
// width, and aborts via llvm_unreachable for any other class.
// NOTE(review): the return statements are not visible in this listing;
// presumably they return 128, 256 and 512 for the VR128*, VR256* and VR512
// classes respectively — confirm against the full file.
1598 static unsigned getRegisterWidth(const MCOperandInfo &Info) {
1599 if (Info.RegClass == X86::VR128RegClassID ||
1600 Info.RegClass == X86::VR128XRegClassID)
1602 if (Info.RegClass == X86::VR256RegClassID ||
1603 Info.RegClass == X86::VR256XRegClassID)
1605 if (Info.RegClass == X86::VR512RegClassID)
1607 llvm_unreachable("Unknown register class!");
1610 void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
1611 X86MCInstLower MCInstLowering(*MF, *this);
1612 const X86RegisterInfo *RI =
1613 MF->getSubtarget<X86Subtarget>().getRegisterInfo();
1615 // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
1616 // are compressed from EVEX encoding to VEX encoding.
1617 if (TM.Options.MCOptions.ShowMCEncoding) {
1618 if (MI->getAsmPrinterFlags() & X86::AC_EVEX_2_VEX)
1619 OutStreamer->AddComment("EVEX TO VEX Compression ", false);
1622 switch (MI->getOpcode()) {
1623 case TargetOpcode::DBG_VALUE:
1624 llvm_unreachable("Should be handled target independently");
1626 // Emit nothing here but a comment if we can.
1627 case X86::Int_MemBarrier:
1628 OutStreamer->emitRawComment("MEMBARRIER");
1631 case X86::EH_RETURN:
1632 case X86::EH_RETURN64: {
1633 // Lower these as normal, but add some comments.
1634 unsigned Reg = MI->getOperand(0).getReg();
1635 OutStreamer->AddComment(StringRef("eh_return, addr: %") +
1636 X86ATTInstPrinter::getRegisterName(Reg));
1639 case X86::CLEANUPRET: {
1640 // Lower these as normal, but add some comments.
1641 OutStreamer->AddComment("CLEANUPRET");
1645 case X86::CATCHRET: {
1646 // Lower these as normal, but add some comments.
1647 OutStreamer->AddComment("CATCHRET");
1654 case X86::TAILJMPd_CC:
1655 case X86::TAILJMPr64:
1656 case X86::TAILJMPm64:
1657 case X86::TAILJMPd64:
1658 case X86::TAILJMPd64_CC:
1659 case X86::TAILJMPr64_REX:
1660 case X86::TAILJMPm64_REX:
1661 // Lower these as normal, but add some comments.
1662 OutStreamer->AddComment("TAILCALL");
1665 case X86::TLS_addr32:
1666 case X86::TLS_addr64:
1667 case X86::TLS_base_addr32:
1668 case X86::TLS_base_addr64:
1669 return LowerTlsAddr(MCInstLowering, *MI);
1671 case X86::MOVPC32r: {
1672 // This is a pseudo op for a two instruction sequence with a label, which
1679 MCSymbol *PICBase = MF->getPICBaseSymbol();
1680 // FIXME: We would like an efficient form for this, so we don't have to do a
1681 // lot of extra uniquing.
1682 EmitAndCountInstruction(
1683 MCInstBuilder(X86::CALLpcrel32)
1684 .addExpr(MCSymbolRefExpr::create(PICBase, OutContext)));
1686 const X86FrameLowering *FrameLowering =
1687 MF->getSubtarget<X86Subtarget>().getFrameLowering();
1688 bool hasFP = FrameLowering->hasFP(*MF);
1690 // TODO: This is needed only if we require precise CFA.
1691 bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() &&
1692 !OutStreamer->getDwarfFrameInfos().back().End;
1694 int stackGrowth = -RI->getSlotSize();
1696 if (HasActiveDwarfFrame && !hasFP) {
1697 OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth);
1701 OutStreamer->EmitLabel(PICBase);
1704 EmitAndCountInstruction(
1705 MCInstBuilder(X86::POP32r).addReg(MI->getOperand(0).getReg()));
1707 if (HasActiveDwarfFrame && !hasFP) {
1708 OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth);
1713 case X86::ADD32ri: {
1714 // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
1715 if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
1718 // Okay, we have something like:
1719 // EAX = ADD32ri EAX, MO_GOT_ABSOLUTE_ADDRESS(@MYGLOBAL)
1721 // For this, we want to print something like:
1722 // MYGLOBAL + (. - PICBASE)
1723 // However, we can't generate a ".", so just emit a new label here and refer
1725 MCSymbol *DotSym = OutContext.createTempSymbol();
1726 OutStreamer->EmitLabel(DotSym);
1728 // Now that we have emitted the label, lower the complex operand expression.
1729 MCSymbol *OpSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
1731 const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
1732 const MCExpr *PICBase =
1733 MCSymbolRefExpr::create(MF->getPICBaseSymbol(), OutContext);
1734 DotExpr = MCBinaryExpr::createSub(DotExpr, PICBase, OutContext);
1736 DotExpr = MCBinaryExpr::createAdd(
1737 MCSymbolRefExpr::create(OpSym, OutContext), DotExpr, OutContext);
1739 EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri)
1740 .addReg(MI->getOperand(0).getReg())
1741 .addReg(MI->getOperand(1).getReg())
1745 case TargetOpcode::STATEPOINT:
1746 return LowerSTATEPOINT(*MI, MCInstLowering);
1748 case TargetOpcode::FAULTING_OP:
1749 return LowerFAULTING_OP(*MI, MCInstLowering);
1751 case TargetOpcode::FENTRY_CALL:
1752 return LowerFENTRY_CALL(*MI, MCInstLowering);
1754 case TargetOpcode::PATCHABLE_OP:
1755 return LowerPATCHABLE_OP(*MI, MCInstLowering);
1757 case TargetOpcode::STACKMAP:
1758 return LowerSTACKMAP(*MI);
1760 case TargetOpcode::PATCHPOINT:
1761 return LowerPATCHPOINT(*MI, MCInstLowering);
1763 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
1764 return LowerPATCHABLE_FUNCTION_ENTER(*MI, MCInstLowering);
1766 case TargetOpcode::PATCHABLE_RET:
1767 return LowerPATCHABLE_RET(*MI, MCInstLowering);
1769 case TargetOpcode::PATCHABLE_TAIL_CALL:
1770 return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
1772 case TargetOpcode::PATCHABLE_EVENT_CALL:
1773 return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
1775 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
1776 return LowerPATCHABLE_TYPED_EVENT_CALL(*MI, MCInstLowering);
1778 case X86::MORESTACK_RET:
1779 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
1782 case X86::MORESTACK_RET_RESTORE_R10:
1783 // Return, then restore R10.
1784 EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
1785 EmitAndCountInstruction(
1786 MCInstBuilder(X86::MOV64rr).addReg(X86::R10).addReg(X86::RAX));
1789 case X86::SEH_PushReg:
1790 case X86::SEH_SaveReg:
1791 case X86::SEH_SaveXMM:
1792 case X86::SEH_StackAlloc:
1793 case X86::SEH_StackAlign:
1794 case X86::SEH_SetFrame:
1795 case X86::SEH_PushFrame:
1796 case X86::SEH_EndPrologue:
1797 EmitSEHInstruction(MI);
1800 case X86::SEH_Epilogue: {
1801 assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
1802 MachineBasicBlock::const_iterator MBBI(MI);
1803 // Check if preceded by a call and emit nop if so.
1804 for (MBBI = PrevCrossBBInst(MBBI);
1805 MBBI != MachineBasicBlock::const_iterator();
1806 MBBI = PrevCrossBBInst(MBBI)) {
1807 // Conservatively assume that pseudo instructions don't emit code and keep
1808 // looking for a call. We may emit an unnecessary nop in some cases.
1809 if (!MBBI->isPseudo()) {
1811 EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
1818 // Lower PSHUFB and VPERMILP normally but add a comment if we can find
1819 // a constant shuffle mask. We won't be able to do this at the MC layer
1820 // because the mask isn't an immediate.
1822 case X86::VPSHUFBrm:
1823 case X86::VPSHUFBYrm:
1824 case X86::VPSHUFBZ128rm:
1825 case X86::VPSHUFBZ128rmk:
1826 case X86::VPSHUFBZ128rmkz:
1827 case X86::VPSHUFBZ256rm:
1828 case X86::VPSHUFBZ256rmk:
1829 case X86::VPSHUFBZ256rmkz:
1830 case X86::VPSHUFBZrm:
1831 case X86::VPSHUFBZrmk:
1832 case X86::VPSHUFBZrmkz: {
1833 if (!OutStreamer->isVerboseAsm())
1835 unsigned SrcIdx, MaskIdx;
1836 switch (MI->getOpcode()) {
1837 default: llvm_unreachable("Invalid opcode");
1839 case X86::VPSHUFBrm:
1840 case X86::VPSHUFBYrm:
1841 case X86::VPSHUFBZ128rm:
1842 case X86::VPSHUFBZ256rm:
1843 case X86::VPSHUFBZrm:
1844 SrcIdx = 1; MaskIdx = 5; break;
1845 case X86::VPSHUFBZ128rmkz:
1846 case X86::VPSHUFBZ256rmkz:
1847 case X86::VPSHUFBZrmkz:
1848 SrcIdx = 2; MaskIdx = 6; break;
1849 case X86::VPSHUFBZ128rmk:
1850 case X86::VPSHUFBZ256rmk:
1851 case X86::VPSHUFBZrmk:
1852 SrcIdx = 3; MaskIdx = 7; break;
1855 assert(MI->getNumOperands() >= 6 &&
1856 "We should always have at least 6 operands!");
1858 const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
1859 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1860 unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
1861 SmallVector<int, 64> Mask;
1862 DecodePSHUFBMask(C, Width, Mask);
1864 OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
1865 !EnablePrintSchedInfo);
1870 case X86::VPERMILPSrm:
1871 case X86::VPERMILPSYrm:
1872 case X86::VPERMILPSZ128rm:
1873 case X86::VPERMILPSZ128rmk:
1874 case X86::VPERMILPSZ128rmkz:
1875 case X86::VPERMILPSZ256rm:
1876 case X86::VPERMILPSZ256rmk:
1877 case X86::VPERMILPSZ256rmkz:
1878 case X86::VPERMILPSZrm:
1879 case X86::VPERMILPSZrmk:
1880 case X86::VPERMILPSZrmkz:
1881 case X86::VPERMILPDrm:
1882 case X86::VPERMILPDYrm:
1883 case X86::VPERMILPDZ128rm:
1884 case X86::VPERMILPDZ128rmk:
1885 case X86::VPERMILPDZ128rmkz:
1886 case X86::VPERMILPDZ256rm:
1887 case X86::VPERMILPDZ256rmk:
1888 case X86::VPERMILPDZ256rmkz:
1889 case X86::VPERMILPDZrm:
1890 case X86::VPERMILPDZrmk:
1891 case X86::VPERMILPDZrmkz: {
1892 if (!OutStreamer->isVerboseAsm())
1894 unsigned SrcIdx, MaskIdx;
1896 switch (MI->getOpcode()) {
1897 default: llvm_unreachable("Invalid opcode");
1898 case X86::VPERMILPSrm:
1899 case X86::VPERMILPSYrm:
1900 case X86::VPERMILPSZ128rm:
1901 case X86::VPERMILPSZ256rm:
1902 case X86::VPERMILPSZrm:
1903 SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;
1904 case X86::VPERMILPSZ128rmkz:
1905 case X86::VPERMILPSZ256rmkz:
1906 case X86::VPERMILPSZrmkz:
1907 SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;
1908 case X86::VPERMILPSZ128rmk:
1909 case X86::VPERMILPSZ256rmk:
1910 case X86::VPERMILPSZrmk:
1911 SrcIdx = 3; MaskIdx = 7; ElSize = 32; break;
1912 case X86::VPERMILPDrm:
1913 case X86::VPERMILPDYrm:
1914 case X86::VPERMILPDZ128rm:
1915 case X86::VPERMILPDZ256rm:
1916 case X86::VPERMILPDZrm:
1917 SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;
1918 case X86::VPERMILPDZ128rmkz:
1919 case X86::VPERMILPDZ256rmkz:
1920 case X86::VPERMILPDZrmkz:
1921 SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;
1922 case X86::VPERMILPDZ128rmk:
1923 case X86::VPERMILPDZ256rmk:
1924 case X86::VPERMILPDZrmk:
1925 SrcIdx = 3; MaskIdx = 7; ElSize = 64; break;
1928 assert(MI->getNumOperands() >= 6 &&
1929 "We should always have at least 6 operands!");
1931 const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
1932 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1933 unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
1934 SmallVector<int, 16> Mask;
1935 DecodeVPERMILPMask(C, ElSize, Width, Mask);
1937 OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
1938 !EnablePrintSchedInfo);
1943 case X86::VPERMIL2PDrm:
1944 case X86::VPERMIL2PSrm:
1945 case X86::VPERMIL2PDYrm:
1946 case X86::VPERMIL2PSYrm: {
1947 if (!OutStreamer->isVerboseAsm())
1949 assert(MI->getNumOperands() >= 8 &&
1950 "We should always have at least 8 operands!");
1952 const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
1953 if (!CtrlOp.isImm())
1957 switch (MI->getOpcode()) {
1958 default: llvm_unreachable("Invalid opcode");
1959 case X86::VPERMIL2PSrm: case X86::VPERMIL2PSYrm: ElSize = 32; break;
1960 case X86::VPERMIL2PDrm: case X86::VPERMIL2PDYrm: ElSize = 64; break;
1963 const MachineOperand &MaskOp = MI->getOperand(6);
1964 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1965 unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
1966 SmallVector<int, 16> Mask;
1967 DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
1969 OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
1970 !EnablePrintSchedInfo);
1975 case X86::VPPERMrrm: {
1976 if (!OutStreamer->isVerboseAsm())
1978 assert(MI->getNumOperands() >= 7 &&
1979 "We should always have at least 7 operands!");
1981 const MachineOperand &MaskOp = MI->getOperand(6);
1982 if (auto *C = getConstantFromPool(*MI, MaskOp)) {
1983 unsigned Width = getRegisterWidth(MI->getDesc().OpInfo[0]);
1984 SmallVector<int, 16> Mask;
1985 DecodeVPPERMMask(C, Width, Mask);
1987 OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
1988 !EnablePrintSchedInfo);
1993 case X86::MMX_MOVQ64rm: {
1994 if (!OutStreamer->isVerboseAsm())
1996 if (MI->getNumOperands() <= 4)
1998 if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
1999 std::string Comment;
2000 raw_string_ostream CS(Comment);
2001 const MachineOperand &DstOp = MI->getOperand(0);
2002 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2003 if (auto *CF = dyn_cast<ConstantFP>(C)) {
2004 CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
2005 OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
2011 #define MOV_CASE(Prefix, Suffix) \
2012 case X86::Prefix##MOVAPD##Suffix##rm: \
2013 case X86::Prefix##MOVAPS##Suffix##rm: \
2014 case X86::Prefix##MOVUPD##Suffix##rm: \
2015 case X86::Prefix##MOVUPS##Suffix##rm: \
2016 case X86::Prefix##MOVDQA##Suffix##rm: \
2017 case X86::Prefix##MOVDQU##Suffix##rm:
// MOV_AVX512_CASE(Suffix) expands to ten consecutive `case` labels covering
// the X86::VMOVDQA{64,32}, X86::VMOVDQU{64,32,16,8} and
// X86::VMOV{APS,APD,UPS,UPD} opcodes, each with <Suffix>rm pasted onto the
// name. Like MOV_CASE, the expansion ends with the final label's ':'.
2019 #define MOV_AVX512_CASE(Suffix) \
2020 case X86::VMOVDQA64##Suffix##rm: \
2021 case X86::VMOVDQA32##Suffix##rm: \
2022 case X86::VMOVDQU64##Suffix##rm: \
2023 case X86::VMOVDQU32##Suffix##rm: \
2024 case X86::VMOVDQU16##Suffix##rm: \
2025 case X86::VMOVDQU8##Suffix##rm: \
2026 case X86::VMOVAPS##Suffix##rm: \
2027 case X86::VMOVAPD##Suffix##rm: \
2028 case X86::VMOVUPS##Suffix##rm: \
2029 case X86::VMOVUPD##Suffix##rm:
// CASE_ALL_MOV_RM() expands to the full set of `case` labels produced by
// MOV_CASE for the unprefixed, V-prefixed and V-prefixed/Y-suffixed opcodes,
// followed by those produced by MOV_AVX512_CASE for the Z, Z256 and Z128
// suffixes. It lets a single switch arm handle all of these "rm" move opcodes.
2031 #define CASE_ALL_MOV_RM() \
2032 MOV_CASE(, ) /* SSE */ \
2033 MOV_CASE(V, ) /* AVX-128 */ \
2034 MOV_CASE(V, Y) /* AVX-256 */ \
2035 MOV_AVX512_CASE(Z) \
2036 MOV_AVX512_CASE(Z256) \
2037 MOV_AVX512_CASE(Z128)
2039 // For loads from a constant pool to a vector register, print the constant
2042 case X86::VBROADCASTF128:
2043 case X86::VBROADCASTI128:
2044 case X86::VBROADCASTF32X4Z256rm:
2045 case X86::VBROADCASTF32X4rm:
2046 case X86::VBROADCASTF32X8rm:
2047 case X86::VBROADCASTF64X2Z128rm:
2048 case X86::VBROADCASTF64X2rm:
2049 case X86::VBROADCASTF64X4rm:
2050 case X86::VBROADCASTI32X4Z256rm:
2051 case X86::VBROADCASTI32X4rm:
2052 case X86::VBROADCASTI32X8rm:
2053 case X86::VBROADCASTI64X2Z128rm:
2054 case X86::VBROADCASTI64X2rm:
2055 case X86::VBROADCASTI64X4rm:
2056 if (!OutStreamer->isVerboseAsm())
2058 if (MI->getNumOperands() <= 4)
2060 if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
2062 // Override NumLanes for the broadcast instructions.
2063 switch (MI->getOpcode()) {
2064 case X86::VBROADCASTF128: NumLanes = 2; break;
2065 case X86::VBROADCASTI128: NumLanes = 2; break;
2066 case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break;
2067 case X86::VBROADCASTF32X4rm: NumLanes = 4; break;
2068 case X86::VBROADCASTF32X8rm: NumLanes = 2; break;
2069 case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break;
2070 case X86::VBROADCASTF64X2rm: NumLanes = 4; break;
2071 case X86::VBROADCASTF64X4rm: NumLanes = 2; break;
2072 case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break;
2073 case X86::VBROADCASTI32X4rm: NumLanes = 4; break;
2074 case X86::VBROADCASTI32X8rm: NumLanes = 2; break;
2075 case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break;
2076 case X86::VBROADCASTI64X2rm: NumLanes = 4; break;
2077 case X86::VBROADCASTI64X4rm: NumLanes = 2; break;
2080 std::string Comment;
2081 raw_string_ostream CS(Comment);
2082 const MachineOperand &DstOp = MI->getOperand(0);
2083 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2084 if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
2086 for (int l = 0; l != NumLanes; ++l) {
2087 for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements;
2089 if (i != 0 || l != 0)
2091 if (CDS->getElementType()->isIntegerTy())
2092 printConstant(CDS->getElementAsAPInt(i), CS);
2093 else if (CDS->getElementType()->isHalfTy() ||
2094 CDS->getElementType()->isFloatTy() ||
2095 CDS->getElementType()->isDoubleTy())
2096 printConstant(CDS->getElementAsAPFloat(i), CS);
2102 OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
2103 } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
2105 for (int l = 0; l != NumLanes; ++l) {
2106 for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands;
2108 if (i != 0 || l != 0)
2110 printConstant(CV->getOperand(i), CS);
2114 OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
2118 case X86::MOVDDUPrm:
2119 case X86::VMOVDDUPrm:
2120 case X86::VMOVDDUPZ128rm:
2121 case X86::VBROADCASTSSrm:
2122 case X86::VBROADCASTSSYrm:
2123 case X86::VBROADCASTSSZ128m:
2124 case X86::VBROADCASTSSZ256m:
2125 case X86::VBROADCASTSSZm:
2126 case X86::VBROADCASTSDYrm:
2127 case X86::VBROADCASTSDZ256m:
2128 case X86::VBROADCASTSDZm:
2129 case X86::VPBROADCASTBrm:
2130 case X86::VPBROADCASTBYrm:
2131 case X86::VPBROADCASTBZ128m:
2132 case X86::VPBROADCASTBZ256m:
2133 case X86::VPBROADCASTBZm:
2134 case X86::VPBROADCASTDrm:
2135 case X86::VPBROADCASTDYrm:
2136 case X86::VPBROADCASTDZ128m:
2137 case X86::VPBROADCASTDZ256m:
2138 case X86::VPBROADCASTDZm:
2139 case X86::VPBROADCASTQrm:
2140 case X86::VPBROADCASTQYrm:
2141 case X86::VPBROADCASTQZ128m:
2142 case X86::VPBROADCASTQZ256m:
2143 case X86::VPBROADCASTQZm:
2144 case X86::VPBROADCASTWrm:
2145 case X86::VPBROADCASTWYrm:
2146 case X86::VPBROADCASTWZ128m:
2147 case X86::VPBROADCASTWZ256m:
2148 case X86::VPBROADCASTWZm:
2149 if (!OutStreamer->isVerboseAsm())
2151 if (MI->getNumOperands() <= 4)
2153 if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
2155 switch (MI->getOpcode()) {
2156 default: llvm_unreachable("Invalid opcode");
2157 case X86::MOVDDUPrm: NumElts = 2; break;
2158 case X86::VMOVDDUPrm: NumElts = 2; break;
2159 case X86::VMOVDDUPZ128rm: NumElts = 2; break;
2160 case X86::VBROADCASTSSrm: NumElts = 4; break;
2161 case X86::VBROADCASTSSYrm: NumElts = 8; break;
2162 case X86::VBROADCASTSSZ128m: NumElts = 4; break;
2163 case X86::VBROADCASTSSZ256m: NumElts = 8; break;
2164 case X86::VBROADCASTSSZm: NumElts = 16; break;
2165 case X86::VBROADCASTSDYrm: NumElts = 4; break;
2166 case X86::VBROADCASTSDZ256m: NumElts = 4; break;
2167 case X86::VBROADCASTSDZm: NumElts = 8; break;
2168 case X86::VPBROADCASTBrm: NumElts = 16; break;
2169 case X86::VPBROADCASTBYrm: NumElts = 32; break;
2170 case X86::VPBROADCASTBZ128m: NumElts = 16; break;
2171 case X86::VPBROADCASTBZ256m: NumElts = 32; break;
2172 case X86::VPBROADCASTBZm: NumElts = 64; break;
2173 case X86::VPBROADCASTDrm: NumElts = 4; break;
2174 case X86::VPBROADCASTDYrm: NumElts = 8; break;
2175 case X86::VPBROADCASTDZ128m: NumElts = 4; break;
2176 case X86::VPBROADCASTDZ256m: NumElts = 8; break;
2177 case X86::VPBROADCASTDZm: NumElts = 16; break;
2178 case X86::VPBROADCASTQrm: NumElts = 2; break;
2179 case X86::VPBROADCASTQYrm: NumElts = 4; break;
2180 case X86::VPBROADCASTQZ128m: NumElts = 2; break;
2181 case X86::VPBROADCASTQZ256m: NumElts = 4; break;
2182 case X86::VPBROADCASTQZm: NumElts = 8; break;
2183 case X86::VPBROADCASTWrm: NumElts = 8; break;
2184 case X86::VPBROADCASTWYrm: NumElts = 16; break;
2185 case X86::VPBROADCASTWZ128m: NumElts = 8; break;
2186 case X86::VPBROADCASTWZ256m: NumElts = 16; break;
2187 case X86::VPBROADCASTWZm: NumElts = 32; break;
2190 std::string Comment;
2191 raw_string_ostream CS(Comment);
2192 const MachineOperand &DstOp = MI->getOperand(0);
2193 CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
2195 for (int i = 0; i != NumElts; ++i) {
2198 printConstant(C, CS);
2201 OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
2206 MCInstLowering.Lower(MI, TmpInst);
2207 if (MI->getAsmPrinterFlag(MachineInstr::NoSchedComment))
2208 TmpInst.setFlags(TmpInst.getFlags() | X86::NO_SCHED_INFO);
2210 // Stackmap shadows cannot include branch targets, so we can count the bytes
2211 // in a call towards the shadow, but must ensure that no thread returns
2212 // in to the stackmap shadow. The only way to achieve this is if the call
2213 // is at the end of the shadow.
2215 // Count the size of the call towards the shadow
2216 SMShadowTracker.count(TmpInst, getSubtargetInfo(), CodeEmitter.get());
2217 // Then flush the shadow so that we fill with nops before the call, not
2219 SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
2220 // Then emit the call
2221 OutStreamer->EmitInstruction(TmpInst, getSubtargetInfo());
2225 EmitAndCountInstruction(TmpInst);