contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp

   1 //===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "MCTargetDesc/X86BaseInfo.h"
  10 #include "MCTargetDesc/X86FixupKinds.h"
  11 #include "llvm/ADT/StringSwitch.h"
  12 #include "llvm/BinaryFormat/ELF.h"
  13 #include "llvm/BinaryFormat/MachO.h"
  14 #include "llvm/MC/MCAsmBackend.h"
  15 #include "llvm/MC/MCAsmLayout.h"
  16 #include "llvm/MC/MCAssembler.h"
  17 #include "llvm/MC/MCCodeEmitter.h"
  18 #include "llvm/MC/MCContext.h"
  19 #include "llvm/MC/MCDwarf.h"
  20 #include "llvm/MC/MCELFObjectWriter.h"
  21 #include "llvm/MC/MCExpr.h"
  22 #include "llvm/MC/MCFixupKindInfo.h"
  23 #include "llvm/MC/MCInst.h"
  24 #include "llvm/MC/MCInstrInfo.h"
  25 #include "llvm/MC/MCMachObjectWriter.h"
  26 #include "llvm/MC/MCObjectStreamer.h"
  27 #include "llvm/MC/MCObjectWriter.h"
  28 #include "llvm/MC/MCRegisterInfo.h"
  29 #include "llvm/MC/MCSectionMachO.h"
  30 #include "llvm/MC/MCSubtargetInfo.h"
  31 #include "llvm/MC/MCValue.h"
  32 #include "llvm/Support/CommandLine.h"
  33 #include "llvm/Support/ErrorHandling.h"
  34 #include "llvm/Support/TargetRegistry.h"
  35 #include "llvm/Support/raw_ostream.h"
  36
  37 using namespace llvm;
  38
  39 namespace {
  40 /// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
  41 class X86AlignBranchKind {
  42 private:
  43   uint8_t AlignBranchKind = 0;
  44
  45 public:
  46   void operator=(const std::string &Val) {
  47     if (Val.empty())
  48       return;
  49     SmallVector<StringRef, 6> BranchTypes;
  50     StringRef(Val).split(BranchTypes, '+', -1, false);
  51     for (auto BranchType : BranchTypes) {
  52       if (BranchType == "fused")
  53         addKind(X86::AlignBranchFused);
  54       else if (BranchType == "jcc")
  55         addKind(X86::AlignBranchJcc);
  56       else if (BranchType == "jmp")
  57         addKind(X86::AlignBranchJmp);
  58       else if (BranchType == "call")
  59         addKind(X86::AlignBranchCall);
  60       else if (BranchType == "ret")
  61         addKind(X86::AlignBranchRet);
  62       else if (BranchType == "indirect")
  63         addKind(X86::AlignBranchIndirect);
  64       else {
  65         errs() << "invalid argument " << BranchType.str()
  66                << " to -x86-align-branch=; each element must be one of: fused, "
  67                   "jcc, jmp, call, ret, indirect.(plus separated)\n";
  68       }
  69     }
  70   }
  71
  72   operator uint8_t() const { return AlignBranchKind; }
  73   void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
  74 };
  75
// Storage that the -x86-align-branch option writes into (it is bound to the
// option below through cl::location).
X86AlignBranchKind X86AlignBranchKindLoc;

// -x86-align-branch-boundary: size of the boundary that selected branches are
// aligned against. The X86AsmBackend constructor only honors it when the
// option was given explicitly on the command line.
cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

// -x86-align-branch: '+'-separated list of branch kinds to align. The string
// is parsed by X86AlignBranchKind::operator= and accumulated into
// X86AlignBranchKindLoc.
cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
             "\njcc      indicates conditional jumps"
             "\nfused    indicates fused conditional jumps"
             "\njmp      indicates direct unconditional jumps"
             "\ncall     indicates direct and indirect calls"
             "\nret      indicates rets"
             "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

// -x86-branches-within-32B-boundaries: master switch. When set, the
// X86AsmBackend constructor selects a 32-byte boundary and the fused, jcc and
// jmp kinds; the two options above can still override those defaults.
cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102.  May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

// -x86-pad-max-prefix-size: copied into X86AsmBackend::TargetPrefixMax when
// given explicitly; a value of 0 keeps enhanced relaxation disabled (see
// allowEnhancedRelaxation()).
cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

// -x86-pad-for-align (hidden, default on).
// NOTE(review): not read by any code in this part of the file; presumably
// consulted during layout -- confirm against finishLayout().
cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

// -x86-pad-for-branch-align (hidden, default on): one of the conditions
// checked by allowEnhancedRelaxation().
cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
 118
/// Thin subclass of MCELFObjectTargetWriter that simply forwards its
/// configuration to the base class.
class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
  /// Forwards \p is64Bit, \p OSABI, \p EMachine and \p HasRelocationAddend to
  /// the base-class constructor. The trailing \p foobar argument is accepted
  /// but never read.
  X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
                     bool HasRelocationAddend, bool foobar)
    : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
};
 125
 126 class X86AsmBackend : public MCAsmBackend {
 127   const MCSubtargetInfo &STI;
 128   std::unique_ptr<const MCInstrInfo> MCII;
 129   X86AlignBranchKind AlignBranchType;
 130   Align AlignBoundary;
 131   unsigned TargetPrefixMax = 0;
 132
 133   MCInst PrevInst;
 134   MCBoundaryAlignFragment *PendingBA = nullptr;
 135   std::pair<MCFragment *, size_t> PrevInstPosition;
 136   bool CanPadInst;
 137
 138   uint8_t determinePaddingPrefix(const MCInst &Inst) const;
 139   bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
 140   bool needAlign(const MCInst &Inst) const;
 141   bool canPadBranches(MCObjectStreamer &OS) const;
 142   bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;
 143
 144 public:
 145   X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
 146       : MCAsmBackend(support::little), STI(STI),
 147         MCII(T.createMCInstrInfo()) {
 148     if (X86AlignBranchWithin32BBoundaries) {
 149       // At the moment, this defaults to aligning fused branches, unconditional
 150       // jumps, and (unfused) conditional jumps with nops.  Both the
 151       // instructions aligned and the alignment method (nop vs prefix) may
 152       // change in the future.
 153       AlignBoundary = assumeAligned(32);;
 154       AlignBranchType.addKind(X86::AlignBranchFused);
 155       AlignBranchType.addKind(X86::AlignBranchJcc);
 156       AlignBranchType.addKind(X86::AlignBranchJmp);
 157     }
 158     // Allow overriding defaults set by master flag
 159     if (X86AlignBranchBoundary.getNumOccurrences())
 160       AlignBoundary = assumeAligned(X86AlignBranchBoundary);
 161     if (X86AlignBranch.getNumOccurrences())
 162       AlignBranchType = X86AlignBranchKindLoc;
 163     if (X86PadMaxPrefixSize.getNumOccurrences())
 164       TargetPrefixMax = X86PadMaxPrefixSize;
 165   }
 166
 167   bool allowAutoPadding() const override;
 168   bool allowEnhancedRelaxation() const override;
 169   void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
 170   void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
 171
 172   unsigned getNumFixupKinds() const override {
 173     return X86::NumTargetFixupKinds;
 174   }
 175
 176   Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
 177
 178   const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
 179
 180   bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
 181                              const MCValue &Target) override;
 182
 183   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
 184                   const MCValue &Target, MutableArrayRef<char> Data,
 185                   uint64_t Value, bool IsResolved,
 186                   const MCSubtargetInfo *STI) const override;
 187
 188   bool mayNeedRelaxation(const MCInst &Inst,
 189                          const MCSubtargetInfo &STI) const override;
 190
 191   bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
 192                             const MCRelaxableFragment *DF,
 193                             const MCAsmLayout &Layout) const override;
 194
 195   void relaxInstruction(MCInst &Inst,
 196                         const MCSubtargetInfo &STI) const override;
 197
 198   bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
 199                                    MCCodeEmitter &Emitter,
 200                                    unsigned &RemainingSize) const;
 201
 202   bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
 203                                unsigned &RemainingSize) const;
 204
 205   bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
 206                               unsigned &RemainingSize) const;
 207
 208   void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
 209
 210   bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
 211 };
 212 } // end anonymous namespace
 213
 214 static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
 215   unsigned Op = Inst.getOpcode();
 216   switch (Op) {
 217   default:
 218     return Op;
 219   case X86::JCC_1:
 220     return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
 221   case X86::JMP_1:
 222     return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
 223   }
 224 }
 225
 226 static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
 227   unsigned Op = Inst.getOpcode();
 228   switch (Op) {
 229   default:
 230     return Op;
 231
 232     // IMUL
 233   case X86::IMUL16rri8: return X86::IMUL16rri;
 234   case X86::IMUL16rmi8: return X86::IMUL16rmi;
 235   case X86::IMUL32rri8: return X86::IMUL32rri;
 236   case X86::IMUL32rmi8: return X86::IMUL32rmi;
 237   case X86::IMUL64rri8: return X86::IMUL64rri32;
 238   case X86::IMUL64rmi8: return X86::IMUL64rmi32;
 239
 240     // AND
 241   case X86::AND16ri8: return X86::AND16ri;
 242   case X86::AND16mi8: return X86::AND16mi;
 243   case X86::AND32ri8: return X86::AND32ri;
 244   case X86::AND32mi8: return X86::AND32mi;
 245   case X86::AND64ri8: return X86::AND64ri32;
 246   case X86::AND64mi8: return X86::AND64mi32;
 247
 248     // OR
 249   case X86::OR16ri8: return X86::OR16ri;
 250   case X86::OR16mi8: return X86::OR16mi;
 251   case X86::OR32ri8: return X86::OR32ri;
 252   case X86::OR32mi8: return X86::OR32mi;
 253   case X86::OR64ri8: return X86::OR64ri32;
 254   case X86::OR64mi8: return X86::OR64mi32;
 255
 256     // XOR
 257   case X86::XOR16ri8: return X86::XOR16ri;
 258   case X86::XOR16mi8: return X86::XOR16mi;
 259   case X86::XOR32ri8: return X86::XOR32ri;
 260   case X86::XOR32mi8: return X86::XOR32mi;
 261   case X86::XOR64ri8: return X86::XOR64ri32;
 262   case X86::XOR64mi8: return X86::XOR64mi32;
 263
 264     // ADD
 265   case X86::ADD16ri8: return X86::ADD16ri;
 266   case X86::ADD16mi8: return X86::ADD16mi;
 267   case X86::ADD32ri8: return X86::ADD32ri;
 268   case X86::ADD32mi8: return X86::ADD32mi;
 269   case X86::ADD64ri8: return X86::ADD64ri32;
 270   case X86::ADD64mi8: return X86::ADD64mi32;
 271
 272    // ADC
 273   case X86::ADC16ri8: return X86::ADC16ri;
 274   case X86::ADC16mi8: return X86::ADC16mi;
 275   case X86::ADC32ri8: return X86::ADC32ri;
 276   case X86::ADC32mi8: return X86::ADC32mi;
 277   case X86::ADC64ri8: return X86::ADC64ri32;
 278   case X86::ADC64mi8: return X86::ADC64mi32;
 279
 280     // SUB
 281   case X86::SUB16ri8: return X86::SUB16ri;
 282   case X86::SUB16mi8: return X86::SUB16mi;
 283   case X86::SUB32ri8: return X86::SUB32ri;
 284   case X86::SUB32mi8: return X86::SUB32mi;
 285   case X86::SUB64ri8: return X86::SUB64ri32;
 286   case X86::SUB64mi8: return X86::SUB64mi32;
 287
 288    // SBB
 289   case X86::SBB16ri8: return X86::SBB16ri;
 290   case X86::SBB16mi8: return X86::SBB16mi;
 291   case X86::SBB32ri8: return X86::SBB32ri;
 292   case X86::SBB32mi8: return X86::SBB32mi;
 293   case X86::SBB64ri8: return X86::SBB64ri32;
 294   case X86::SBB64mi8: return X86::SBB64mi32;
 295
 296     // CMP
 297   case X86::CMP16ri8: return X86::CMP16ri;
 298   case X86::CMP16mi8: return X86::CMP16mi;
 299   case X86::CMP32ri8: return X86::CMP32ri;
 300   case X86::CMP32mi8: return X86::CMP32mi;
 301   case X86::CMP64ri8: return X86::CMP64ri32;
 302   case X86::CMP64mi8: return X86::CMP64mi32;
 303
 304     // PUSH
 305   case X86::PUSH32i8:  return X86::PUSHi32;
 306   case X86::PUSH16i8:  return X86::PUSHi16;
 307   case X86::PUSH64i8:  return X86::PUSH64i32;
 308   }
 309 }
 310
 311 static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
 312   unsigned R = getRelaxedOpcodeArith(Inst);
 313   if (R != Inst.getOpcode())
 314     return R;
 315   return getRelaxedOpcodeBranch(Inst, Is16BitMode);
 316 }
 317
 318 static X86::CondCode getCondFromBranch(const MCInst &MI,
 319                                        const MCInstrInfo &MCII) {
 320   unsigned Opcode = MI.getOpcode();
 321   switch (Opcode) {
 322   default:
 323     return X86::COND_INVALID;
 324   case X86::JCC_1: {
 325     const MCInstrDesc &Desc = MCII.get(Opcode);
 326     return static_cast<X86::CondCode>(
 327         MI.getOperand(Desc.getNumOperands() - 1).getImm());
 328   }
 329   }
 330 }
 331
 332 static X86::SecondMacroFusionInstKind
 333 classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
 334   X86::CondCode CC = getCondFromBranch(MI, MCII);
 335   return classifySecondCondCodeInMacroFusion(CC);
 336 }
 337
 338 /// Check if the instruction uses RIP relative addressing.
 339 static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
 340   unsigned Opcode = MI.getOpcode();
 341   const MCInstrDesc &Desc = MCII.get(Opcode);
 342   uint64_t TSFlags = Desc.TSFlags;
 343   unsigned CurOp = X86II::getOperandBias(Desc);
 344   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
 345   if (MemoryOperand < 0)
 346     return false;
 347   unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
 348   unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
 349   return (BaseReg == X86::RIP);
 350 }
 351
 352 /// Check if the instruction is a prefix.
 353 static bool isPrefix(const MCInst &MI, const MCInstrInfo &MCII) {
 354   return X86II::isPrefix(MCII.get(MI.getOpcode()).TSFlags);
 355 }
 356
 357 /// Check if the instruction is valid as the first instruction in macro fusion.
 358 static bool isFirstMacroFusibleInst(const MCInst &Inst,
 359                                     const MCInstrInfo &MCII) {
 360   // An Intel instruction with RIP relative addressing is not macro fusible.
 361   if (isRIPRelative(Inst, MCII))
 362     return false;
 363   X86::FirstMacroFusionInstKind FIK =
 364       X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
 365   return FIK != X86::FirstMacroFusionInstKind::Invalid;
 366 }
 367
 368 /// X86 can reduce the bytes of NOP by padding instructions with prefixes to
 369 /// get a better peformance in some cases. Here, we determine which prefix is
 370 /// the most suitable.
 371 ///
 372 /// If the instruction has a segment override prefix, use the existing one.
 373 /// If the target is 64-bit, use the CS.
 374 /// If the target is 32-bit,
 375 ///   - If the instruction has a ESP/EBP base register, use SS.
 376 ///   - Otherwise use DS.
 377 uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
 378   assert((STI.hasFeature(X86::Mode32Bit) || STI.hasFeature(X86::Mode64Bit)) &&
 379          "Prefixes can be added only in 32-bit or 64-bit mode.");
 380   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
 381   uint64_t TSFlags = Desc.TSFlags;
 382
 383   // Determine where the memory operand starts, if present.
 384   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
 385   if (MemoryOperand != -1)
 386     MemoryOperand += X86II::getOperandBias(Desc);
 387
 388   unsigned SegmentReg = 0;
 389   if (MemoryOperand >= 0) {
 390     // Check for explicit segment override on memory operand.
 391     SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
 392   }
 393
 394   switch (TSFlags & X86II::FormMask) {
 395   default:
 396     break;
 397   case X86II::RawFrmDstSrc: {
 398     // Check segment override opcode prefix as needed (not for %ds).
 399     if (Inst.getOperand(2).getReg() != X86::DS)
 400       SegmentReg = Inst.getOperand(2).getReg();
 401     break;
 402   }
 403   case X86II::RawFrmSrc: {
 404     // Check segment override opcode prefix as needed (not for %ds).
 405     if (Inst.getOperand(1).getReg() != X86::DS)
 406       SegmentReg = Inst.getOperand(1).getReg();
 407     break;
 408   }
 409   case X86II::RawFrmMemOffs: {
 410     // Check segment override opcode prefix as needed.
 411     SegmentReg = Inst.getOperand(1).getReg();
 412     break;
 413   }
 414   }
 415
 416   if (SegmentReg != 0)
 417     return X86::getSegmentOverridePrefixForReg(SegmentReg);
 418
 419   if (STI.hasFeature(X86::Mode64Bit))
 420     return X86::CS_Encoding;
 421
 422   if (MemoryOperand >= 0) {
 423     unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
 424     unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
 425     if (BaseReg == X86::ESP || BaseReg == X86::EBP)
 426       return X86::SS_Encoding;
 427   }
 428   return X86::DS_Encoding;
 429 }
 430
 431 /// Check if the two instructions will be macro-fused on the target cpu.
 432 bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
 433   const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
 434   if (!InstDesc.isConditionalBranch())
 435     return false;
 436   if (!isFirstMacroFusibleInst(Cmp, *MCII))
 437     return false;
 438   const X86::FirstMacroFusionInstKind CmpKind =
 439       X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
 440   const X86::SecondMacroFusionInstKind BranchKind =
 441       classifySecondInstInMacroFusion(Jcc, *MCII);
 442   return X86::isMacroFused(CmpKind, BranchKind);
 443 }
 444
 445 /// Check if the instruction has a variant symbol operand.
 446 static bool hasVariantSymbol(const MCInst &MI) {
 447   for (auto &Operand : MI) {
 448     if (!Operand.isExpr())
 449       continue;
 450     const MCExpr &Expr = *Operand.getExpr();
 451     if (Expr.getKind() == MCExpr::SymbolRef &&
 452         cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
 453       return true;
 454   }
 455   return false;
 456 }
 457
 458 bool X86AsmBackend::allowAutoPadding() const {
 459   return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
 460 }
 461
 462 bool X86AsmBackend::allowEnhancedRelaxation() const {
 463   return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
 464 }
 465
 466 /// X86 has certain instructions which enable interrupts exactly one
 467 /// instruction *after* the instruction which stores to SS.  Return true if the
 468 /// given instruction has such an interrupt delay slot.
 469 static bool hasInterruptDelaySlot(const MCInst &Inst) {
 470   switch (Inst.getOpcode()) {
 471   case X86::POPSS16:
 472   case X86::POPSS32:
 473   case X86::STI:
 474     return true;
 475
 476   case X86::MOV16sr:
 477   case X86::MOV32sr:
 478   case X86::MOV64sr:
 479   case X86::MOV16sm:
 480     if (Inst.getOperand(0).getReg() == X86::SS)
 481       return true;
 482     break;
 483   }
 484   return false;
 485 }
 486
 487 /// Check if the instruction to be emitted is right after any data.
 488 static bool
 489 isRightAfterData(MCFragment *CurrentFragment,
 490                  const std::pair<MCFragment *, size_t> &PrevInstPosition) {
 491   MCFragment *F = CurrentFragment;
 492   // Empty data fragments may be created to prevent further data being
 493   // added into the previous fragment, we need to skip them since they
 494   // have no contents.
 495   for (; isa_and_nonnull<MCDataFragment>(F); F = F->getPrevNode())
 496     if (cast<MCDataFragment>(F)->getContents().size() != 0)
 497       break;
 498
 499   // Since data is always emitted into a DataFragment, our check strategy is
 500   // simple here.
 501   //   - If the fragment is a DataFragment
 502   //     - If it's not the fragment where the previous instruction is,
 503   //       returns true.
 504   //     - If it's the fragment holding the previous instruction but its
 505   //       size changed since the the previous instruction was emitted into
 506   //       it, returns true.
 507   //     - Otherwise returns false.
 508   //   - If the fragment is not a DataFragment, returns false.
 509   if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
 510     return DF != PrevInstPosition.first ||
 511            DF->getContents().size() != PrevInstPosition.second;
 512
 513   return false;
 514 }
 515
 516 /// \returns the fragment size if it has instructions, otherwise returns 0.
 517 static size_t getSizeForInstFragment(const MCFragment *F) {
 518   if (!F || !F->hasInstructions())
 519     return 0;
 520   // MCEncodedFragmentWithContents being templated makes this tricky.
 521   switch (F->getKind()) {
 522   default:
 523     llvm_unreachable("Unknown fragment with instructions!");
 524   case MCFragment::FT_Data:
 525     return cast<MCDataFragment>(*F).getContents().size();
 526   case MCFragment::FT_Relaxable:
 527     return cast<MCRelaxableFragment>(*F).getContents().size();
 528   case MCFragment::FT_CompactEncodedInst:
 529     return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
 530   }
 531 }
 532
 533 /// Return true if we can insert NOP or prefixes automatically before the
 534 /// the instruction to be emitted.
 535 bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
 536   if (hasVariantSymbol(Inst))
 537     // Linker may rewrite the instruction with variant symbol operand(e.g.
 538     // TLSCALL).
 539     return false;
 540
 541   if (hasInterruptDelaySlot(PrevInst))
 542     // If this instruction follows an interrupt enabling instruction with a one
 543     // instruction delay, inserting a nop would change behavior.
 544     return false;
 545
 546   if (isPrefix(PrevInst, *MCII))
 547     // If this instruction follows a prefix, inserting a nop/prefix would change
 548     // semantic.
 549     return false;
 550
 551   if (isPrefix(Inst, *MCII))
 552     // If this instruction is a prefix, inserting a prefix would change
 553     // semantic.
 554     return false;
 555
 556   if (isRightAfterData(OS.getCurrentFragment(), PrevInstPosition))
 557     // If this instruction follows any data, there is no clear
 558     // instruction boundary, inserting a nop/prefix would change semantic.
 559     return false;
 560
 561   return true;
 562 }
 563
 564 bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
 565   if (!OS.getAllowAutoPadding())
 566     return false;
 567   assert(allowAutoPadding() && "incorrect initialization!");
 568
 569   // We only pad in text section.
 570   if (!OS.getCurrentSectionOnly()->getKind().isText())
 571     return false;
 572
 573   // To be Done: Currently don't deal with Bundle cases.
 574   if (OS.getAssembler().isBundlingEnabled())
 575     return false;
 576
 577   // Branches only need to be aligned in 32-bit or 64-bit mode.
 578   if (!(STI.hasFeature(X86::Mode64Bit) || STI.hasFeature(X86::Mode32Bit)))
 579     return false;
 580
 581   return true;
 582 }
 583
 584 /// Check if the instruction operand needs to be aligned.
 585 bool X86AsmBackend::needAlign(const MCInst &Inst) const {
 586   const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
 587   return (Desc.isConditionalBranch() &&
 588           (AlignBranchType & X86::AlignBranchJcc)) ||
 589          (Desc.isUnconditionalBranch() &&
 590           (AlignBranchType & X86::AlignBranchJmp)) ||
 591          (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
 592          (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
 593          (Desc.isIndirectBranch() &&
 594           (AlignBranchType & X86::AlignBranchIndirect));
 595 }
 596
 597 /// Insert BoundaryAlignFragment before instructions to align branches.
 598 void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
 599                                          const MCInst &Inst) {
 600   CanPadInst = canPadInst(Inst, OS);
 601
 602   if (!canPadBranches(OS))
 603     return;
 604
 605   if (!isMacroFused(PrevInst, Inst))
 606     // Macro fusion doesn't happen indeed, clear the pending.
 607     PendingBA = nullptr;
 608
 609   if (!CanPadInst)
 610     return;
 611
 612   if (PendingBA && OS.getCurrentFragment()->getPrevNode() == PendingBA) {
 613     // Macro fusion actually happens and there is no other fragment inserted
 614     // after the previous instruction.
 615     //
 616     // Do nothing here since we already inserted a BoudaryAlign fragment when
 617     // we met the first instruction in the fused pair and we'll tie them
 618     // together in emitInstructionEnd.
 619     //
 620     // Note: When there is at least one fragment, such as MCAlignFragment,
 621     // inserted after the previous instruction, e.g.
 622     //
 623     // \code
 624     //   cmp %rax %rcx
 625     //   .align 16
 626     //   je .Label0
 627     // \ endcode
 628     //
 629     // We will treat the JCC as a unfused branch although it may be fused
 630     // with the CMP.
 631     return;
 632   }
 633
 634   if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
 635                           isFirstMacroFusibleInst(Inst, *MCII))) {
 636     // If we meet a unfused branch or the first instuction in a fusiable pair,
 637     // insert a BoundaryAlign fragment.
 638     OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
 639   }
 640 }
 641
 642 /// Set the last fragment to be aligned for the BoundaryAlignFragment.
 643 void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {
 644   PrevInst = Inst;
 645   MCFragment *CF = OS.getCurrentFragment();
 646   PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));
 647   if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
 648     F->setAllowAutoPadding(CanPadInst);
 649
 650   if (!canPadBranches(OS))
 651     return;
 652
 653   if (!needAlign(Inst) || !PendingBA)
 654     return;
 655
 656   // Tie the aligned instructions into a a pending BoundaryAlign.
 657   PendingBA->setLastFragment(CF);
 658   PendingBA = nullptr;
 659
 660   // We need to ensure that further data isn't added to the current
 661   // DataFragment, so that we can get the size of instructions later in
 662   // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
 663   // DataFragment.
 664   if (isa_and_nonnull<MCDataFragment>(CF))
 665     OS.insert(new MCDataFragment());
 666
 667   // Update the maximum alignment on the current section if necessary.
 668   MCSection *Sec = OS.getCurrentSectionOnly();
 669   if (AlignBoundary.value() > Sec->getAlignment())
 670     Sec->setAlignment(AlignBoundary);
 671 }
 672
 673 Optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
 674   if (STI.getTargetTriple().isOSBinFormatELF()) {
 675     unsigned Type;
 676     if (STI.getTargetTriple().getArch() == Triple::x86_64) {
 677       Type = llvm::StringSwitch<unsigned>(Name)
 678 #define ELF_RELOC(X, Y) .Case(#X, Y)
 679 #include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
 680 #undef ELF_RELOC
 681                  .Default(-1u);
 682     } else {
 683       Type = llvm::StringSwitch<unsigned>(Name)
 684 #define ELF_RELOC(X, Y) .Case(#X, Y)
 685 #include "llvm/BinaryFormat/ELFRelocs/i386.def"
 686 #undef ELF_RELOC
 687                  .Default(-1u);
 688     }
 689     if (Type == -1u)
 690       return None;
 691     return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
 692   }
 693   return MCAsmBackend::getFixupKind(Name);
 694 }
 695
 696 const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
 697   const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
 698       {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
 699       {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
 700       {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
 701       {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
 702       {"reloc_signed_4byte", 0, 32, 0},
 703       {"reloc_signed_4byte_relax", 0, 32, 0},
 704       {"reloc_global_offset_table", 0, 32, 0},
 705       {"reloc_global_offset_table8", 0, 64, 0},
 706       {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
 707   };
 708
 709   // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
 710   // do not require any extra processing.
 711   if (Kind >= FirstLiteralRelocationKind)
 712     return MCAsmBackend::getFixupKindInfo(FK_NONE);
 713
 714   if (Kind < FirstTargetFixupKind)
 715     return MCAsmBackend::getFixupKindInfo(Kind);
 716
 717   assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
 718          "Invalid kind!");
 719   assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
 720   return Infos[Kind - FirstTargetFixupKind];
 721 }
 722
 723 bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
 724                                           const MCFixup &Fixup,
 725                                           const MCValue &) {
 726   return Fixup.getKind() >= FirstLiteralRelocationKind;
 727 }
 728
 729 static unsigned getFixupKindSize(unsigned Kind) {
 730   switch (Kind) {
 731   default:
 732     llvm_unreachable("invalid fixup kind!");
 733   case FK_NONE:
 734     return 0;
 735   case FK_PCRel_1:
 736   case FK_SecRel_1:
 737   case FK_Data_1:
 738     return 1;
 739   case FK_PCRel_2:
 740   case FK_SecRel_2:
 741   case FK_Data_2:
 742     return 2;
 743   case FK_PCRel_4:
 744   case X86::reloc_riprel_4byte:
 745   case X86::reloc_riprel_4byte_relax:
 746   case X86::reloc_riprel_4byte_relax_rex:
 747   case X86::reloc_riprel_4byte_movq_load:
 748   case X86::reloc_signed_4byte:
 749   case X86::reloc_signed_4byte_relax:
 750   case X86::reloc_global_offset_table:
 751   case X86::reloc_branch_4byte_pcrel:
 752   case FK_SecRel_4:
 753   case FK_Data_4:
 754     return 4;
 755   case FK_PCRel_8:
 756   case FK_SecRel_8:
 757   case FK_Data_8:
 758   case X86::reloc_global_offset_table8:
 759     return 8;
 760   }
 761 }
 762
 763 void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
 764                                const MCValue &Target,
 765                                MutableArrayRef<char> Data,
 766                                uint64_t Value, bool IsResolved,
 767                                const MCSubtargetInfo *STI) const {
 768   unsigned Kind = Fixup.getKind();
 769   if (Kind >= FirstLiteralRelocationKind)
 770     return;
 771   unsigned Size = getFixupKindSize(Kind);
 772
 773   assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");
 774
 775   int64_t SignedValue = static_cast<int64_t>(Value);
 776   if ((Target.isAbsolute() || IsResolved) &&
 777       getFixupKindInfo(Fixup.getKind()).Flags &
 778       MCFixupKindInfo::FKF_IsPCRel) {
 779     // check that PC relative fixup fits into the fixup size.
 780     if (Size > 0 && !isIntN(Size * 8, SignedValue))
 781       Asm.getContext().reportError(
 782                                    Fixup.getLoc(), "value of " + Twine(SignedValue) +
 783                                    " is too large for field of " + Twine(Size) +
 784                                    ((Size == 1) ? " byte." : " bytes."));
 785   } else {
 786     // Check that uppper bits are either all zeros or all ones.
 787     // Specifically ignore overflow/underflow as long as the leakage is
 788     // limited to the lower bits. This is to remain compatible with
 789     // other assemblers.
 790     assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
 791            "Value does not fit in the Fixup field");
 792   }
 793
 794   for (unsigned i = 0; i != Size; ++i)
 795     Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
 796 }
 797
 798 bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
 799                                       const MCSubtargetInfo &STI) const {
 800   // Branches can always be relaxed in either mode.
 801   if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
 802     return true;
 803
 804   // Check if this instruction is ever relaxable.
 805   if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
 806     return false;
 807
 808
 809   // Check if the relaxable operand has an expression. For the current set of
 810   // relaxable instructions, the relaxable operand is always the last operand.
 811   unsigned RelaxableOp = Inst.getNumOperands() - 1;
 812   if (Inst.getOperand(RelaxableOp).isExpr())
 813     return true;
 814
 815   return false;
 816 }
 817
 818 bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
 819                                          uint64_t Value,
 820                                          const MCRelaxableFragment *DF,
 821                                          const MCAsmLayout &Layout) const {
 822   // Relax if the value is too big for a (signed) i8.
 823   return !isInt<8>(Value);
 824 }
 825
 826 // FIXME: Can tblgen help at all here to verify there aren't other instructions
 827 // we can relax?
 828 void X86AsmBackend::relaxInstruction(MCInst &Inst,
 829                                      const MCSubtargetInfo &STI) const {
 830   // The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
 831   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
 832   unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
 833
 834   if (RelaxedOp == Inst.getOpcode()) {
 835     SmallString<256> Tmp;
 836     raw_svector_ostream OS(Tmp);
 837     Inst.dump_pretty(OS);
 838     OS << "\n";
 839     report_fatal_error("unexpected instruction to relax: " + OS.str());
 840   }
 841
 842   Inst.setOpcode(RelaxedOp);
 843 }
 844
 845 /// Return true if this instruction has been fully relaxed into it's most
 846 /// general available form.
 847 static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
 848   auto &Inst = RF.getInst();
 849   auto &STI = *RF.getSubtargetInfo();
 850   bool Is16BitMode = STI.getFeatureBits()[X86::Mode16Bit];
 851   return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
 852 }
 853
 854 bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
 855                                             MCCodeEmitter &Emitter,
 856                                             unsigned &RemainingSize) const {
 857   if (!RF.getAllowAutoPadding())
 858     return false;
 859   // If the instruction isn't fully relaxed, shifting it around might require a
 860   // larger value for one of the fixups then can be encoded.  The outer loop
 861   // will also catch this before moving to the next instruction, but we need to
 862   // prevent padding this single instruction as well.
 863   if (!isFullyRelaxed(RF))
 864     return false;
 865
 866   const unsigned OldSize = RF.getContents().size();
 867   if (OldSize == 15)
 868     return false;
 869
 870   const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
 871   const unsigned RemainingPrefixSize = [&]() -> unsigned {
 872     SmallString<15> Code;
 873     raw_svector_ostream VecOS(Code);
 874     Emitter.emitPrefix(RF.getInst(), VecOS, STI);
 875     assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
 876
 877     // TODO: It turns out we need a decent amount of plumbing for the target
 878     // specific bits to determine number of prefixes its safe to add.  Various
 879     // targets (older chips mostly, but also Atom family) encounter decoder
 880     // stalls with too many prefixes.  For testing purposes, we set the value
 881     // externally for the moment.
 882     unsigned ExistingPrefixSize = Code.size();
 883     if (TargetPrefixMax <= ExistingPrefixSize)
 884       return 0;
 885     return TargetPrefixMax - ExistingPrefixSize;
 886   }();
 887   const unsigned PrefixBytesToAdd =
 888       std::min(MaxPossiblePad, RemainingPrefixSize);
 889   if (PrefixBytesToAdd == 0)
 890     return false;
 891
 892   const uint8_t Prefix = determinePaddingPrefix(RF.getInst());
 893
 894   SmallString<256> Code;
 895   Code.append(PrefixBytesToAdd, Prefix);
 896   Code.append(RF.getContents().begin(), RF.getContents().end());
 897   RF.getContents() = Code;
 898
 899   // Adjust the fixups for the change in offsets
 900   for (auto &F : RF.getFixups()) {
 901     F.setOffset(F.getOffset() + PrefixBytesToAdd);
 902   }
 903
 904   RemainingSize -= PrefixBytesToAdd;
 905   return true;
 906 }
 907
 908 bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
 909                                                 MCCodeEmitter &Emitter,
 910                                                 unsigned &RemainingSize) const {
 911   if (isFullyRelaxed(RF))
 912     // TODO: There are lots of other tricks we could apply for increasing
 913     // encoding size without impacting performance.
 914     return false;
 915
 916   MCInst Relaxed = RF.getInst();
 917   relaxInstruction(Relaxed, *RF.getSubtargetInfo());
 918
 919   SmallVector<MCFixup, 4> Fixups;
 920   SmallString<15> Code;
 921   raw_svector_ostream VecOS(Code);
 922   Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
 923   const unsigned OldSize = RF.getContents().size();
 924   const unsigned NewSize = Code.size();
 925   assert(NewSize >= OldSize && "size decrease during relaxation?");
 926   unsigned Delta = NewSize - OldSize;
 927   if (Delta > RemainingSize)
 928     return false;
 929   RF.setInst(Relaxed);
 930   RF.getContents() = Code;
 931   RF.getFixups() = Fixups;
 932   RemainingSize -= Delta;
 933   return true;
 934 }
 935
 936 bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
 937                                            MCCodeEmitter &Emitter,
 938                                            unsigned &RemainingSize) const {
 939   bool Changed = false;
 940   if (RemainingSize != 0)
 941     Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
 942   if (RemainingSize != 0)
 943     Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
 944   return Changed;
 945 }
 946
/// Post-layout hook: shrink the padding required by align / boundary-align
/// fragments in text sections by enlarging the encodings of the relaxable
/// instructions that immediately precede them.
///
/// Does nothing unless X86PadForAlign or X86PadForBranchAlign is set.  On
/// exit, the offset and size of the last fragment of every section in the
/// layout order have been queried (see the final loop).
void X86AsmBackend::finishLayout(MCAssembler const &Asm,
                                 MCAsmLayout &Layout) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment.  The actual win is in reducing
  // instruction count, not number of bytes.  Modern X86-64 can easily end up
  // decode limited.  It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // Collect every fragment that some symbol is attached to.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    // Only text sections are considered.
    if (!Sec.getKind().isText())
      continue;

    // Relaxable fragments seen since the last point at which the candidate
    // list was reset; these are the instructions that may be padded.
    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      // A labeled fragment resets the candidate list, so instructions before
      // the label are never padded on behalf of a later alignment.
      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      // Remember relaxable instructions as padding candidates.
      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      // Align and boundary-align fragments are handled only when the
      // corresponding option is enabled.
      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

      // OrigOffset is only needed by the debug-build consistency checks below.
#ifndef NDEBUG
      const uint64_t OrigOffset = Layout.getFragmentOffset(&F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(Layout, F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive.  This is purely about human understandability
      // of the resulting code.  If we later find a reason to expand
      // particular instructions over others, we can adjust.
      MCFragment *FirstChangedFragment = nullptr;
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        // Candidates are popped from the back, i.e. nearest to F first.
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction.  Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          FirstChangedFragment = &RF;

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it.  Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (!isFullyRelaxed(RF))
          break;
      }
      Relaxable.clear();

      if (FirstChangedFragment) {
        // Make sure the offsets for any fragments in the affected range get
        // updated.  Note that this (conservatively) invalidates the offsets of
        // those following, but this is not required.
        Layout.invalidateFragmentsFrom(FirstChangedFragment);
      }

      // BoundaryAlign explicitly tracks its size (unlike align)
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

      // Debug-only checks: the end of F must not have moved, and F's size
      // must now equal the padding that is left over.
#ifndef NDEBUG
      const uint64_t FinalOffset = Layout.getFragmentOffset(&F);
      const uint64_t FinalSize = Asm.computeFragmentSize(Layout, F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive.  Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        // Advance the section iterator up to the boundary align's last
        // fragment so the fragments in between are not visited.
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  // Querying the last fragment of each section is what does this;
  // NOTE(review): relies on MCAsmLayout laying out lazily up to the queried
  // fragment - confirm against MCAsmLayout.
  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
    MCSection &Section = *Layout.getSectionOrder()[i];
    Layout.getFragmentOffset(&*Section.getFragmentList().rbegin());
    Asm.computeFragmentSize(Layout, *Section.getFragmentList().rbegin());
  }
}
1069
1070 /// Write a sequence of optimal nops to the output, covering \p Count
1071 /// bytes.
1072 /// \return - true on success, false on failure
1073 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
1074   static const char Nops[10][11] = {
1075     // nop
1076     "\x90",
1077     // xchg %ax,%ax
1078     "\x66\x90",
1079     // nopl (%[re]ax)
1080     "\x0f\x1f\x00",
1081     // nopl 0(%[re]ax)
1082     "\x0f\x1f\x40\x00",
1083     // nopl 0(%[re]ax,%[re]ax,1)
1084     "\x0f\x1f\x44\x00\x00",
1085     // nopw 0(%[re]ax,%[re]ax,1)
1086     "\x66\x0f\x1f\x44\x00\x00",
1087     // nopl 0L(%[re]ax)
1088     "\x0f\x1f\x80\x00\x00\x00\x00",
1089     // nopl 0L(%[re]ax,%[re]ax,1)
1090     "\x0f\x1f\x84\x00\x00\x00\x00\x00",
1091     // nopw 0L(%[re]ax,%[re]ax,1)
1092     "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
1093     // nopw %cs:0L(%[re]ax,%[re]ax,1)
1094     "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
1095   };
1096
1097   // This CPU doesn't support long nops. If needed add more.
1098   // FIXME: We could generated something better than plain 0x90.
1099   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit)) {
1100     for (uint64_t i = 0; i < Count; ++i)
1101       OS << '\x90';
1102     return true;
1103   }
1104
1105   // 15-bytes is the longest single NOP instruction, but 10-bytes is
1106   // commonly the longest that can be efficiently decoded.
1107   uint64_t MaxNopLength = 10;
1108   if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
1109     MaxNopLength = 7;
1110   else if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
1111     MaxNopLength = 15;
1112   else if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
1113     MaxNopLength = 11;
1114
1115   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
1116   // length.
1117   do {
1118     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
1119     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
1120     for (uint8_t i = 0; i < Prefixes; i++)
1121       OS << '\x66';
1122     const uint8_t Rest = ThisNopLength - Prefixes;
1123     if (Rest != 0)
1124       OS.write(Nops[Rest - 1], Rest);
1125     Count -= ThisNopLength;
1126   } while (Count != 0);
1127
1128   return true;
1129 }
1130
1131 /* *** */
1132
1133 namespace {
1134
1135 class ELFX86AsmBackend : public X86AsmBackend {
1136 public:
1137   uint8_t OSABI;
1138   ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
1139       : X86AsmBackend(T, STI), OSABI(OSABI) {}
1140 };
1141
1142 class ELFX86_32AsmBackend : public ELFX86AsmBackend {
1143 public:
1144   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
1145                       const MCSubtargetInfo &STI)
1146     : ELFX86AsmBackend(T, OSABI, STI) {}
1147
1148   std::unique_ptr<MCObjectTargetWriter>
1149   createObjectTargetWriter() const override {
1150     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
1151   }
1152 };
1153
1154 class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
1155 public:
1156   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
1157                        const MCSubtargetInfo &STI)
1158       : ELFX86AsmBackend(T, OSABI, STI) {}
1159
1160   std::unique_ptr<MCObjectTargetWriter>
1161   createObjectTargetWriter() const override {
1162     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1163                                     ELF::EM_X86_64);
1164   }
1165 };
1166
1167 class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
1168 public:
1169   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
1170                          const MCSubtargetInfo &STI)
1171       : ELFX86AsmBackend(T, OSABI, STI) {}
1172
1173   std::unique_ptr<MCObjectTargetWriter>
1174   createObjectTargetWriter() const override {
1175     return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
1176                                     ELF::EM_IAMCU);
1177   }
1178 };
1179
1180 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
1181 public:
1182   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
1183                       const MCSubtargetInfo &STI)
1184     : ELFX86AsmBackend(T, OSABI, STI) {}
1185
1186   std::unique_ptr<MCObjectTargetWriter>
1187   createObjectTargetWriter() const override {
1188     return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
1189   }
1190 };
1191
1192 class WindowsX86AsmBackend : public X86AsmBackend {
1193   bool Is64Bit;
1194
1195 public:
1196   WindowsX86AsmBackend(const Target &T, bool is64Bit,
1197                        const MCSubtargetInfo &STI)
1198     : X86AsmBackend(T, STI)
1199     , Is64Bit(is64Bit) {
1200   }
1201
1202   Optional<MCFixupKind> getFixupKind(StringRef Name) const override {
1203     return StringSwitch<Optional<MCFixupKind>>(Name)
1204         .Case("dir32", FK_Data_4)
1205         .Case("secrel32", FK_SecRel_4)
1206         .Case("secidx", FK_SecRel_2)
1207         .Default(MCAsmBackend::getFixupKind(Name));
1208   }
1209
1210   std::unique_ptr<MCObjectTargetWriter>
1211   createObjectTargetWriter() const override {
1212     return createX86WinCOFFObjectWriter(Is64Bit);
1213   }
1214 };
1215
// Constants used when building compact unwind encodings: the UNWIND_MODE_*
// values select the encoding mode, and the remaining values are bit masks
// applied to the register fields.
namespace CU {

  /// Compact unwind encoding values.
  enum CompactUnwindEncodings {
    /// [RE]BP based frame where [RE]BP is pushed on the stack immediately
    /// after the return address, then [RE]SP is moved to [RE]BP.
    UNWIND_MODE_BP_FRAME                   = 0x01000000,

    /// A frameless function with a small constant stack size.
    UNWIND_MODE_STACK_IMMD                 = 0x02000000,

    /// A frameless function with a large constant stack size.
    UNWIND_MODE_STACK_IND                  = 0x03000000,

    /// No compact unwind encoding is available.
    UNWIND_MODE_DWARF                      = 0x04000000,

    /// Mask for encoding the frame registers.
    UNWIND_BP_FRAME_REGISTERS              = 0x00007FFF,

    /// Mask for encoding the frameless registers.
    UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
  };

} // end CU namespace
1241
/// X86 backend that writes Mach-O objects and can translate a function's CFI
/// instructions into a compact unwind encoding.
class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  /// Scratch list of saved registers.  It is rewritten by
  /// generateCompactUnwindEncoding() and its helpers, which are const
  /// methods, hence the 'mutable'.
  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;                   ///< Offset of a "push" instruction.
  unsigned MoveInstrSize;                ///< Size of a "move" instruction.
  unsigned StackDivide;                  ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  /// R12-R15 report 2 bytes; every other register reports 1 byte.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
      case X86::EBX:
      case X86::ECX:
      case X86::EDX:
      case X86::EDI:
      case X86::ESI:
      case X86::EBP:
      case X86::RBX:
      case X86::RBP:
        return 1;
      case X86::R12:
      case X86::R13:
      case X86::R14:
      case X86::R15:
        return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  ///
  /// \return the 1-based position of \p Reg in the table selected by
  /// Is64Bit, or -1 if the register is not in that table.
  int getCompactUnwindRegNum(unsigned Reg) const {
    // Both tables are terminated by a 0 entry, which stops the search loop.
    static const MCPhysReg CU32BitRegs[7] = {
      X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
      X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  ///
  /// \return the packed register field, or ~0U if one of the saved registers
  /// has no compact unwind number.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      // A zero entry marks the end of the saved-register list.
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }

  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  ///
  /// Note that this overwrites SavedRegs: the first \p RegCount entries are
  /// replaced by their compact unwind numbers and the whole array is then
  /// reversed.
  ///
  /// \return the permutation encoding, or ~0U if one of the registers has no
  /// compact unwind number.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    // The full array is reversed, so the RegCount converted entries end up in
    // the last RegCount slots, which are the only ones read below.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    // For each used slot, subtract one plus the number of earlier used slots
    // holding a smaller register number.  Slots before the used range are
    // left uninitialized and are not read by the switch below.
    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i){
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] +  2 * RenumRegs[3]
                             +     RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] +  2 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 4:
      permutationEncoding |=  60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 3:
      permutationEncoding |=  20 * RenumRegs[3] +  4 * RenumRegs[4]
                             +     RenumRegs[5];
      break;
    case 2:
      permutationEncoding |=   5 * RenumRegs[4] +      RenumRegs[5];
      break;
    case 1:
      permutationEncoding |=       RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  /// Set up the backend; the size constants used by the unwind encoder are
  /// chosen by whether the target triple is a 64-bit architecture.
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  /// Create the Mach-O object target writer, using the CPU type and subtype
  /// derived from the target triple.
  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }

  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  ///
  /// \return 0 when \p Instrs is empty, contains a CFI operation other than
  /// def_cfa_register / def_cfa_offset / offset, or defines a frame pointer
  /// other than [RE]BP; CU::UNWIND_MODE_DWARF when the frame cannot be
  /// represented compactly; otherwise the compact unwind encoding.
  uint32_t
  generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
    if (Instrs.empty()) return 0;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Accumulates the encoding that is returned at the end.
    uint32_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    unsigned StackSize = 0;
    // NOTE(review): NumDefCFAOffsets is only ever incremented in this
    // function; its value is never read.
    unsigned NumDefCFAOffsets = 0;

    for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
      const MCCFIInstruction &Inst = Instrs[i];

      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return 0;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than ebp/rbp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return 0;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        ++NumDefCFAOffsets;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    // Convert the accumulated byte count into units of StackDivide.
    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }

      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
        RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};
1561
1562 } // end anonymous namespace
1563
1564 MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
1565                                            const MCSubtargetInfo &STI,
1566                                            const MCRegisterInfo &MRI,
1567                                            const MCTargetOptions &Options) {
1568   const Triple &TheTriple = STI.getTargetTriple();
1569   if (TheTriple.isOSBinFormatMachO())
1570     return new DarwinX86AsmBackend(T, MRI, STI);
1571
1572   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1573     return new WindowsX86AsmBackend(T, false, STI);
1574
1575   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1576
1577   if (TheTriple.isOSIAMCU())
1578     return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);
1579
1580   return new ELFX86_32AsmBackend(T, OSABI, STI);
1581 }
1582
1583 MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
1584                                            const MCSubtargetInfo &STI,
1585                                            const MCRegisterInfo &MRI,
1586                                            const MCTargetOptions &Options) {
1587   const Triple &TheTriple = STI.getTargetTriple();
1588   if (TheTriple.isOSBinFormatMachO())
1589     return new DarwinX86AsmBackend(T, MRI, STI);
1590
1591   if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
1592     return new WindowsX86AsmBackend(T, true, STI);
1593
1594   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
1595
1596   if (TheTriple.getEnvironment() == Triple::GNUX32)
1597     return new ELFX86_X32AsmBackend(T, OSABI, STI);
1598   return new ELFX86_64AsmBackend(T, OSABI, STI);
1599 }