contrib/llvm/tools/lld/ELF/Arch/X86_64.cpp

   1 //===- X86_64.cpp ---------------------------------------------------------===//
   2 //
   3 //                             The LLVM Linker
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "InputFiles.h"
  11 #include "Symbols.h"
  12 #include "SyntheticSections.h"
  13 #include "Target.h"
  14 #include "lld/Common/ErrorHandler.h"
  15 #include "llvm/Object/ELF.h"
  16 #include "llvm/Support/Endian.h"
  17
  18 using namespace llvm;
  19 using namespace llvm::object;
  20 using namespace llvm::support::endian;
  21 using namespace llvm::ELF;
  22 using namespace lld;
  23 using namespace lld::elf;
  24
  25 namespace {
  26 template <class ELFT> class X86_64 final : public TargetInfo {
  27 public:
  28   X86_64();
  29   RelExpr getRelExpr(RelType Type, const Symbol &S,
  30                      const uint8_t *Loc) const override;
  31   bool isPicRel(RelType Type) const override;
  32   void writeGotPltHeader(uint8_t *Buf) const override;
  33   void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
  34   void writePltHeader(uint8_t *Buf) const override;
  35   void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
  36                 int32_t Index, unsigned RelOff) const override;
  37   void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
  38
  39   RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
  40                           RelExpr Expr) const override;
  41   void relaxGot(uint8_t *Loc, uint64_t Val) const override;
  42   void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
  43   void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
  44   void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
  45   void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
  46
  47 private:
  48   void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
  49                      uint8_t ModRm) const;
  50 };
  51 } // namespace
  52
  53 template <class ELFT> X86_64<ELFT>::X86_64() {
  54   GotBaseSymOff = -1;
  55   CopyRel = R_X86_64_COPY;
  56   GotRel = R_X86_64_GLOB_DAT;
  57   PltRel = R_X86_64_JUMP_SLOT;
  58   RelativeRel = R_X86_64_RELATIVE;
  59   IRelativeRel = R_X86_64_IRELATIVE;
  60   TlsGotRel = R_X86_64_TPOFF64;
  61   TlsModuleIndexRel = R_X86_64_DTPMOD64;
  62   TlsOffsetRel = R_X86_64_DTPOFF64;
  63   GotEntrySize = 8;
  64   GotPltEntrySize = 8;
  65   PltEntrySize = 16;
  66   PltHeaderSize = 16;
  67   TlsGdRelaxSkip = 2;
  68   TrapInstr = 0xcccccccc; // 0xcc = INT3
  69
  70   // Align to the large page size (known as a superpage or huge page).
  71   // FreeBSD automatically promotes large, superpage-aligned allocations.
  72   DefaultImageBase = 0x200000;
  73 }
  74
  75 template <class ELFT>
  76 RelExpr X86_64<ELFT>::getRelExpr(RelType Type, const Symbol &S,
  77                                  const uint8_t *Loc) const {
  78   switch (Type) {
  79   case R_X86_64_8:
  80   case R_X86_64_16:
  81   case R_X86_64_32:
  82   case R_X86_64_32S:
  83   case R_X86_64_64:
  84   case R_X86_64_DTPOFF32:
  85   case R_X86_64_DTPOFF64:
  86     return R_ABS;
  87   case R_X86_64_TPOFF32:
  88     return R_TLS;
  89   case R_X86_64_TLSLD:
  90     return R_TLSLD_PC;
  91   case R_X86_64_TLSGD:
  92     return R_TLSGD_PC;
  93   case R_X86_64_SIZE32:
  94   case R_X86_64_SIZE64:
  95     return R_SIZE;
  96   case R_X86_64_PLT32:
  97     return R_PLT_PC;
  98   case R_X86_64_PC32:
  99   case R_X86_64_PC64:
 100     return R_PC;
 101   case R_X86_64_GOT32:
 102   case R_X86_64_GOT64:
 103     return R_GOT_FROM_END;
 104   case R_X86_64_GOTPCREL:
 105   case R_X86_64_GOTPCRELX:
 106   case R_X86_64_REX_GOTPCRELX:
 107   case R_X86_64_GOTTPOFF:
 108     return R_GOT_PC;
 109   case R_X86_64_NONE:
 110     return R_NONE;
 111   default:
 112     return R_INVALID;
 113   }
 114 }
 115
 116 template <class ELFT> void X86_64<ELFT>::writeGotPltHeader(uint8_t *Buf) const {
 117   // The first entry holds the value of _DYNAMIC. It is not clear why that is
 118   // required, but it is documented in the psabi and the glibc dynamic linker
 119   // seems to use it (note that this is relevant for linking ld.so, not any
 120   // other program).
 121   write64le(Buf, InX::Dynamic->getVA());
 122 }
 123
 124 template <class ELFT>
 125 void X86_64<ELFT>::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
 126   // See comments in X86::writeGotPlt.
 127   write32le(Buf, S.getPltVA() + 6);
 128 }
 129
 130 template <class ELFT> void X86_64<ELFT>::writePltHeader(uint8_t *Buf) const {
 131   const uint8_t PltData[] = {
 132       0xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)
 133       0xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)
 134       0x0f, 0x1f, 0x40, 0x00, // nop
 135   };
 136   memcpy(Buf, PltData, sizeof(PltData));
 137   uint64_t GotPlt = InX::GotPlt->getVA();
 138   uint64_t Plt = InX::Plt->getVA();
 139   write32le(Buf + 2, GotPlt - Plt + 2); // GOTPLT+8
 140   write32le(Buf + 8, GotPlt - Plt + 4); // GOTPLT+16
 141 }
 142
 143 template <class ELFT>
 144 void X86_64<ELFT>::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
 145                             uint64_t PltEntryAddr, int32_t Index,
 146                             unsigned RelOff) const {
 147   const uint8_t Inst[] = {
 148       0xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)
 149       0x68, 0, 0, 0, 0,       // pushq <relocation index>
 150       0xe9, 0, 0, 0, 0,       // jmpq plt[0]
 151   };
 152   memcpy(Buf, Inst, sizeof(Inst));
 153
 154   write32le(Buf + 2, GotPltEntryAddr - PltEntryAddr - 6);
 155   write32le(Buf + 7, Index);
 156   write32le(Buf + 12, -Index * PltEntrySize - PltHeaderSize - 16);
 157 }
 158
 159 template <class ELFT> bool X86_64<ELFT>::isPicRel(RelType Type) const {
 160   return Type != R_X86_64_PC32 && Type != R_X86_64_32 &&
 161          Type != R_X86_64_TPOFF32;
 162 }
 163
 164 template <class ELFT>
 165 void X86_64<ELFT>::relaxTlsGdToLe(uint8_t *Loc, RelType Type,
 166                                   uint64_t Val) const {
 167   // Convert
 168   //   .byte 0x66
 169   //   leaq x@tlsgd(%rip), %rdi
 170   //   .word 0x6666
 171   //   rex64
 172   //   call __tls_get_addr@plt
 173   // to
 174   //   mov %fs:0x0,%rax
 175   //   lea x@tpoff,%rax
 176   const uint8_t Inst[] = {
 177       0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
 178       0x48, 0x8d, 0x80, 0, 0, 0, 0,                         // lea x@tpoff,%rax
 179   };
 180   memcpy(Loc - 4, Inst, sizeof(Inst));
 181
 182   // The original code used a pc relative relocation and so we have to
 183   // compensate for the -4 in had in the addend.
 184   write32le(Loc + 8, Val + 4);
 185 }
 186
 187 template <class ELFT>
 188 void X86_64<ELFT>::relaxTlsGdToIe(uint8_t *Loc, RelType Type,
 189                                   uint64_t Val) const {
 190   // Convert
 191   //   .byte 0x66
 192   //   leaq x@tlsgd(%rip), %rdi
 193   //   .word 0x6666
 194   //   rex64
 195   //   call __tls_get_addr@plt
 196   // to
 197   //   mov %fs:0x0,%rax
 198   //   addq x@tpoff,%rax
 199   const uint8_t Inst[] = {
 200       0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0x0,%rax
 201       0x48, 0x03, 0x05, 0, 0, 0, 0,                         // addq x@tpoff,%rax
 202   };
 203   memcpy(Loc - 4, Inst, sizeof(Inst));
 204
 205   // Both code sequences are PC relatives, but since we are moving the constant
 206   // forward by 8 bytes we have to subtract the value by 8.
 207   write32le(Loc + 8, Val - 8);
 208 }
 209
// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to
// R_X86_64_TPOFF32 so that it does not use GOT.
//
// Loc points at the 32-bit field being relocated; the three bytes before it
// (REX prefix, opcode, ModR/M) are inspected and rewritten in place. Val is
// the value to store; Type is not examined here.
template <class ELFT>
void X86_64<ELFT>::relaxTlsIeToLe(uint8_t *Loc, RelType Type,
                                  uint64_t Val) const {
  // Inst: start of the 3-byte REX/opcode/ModR/M prefix of the instruction.
  uint8_t *Inst = Loc - 3;
  // Reg: the ModR/M byte shifted right by three bits.
  uint8_t Reg = Loc[-1] >> 3;
  // RegSlot: the ModR/M byte itself, rewritten by the LEA/MOV cases below.
  uint8_t *RegSlot = Loc - 1;

  // Note that ADD with RSP or R12 is converted to ADD instead of LEA
  // because LEA with these registers needs 4 bytes to encode and thus
  // wouldn't fit the space.

  // The order of the tests matters: the two 3-byte patterns (RSP / R12) must
  // be tried before the 2-byte patterns that would otherwise match them too.
  if (memcmp(Inst, "\x48\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"
    memcpy(Inst, "\x48\x81\xc4", 3);
  } else if (memcmp(Inst, "\x4c\x03\x25", 3) == 0) {
    // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"
    memcpy(Inst, "\x49\x81\xc4", 3);
  } else if (memcmp(Inst, "\x4c\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"
    memcpy(Inst, "\x4d\x8d", 2);
    *RegSlot = 0x80 | (Reg << 3) | Reg;
  } else if (memcmp(Inst, "\x48\x03", 2) == 0) {
    // "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg"
    memcpy(Inst, "\x48\x8d", 2);
    *RegSlot = 0x80 | (Reg << 3) | Reg;
  } else if (memcmp(Inst, "\x4c\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"
    memcpy(Inst, "\x49\xc7", 2);
    *RegSlot = 0xc0 | Reg;
  } else if (memcmp(Inst, "\x48\x8b", 2) == 0) {
    // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"
    memcpy(Inst, "\x48\xc7", 2);
    *RegSlot = 0xc0 | Reg;
  } else {
    // NOTE(review): there is no return after error(), so if error() returns
    // the value below is still written over the unrecognized instruction.
    error(getErrorLocation(Loc - 3) +
          "R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");
  }

  // The original code used a PC relative relocation.
  // Need to compensate for the -4 it had in the addend.
  write32le(Loc, Val + 4);
}
 254
 255 template <class ELFT>
 256 void X86_64<ELFT>::relaxTlsLdToLe(uint8_t *Loc, RelType Type,
 257                                   uint64_t Val) const {
 258   // Convert
 259   //   leaq bar@tlsld(%rip), %rdi
 260   //   callq __tls_get_addr@PLT
 261   //   leaq bar@dtpoff(%rax), %rcx
 262   // to
 263   //   .word 0x6666
 264   //   .byte 0x66
 265   //   mov %fs:0,%rax
 266   //   leaq bar@tpoff(%rax), %rcx
 267   if (Type == R_X86_64_DTPOFF64) {
 268     write64le(Loc, Val);
 269     return;
 270   }
 271   if (Type == R_X86_64_DTPOFF32) {
 272     write32le(Loc, Val);
 273     return;
 274   }
 275
 276   const uint8_t Inst[] = {
 277       0x66, 0x66,                                           // .word 0x6666
 278       0x66,                                                 // .byte 0x66
 279       0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax
 280   };
 281   memcpy(Loc - 3, Inst, sizeof(Inst));
 282 }
 283
 284 template <class ELFT>
 285 void X86_64<ELFT>::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
 286   switch (Type) {
 287   case R_X86_64_8:
 288     checkUInt<8>(Loc, Val, Type);
 289     *Loc = Val;
 290     break;
 291   case R_X86_64_16:
 292     checkUInt<16>(Loc, Val, Type);
 293     write16le(Loc, Val);
 294     break;
 295   case R_X86_64_32:
 296     checkUInt<32>(Loc, Val, Type);
 297     write32le(Loc, Val);
 298     break;
 299   case R_X86_64_32S:
 300   case R_X86_64_TPOFF32:
 301   case R_X86_64_GOT32:
 302   case R_X86_64_GOTPCREL:
 303   case R_X86_64_GOTPCRELX:
 304   case R_X86_64_REX_GOTPCRELX:
 305   case R_X86_64_PC32:
 306   case R_X86_64_GOTTPOFF:
 307   case R_X86_64_PLT32:
 308   case R_X86_64_TLSGD:
 309   case R_X86_64_TLSLD:
 310   case R_X86_64_DTPOFF32:
 311   case R_X86_64_SIZE32:
 312     checkInt<32>(Loc, Val, Type);
 313     write32le(Loc, Val);
 314     break;
 315   case R_X86_64_64:
 316   case R_X86_64_DTPOFF64:
 317   case R_X86_64_GLOB_DAT:
 318   case R_X86_64_PC64:
 319   case R_X86_64_SIZE64:
 320   case R_X86_64_GOT64:
 321     write64le(Loc, Val);
 322     break;
 323   default:
 324     error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
 325   }
 326 }
 327
 328 template <class ELFT>
 329 RelExpr X86_64<ELFT>::adjustRelaxExpr(RelType Type, const uint8_t *Data,
 330                                       RelExpr RelExpr) const {
 331   if (Type != R_X86_64_GOTPCRELX && Type != R_X86_64_REX_GOTPCRELX)
 332     return RelExpr;
 333   const uint8_t Op = Data[-2];
 334   const uint8_t ModRm = Data[-1];
 335
 336   // FIXME: When PIC is disabled and foo is defined locally in the
 337   // lower 32 bit address space, memory operand in mov can be converted into
 338   // immediate operand. Otherwise, mov must be changed to lea. We support only
 339   // latter relaxation at this moment.
 340   if (Op == 0x8b)
 341     return R_RELAX_GOT_PC;
 342
 343   // Relax call and jmp.
 344   if (Op == 0xff && (ModRm == 0x15 || ModRm == 0x25))
 345     return R_RELAX_GOT_PC;
 346
 347   // Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.
 348   // If PIC then no relaxation is available.
 349   // We also don't relax test/binop instructions without REX byte,
 350   // they are 32bit operations and not common to have.
 351   assert(Type == R_X86_64_REX_GOTPCRELX);
 352   return Config->Pic ? RelExpr : R_RELAX_GOT_PC_NOPIC;
 353 }
 354
 355 // A subset of relaxations can only be applied for no-PIC. This method
 356 // handles such relaxations. Instructions encoding information was taken from:
 357 // "Intel 64 and IA-32 Architectures Software Developer's Manual V2"
 358 // (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/
 359 //    64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)
 360 template <class ELFT>
 361 void X86_64<ELFT>::relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
 362                                  uint8_t ModRm) const {
 363   const uint8_t Rex = Loc[-3];
 364   // Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".
 365   if (Op == 0x85) {
 366     // See "TEST-Logical Compare" (4-428 Vol. 2B),
 367     // TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension).
 368
 369     // ModR/M byte has form XX YYY ZZZ, where
 370     // YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1).
 371     // XX has different meanings:
 372     // 00: The operand's memory address is in reg1.
 373     // 01: The operand's memory address is reg1 + a byte-sized displacement.
 374     // 10: The operand's memory address is reg1 + a word-sized displacement.
 375     // 11: The operand is reg1 itself.
 376     // If an instruction requires only one operand, the unused reg2 field
 377     // holds extra opcode bits rather than a register code
 378     // 0xC0 == 11 000 000 binary.
 379     // 0x38 == 00 111 000 binary.
 380     // We transfer reg2 to reg1 here as operand.
 381     // See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).
 382     Loc[-1] = 0xc0 | (ModRm & 0x38) >> 3; // ModR/M byte.
 383
 384     // Change opcode from TEST r/m64, r64 to TEST r/m64, imm32
 385     // See "TEST-Logical Compare" (4-428 Vol. 2B).
 386     Loc[-2] = 0xf7;
 387
 388     // Move R bit to the B bit in REX byte.
 389     // REX byte is encoded as 0100WRXB, where
 390     // 0100 is 4bit fixed pattern.
 391     // REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the
 392     //   default operand size is used (which is 32-bit for most but not all
 393     //   instructions).
 394     // REX.R This 1-bit value is an extension to the MODRM.reg field.
 395     // REX.X This 1-bit value is an extension to the SIB.index field.
 396     // REX.B This 1-bit value is an extension to the MODRM.rm field or the
 397     // SIB.base field.
 398     // See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A).
 399     Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2;
 400     write32le(Loc, Val);
 401     return;
 402   }
 403
 404   // If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub
 405   // or xor operations.
 406
 407   // Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".
 408   // Logic is close to one for test instruction above, but we also
 409   // write opcode extension here, see below for details.
 410   Loc[-1] = 0xc0 | (ModRm & 0x38) >> 3 | (Op & 0x3c); // ModR/M byte.
 411
 412   // Primary opcode is 0x81, opcode extension is one of:
 413   // 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,
 414   // 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.
 415   // This value was wrote to MODRM.reg in a line above.
 416   // See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15),
 417   // "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for
 418   // descriptions about each operation.
 419   Loc[-2] = 0x81;
 420   Loc[-3] = (Rex & ~0x4) | (Rex & 0x4) >> 2;
 421   write32le(Loc, Val);
 422 }
 423
 424 template <class ELFT>
 425 void X86_64<ELFT>::relaxGot(uint8_t *Loc, uint64_t Val) const {
 426   const uint8_t Op = Loc[-2];
 427   const uint8_t ModRm = Loc[-1];
 428
 429   // Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
 430   if (Op == 0x8b) {
 431     Loc[-2] = 0x8d;
 432     write32le(Loc, Val);
 433     return;
 434   }
 435
 436   if (Op != 0xff) {
 437     // We are relaxing a rip relative to an absolute, so compensate
 438     // for the old -4 addend.
 439     assert(!Config->Pic);
 440     relaxGotNoPic(Loc, Val + 4, Op, ModRm);
 441     return;
 442   }
 443
 444   // Convert call/jmp instructions.
 445   if (ModRm == 0x15) {
 446     // ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo".
 447     // Instead we convert to "addr32 call foo" where addr32 is an instruction
 448     // prefix. That makes result expression to be a single instruction.
 449     Loc[-2] = 0x67; // addr32 prefix
 450     Loc[-1] = 0xe8; // call
 451     write32le(Loc, Val);
 452     return;
 453   }
 454
 455   // Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".
 456   // jmp doesn't return, so it is fine to use nop here, it is just a stub.
 457   assert(ModRm == 0x25);
 458   Loc[-2] = 0xe9; // jmp
 459   Loc[3] = 0x90;  // nop
 460   write32le(Loc - 1, Val + 1);
 461 }
 462
 463 TargetInfo *elf::getX32TargetInfo() {
 464   static X86_64<ELF32LE> Target;
 465   return &Target;
 466 }
 467
 468 TargetInfo *elf::getX86_64TargetInfo() {
 469   static X86_64<ELF64LE> Target;
 470   return &Target;
 471 }