1 //===- X86.cpp ------------------------------------------------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "InputFiles.h"
12 #include "SyntheticSections.h"
14 #include "lld/Common/ErrorHandler.h"
15 #include "llvm/Support/Endian.h"
18 using namespace llvm::support::endian;
19 using namespace llvm::ELF;
21 using namespace lld::elf;
// TargetInfo implementation for 32-bit x86 (the R_386_* relocation family).
// Every member function declared below overrides a TargetInfo hook.
24 class X86 : public TargetInfo {
// Relocation classification and implicit (in-place) addend decoding.
27 RelExpr getRelExpr(RelType Type, const Symbol &S,
28 const uint8_t *Loc) const override;
29 int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const override;
// Writers for .got.plt / .igot.plt slots and for PLT code.
30 void writeGotPltHeader(uint8_t *Buf) const override;
31 RelType getDynRel(RelType Type) const override;
32 void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
33 void writeIgotPlt(uint8_t *Buf, const Symbol &S) const override;
34 void writePltHeader(uint8_t *Buf) const override;
35 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
36 int32_t Index, unsigned RelOff) const override;
// Applies a resolved relocation value at Loc.
37 void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
// TLS relaxation hooks: each one rewrites the instruction bytes around Loc.
39 RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
40 RelExpr Expr) const override;
41 void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
42 void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
43 void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
44 void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
50 GotRel = R_386_GLOB_DAT;
52 PltRel = R_386_JUMP_SLOT;
53 IRelativeRel = R_386_IRELATIVE;
54 RelativeRel = R_386_RELATIVE;
55 TlsGotRel = R_386_TLS_TPOFF;
56 TlsModuleIndexRel = R_386_TLS_DTPMOD32;
57 TlsOffsetRel = R_386_TLS_DTPOFF32;
63 TrapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
65 // Align to the non-PAE large page size (known as a superpage or huge page).
66 // FreeBSD automatically promotes large, superpage-aligned allocations.
67 DefaultImageBase = 0x400000;
// Returns true unless the ModRM byte selects the "disp32 only" addressing
// form (mod == 00 and r/m == 101), i.e. unless the operand has no base
// register. The reg/opcode field (bits 5:3) is ignored.
static bool hasBaseReg(uint8_t ModRM) {
  const unsigned Mod = ModRM >> 6; // bits 7:6
  const unsigned RM = ModRM & 0x7; // bits 2:0
  const bool IsAbsoluteDisp32 = (Mod == 0 && RM == 5);
  return !IsAbsoluteDisp32;
}
// Classifies relocation Type into the RelExpr that describes how its value is
// computed. Loc points at the relocated field; it is used to inspect the ModRM
// byte that precedes the field when the GOT access form must be disambiguated.
// As a side effect, Config->HasStaticTlsModel is set when a Local Exec or
// Initial Exec TLS relocation is seen.
72 RelExpr X86::getRelExpr(RelType Type, const Symbol &S,
73 const uint8_t *Loc) const {
74 // There are 4 different TLS variable models with varying degrees of
75 // flexibility and performance. LocalExec and InitialExec models are fast but
76 // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
77 // dynamic section to let runtime know about that.
78 if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 || Type == R_386_TLS_IE ||
79 Type == R_386_TLS_GOTIE)
80 Config->HasStaticTlsModel = true;
86 case R_386_TLS_LDO_32:
89 return R_TLSGD_GOT_FROM_END;
91 return R_TLSLD_GOT_FROM_END;
99 return R_GOTONLY_PC_FROM_END;
104 // These relocations are arguably mis-designed because their calculations
105 // depend on the instructions they are applied to. This is bad because we
106 // usually don't care about whether the target section contains valid
107 // machine instructions or not. But this is part of the documented ABI, so
108 // we have to implement them as the standard requires.
110 // x86 does not support PC-relative data access. Therefore, in order to
111 // access GOT contents, a GOT address needs to be known at link-time
112 // (which means non-PIC) or compilers have to emit code to get a GOT
113 // address at runtime (which means code is position-independent but
114 // compilers need to emit extra code for each GOT access.) This decision
115 // is made at compile-time. In the latter case, compilers emit code to
116 // load a GOT address into a register, which is usually %ebx.
118 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
121 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
122 // find such relocation, we should report an error. foo@GOT is resolved to
123 // an *absolute* address of foo's GOT entry, because both GOT address and
124 // foo's offset are known. In other words, it's G + A.
126 // foo@GOT(%reg) needs to be resolved to a *relative* offset from a GOT to
127 // foo's GOT entry in the table, because GOT address is not known but foo's
128 // offset in the table is known. It's G + A - GOT.
130 // It's unfortunate that compilers emit the same relocation for these
131 // different use cases. In order to distinguish them, we have to read a
132 // machine instruction.
134 // The following code implements it. We assume that Loc[0] is the first
135 // byte of a displacement or an immediate field of a valid machine
136 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
137 // the byte, we can determine whether the instruction is register-relative
138 // (i.e. it was generated for foo@GOT(%reg)) or absolute (i.e. foo@GOT).
139 return hasBaseReg(Loc[-1]) ? R_GOT_FROM_END : R_GOT;
140 case R_386_TLS_GOTIE:
141 return R_GOT_FROM_END;
143 return R_GOTREL_FROM_END;
146 case R_386_TLS_LE_32:
// Maps the generic TLS relaxation expressions onto the variants this target
// uses: GD->IE becomes R_RELAX_TLS_GD_TO_IE_END and GD->LE becomes
// R_RELAX_TLS_GD_TO_LE_NEG.
155 RelExpr X86::adjustRelaxExpr(RelType Type, const uint8_t *Data,
156 RelExpr Expr) const {
160 case R_RELAX_TLS_GD_TO_IE:
161 return R_RELAX_TLS_GD_TO_IE_END;
162 case R_RELAX_TLS_GD_TO_LE:
163 return R_RELAX_TLS_GD_TO_LE_NEG;
// Fills in the .got.plt header at Buf by storing the virtual address of
// In.Dynamic as a 32-bit little-endian value.
167 void X86::writeGotPltHeader(uint8_t *Buf) const {
168 write32le(Buf, In.Dynamic->getVA());
// Writes the initial 32-bit little-endian value of symbol S's .got.plt slot
// at Buf.
171 void X86::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
172 // Entries in .got.plt initially point back to the corresponding
173 // PLT entries with a fixed offset to skip the first instruction.
// The first instruction of a PLT entry (the indirect jmp) is 6 bytes long.
174 write32le(Buf, S.getPltVA() + 6);
// Writes the 32-bit little-endian value of S.getVA() at Buf as the initial
// content of the symbol's .igot.plt entry.
177 void X86::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
178 // An x86 entry is the address of the ifunc resolver function.
179 write32le(Buf, S.getVA());
// Translates a static relocation type into the dynamic relocation type to
// emit for it. The two Local Exec TLS relocations are mapped to their
// thread-pointer-offset counterparts:
//   R_386_TLS_LE    -> R_386_TLS_TPOFF
//   R_386_TLS_LE_32 -> R_386_TLS_TPOFF32
182 RelType X86::getDynRel(RelType Type) const {
183 if (Type == R_386_TLS_LE)
184 return R_386_TLS_TPOFF;
185 if (Type == R_386_TLS_LE_32)
186 return R_386_TLS_TPOFF32;
// Writes the 16-byte PLT header at Buf. Two encodings are provided: one that
// addresses .got.plt through %ebx and one that uses absolute addresses. In
// both, the 32-bit operands of the two 6-byte instructions live at Buf + 2 and
// Buf + 8 and refer to .got.plt + 4 and .got.plt + 8 respectively.
190 void X86::writePltHeader(uint8_t *Buf) const {
192 const uint8_t V[] = {
193 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl GOTPLT+4(%ebx)
194 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *GOTPLT+8(%ebx)
195 0x90, 0x90, 0x90, 0x90 // nop
197 memcpy(Buf, V, sizeof(V));
// Ebx is the end address of In.Got (VA + size); the %ebx-relative operands
// are displacements of .got.plt from that address.
199 uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
200 uint32_t GotPlt = In.GotPlt->getVA() - Ebx;
201 write32le(Buf + 2, GotPlt + 4);
202 write32le(Buf + 8, GotPlt + 8);
// Absolute-address form: the operands are the addresses themselves.
206 const uint8_t PltData[] = {
207 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
208 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
209 0x90, 0x90, 0x90, 0x90, // nop
211 memcpy(Buf, PltData, sizeof(PltData));
212 uint32_t GotPlt = In.GotPlt->getVA();
213 write32le(Buf + 2, GotPlt + 4);
214 write32le(Buf + 8, GotPlt + 8);
// Writes one 16-byte PLT entry at Buf:
//   +0  jmp through the symbol's .got.plt slot (operand at Buf + 2)
//   +6  pushl $RelOff                          (immediate at Buf + 7)
//   +11 jmp to the start of the PLT            (rel32 at Buf + 12)
// GotPltEntryAddr is the address of the slot; Index is passed to
// getPltEntryOffset() to locate this entry.
217 void X86::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
218 uint64_t PltEntryAddr, int32_t Index,
219 unsigned RelOff) const {
220 const uint8_t Inst[] = {
221 0xff, 0x00, 0, 0, 0, 0, // jmp *foo_in_GOT or jmp *foo@GOT(%ebx)
222 0x68, 0, 0, 0, 0, // pushl $reloc_offset
223 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
225 memcpy(Buf, Inst, sizeof(Inst));
228 // jmp *foo@GOT(%ebx)
// %ebx-relative form: the operand is the slot's displacement from the end of
// In.Got.
229 uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
231 write32le(Buf + 2, GotPltEntryAddr - Ebx);
// Absolute form: the operand is the slot address itself.
235 write32le(Buf + 2, GotPltEntryAddr);
238 write32le(Buf + 7, RelOff);
// The final jmp ends 16 bytes into this entry, so its rel32 is the negated
// entry offset minus 16 (presumably getPltEntryOffset() is measured from the
// start of the PLT — confirm).
239 write32le(Buf + 12, -getPltEntryOffset(Index) - 16);
// Reads the addend stored in place at Buf for relocation Type and returns it
// sign-extended to 64 bits. Depending on Type the field is read as 8, 16
// (little-endian) or 32 (little-endian) bits wide.
242 int64_t X86::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
246 return SignExtend64<8>(*Buf);
249 return SignExtend64<16>(read16le(Buf));
257 case R_386_TLS_LDO_32:
259 return SignExtend64<32>(read32le(Buf));
// Handles relocation Type for the field at Loc with the computed value Val.
// Val is range-checked against a width chosen per relocation type (8, 16, 17
// or 32 bits below); an unknown Type is reported through error().
265 void X86::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
268 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
269 // being used for some 16-bit programs such as boot loaders, so
270 // we want to support them.
271 checkIntUInt(Loc, Val, 8, Type);
275 checkInt(Loc, Val, 8, Type);
279 checkIntUInt(Loc, Val, 16, Type);
283 // R_386_PC16 is normally used with 16 bit code. In that situation
284 // the PC is 16 bits, just like the addend. This means that it can
285 // point from any 16 bit address to any other if the possibility
286 // of wrapping is included.
287 // The only restriction we have to check then is that the destination
288 // address fits in 16 bits. That is impossible to do here. The problem is
289 // that we are passed the final value, which already had the
290 // current location subtracted from it.
291 // We just check that Val fits in 17 bits. This misses some cases, but
292 // should have no false positives.
293 checkInt(Loc, Val, 17, Type);
305 case R_386_TLS_DTPMOD32:
306 case R_386_TLS_DTPOFF32:
308 case R_386_TLS_GOTIE:
311 case R_386_TLS_LDO_32:
313 case R_386_TLS_LE_32:
314 case R_386_TLS_TPOFF:
315 case R_386_TLS_TPOFF32:
316 checkInt(Loc, Val, 32, Type);
320 error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
// Relaxes a General Dynamic TLS access to Local Exec by overwriting the
// original instruction sequence with the 12 bytes of Inst. The replacement
// starts 3 bytes before Loc, so the 32-bit immediate of the subl (bytes 8..11
// of Inst) is at Loc + 5, where Val is stored.
324 void X86::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
326 // leal x@tlsgd(, %ebx, 1),
327 // call __tls_get_addr@plt
330 // subl $x@ntpoff,%eax
331 const uint8_t Inst[] = {
332 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
333 0x81, 0xe8, 0, 0, 0, 0, // subl $Val, %eax
335 memcpy(Loc - 3, Inst, sizeof(Inst));
336 write32le(Loc + 5, Val);
// Relaxes a General Dynamic TLS access to Initial Exec by overwriting the
// original instruction sequence with the 12 bytes of Inst. The replacement
// starts 3 bytes before Loc, so the 32-bit displacement of the addl (bytes
// 8..11 of Inst) is at Loc + 5, where Val is stored.
339 void X86::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
341 // leal x@tlsgd(, %ebx, 1),
342 // call __tls_get_addr@plt
345 // addl x@gotntpoff(%ebx), %eax
346 const uint8_t Inst[] = {
347 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
348 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax
350 memcpy(Loc - 3, Inst, sizeof(Inst));
351 write32le(Loc + 5, Val);
354 // In some conditions, relocations can be optimized to avoid using GOT.
355 // This function does that for Initial Exec to Local Exec case.
// Loc points at the 32-bit field being relocated; the opcode and ModRM bytes
// of the instruction are the bytes immediately before it and are patched in
// place.
356 void X86::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
357 // Ulrich's document section 6.2 says that @gotntpoff can
358 // be used with MOVL or ADDL instructions.
359 // @indntpoff is similar to @gotntpoff, but for use in
360 // position dependent code.
// Reg is the reg field (bits 5:3) of the byte preceding the relocated field,
// i.e. the destination register when that byte is a ModRM byte.
361 uint8_t Reg = (Loc[-1] >> 3) & 7;
363 if (Type == R_386_TLS_IE) {
364 if (Loc[-1] == 0xa1) {
365 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
366 // This case is different from the generic case below because
367 // this is a 5 byte instruction while below is 6 bytes.
369 } else if (Loc[-2] == 0x8b) {
370 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
// ModRM 0xc0 | Reg: register-direct operand (mod == 11) with r/m == Reg.
372 Loc[-1] = 0xc0 | Reg;
374 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
376 Loc[-1] = 0xc0 | Reg;
379 assert(Type == R_386_TLS_GOTIE);
380 if (Loc[-2] == 0x8b) {
381 // "movl foo@gotntpoff(%ebx),%reg" -> "movl $foo,%reg"
383 Loc[-1] = 0xc0 | Reg;
385 // "addl foo@gotntpoff(%ebx),%reg" -> "leal foo(%reg),%reg"
// ModRM with mod == 10 (disp32), reg == Reg and r/m == Reg: foo(%reg),%reg.
387 Loc[-1] = 0x80 | (Reg << 3) | Reg;
// Relaxes a Local Dynamic TLS access to Local Exec. R_386_TLS_LDO_32 is
// handled separately from the instruction rewrite; for the rewrite, the
// original sequence is overwritten with Inst starting 2 bytes before Loc.
393 void X86::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
394 if (Type == R_386_TLS_LDO_32) {
400 // leal foo(%reg),%eax
401 // call ___tls_get_addr
405 // leal 0(%esi,1),%esi
406 const uint8_t Inst[] = {
407 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
409 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
411 memcpy(Loc - 2, Inst, sizeof(Inst));
// X86 variant that replaces the .got.plt slot initializer and the PLT header
// and entry writers with retpoline sequences that address the GOT through
// %ebx.
415 class RetpolinePic : public X86 {
418 void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
419 void writePltHeader(uint8_t *Buf) const override;
420 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
421 int32_t Index, unsigned RelOff) const override;
// X86 variant that replaces the .got.plt slot initializer and the PLT header
// and entry writers with retpoline sequences that use absolute GOT addresses.
424 class RetpolineNoPic : public X86 {
427 void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
428 void writePltHeader(uint8_t *Buf) const override;
429 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
430 int32_t Index, unsigned RelOff) const override;
434 RetpolinePic::RetpolinePic() {
// Initializes S's .got.plt slot to point 17 bytes into its PLT entry. In
// RetpolinePic::writePlt that is where "pushl $reloc_offset" starts (its
// immediate is patched at Buf + 18).
439 void RetpolinePic::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
440 write32le(Buf, S.getPltVA() + 17);
// Writes the retpoline PLT header that addresses .got.plt through %ebx. The
// offsets in the per-line comments are hexadecimal byte offsets from Buf.
// After copying the template, the two %ebx-relative displacements are patched:
// Buf + 2 (pushl at offset 0) and Buf + 9 (mov at offset 7).
443 void RetpolinePic::writePltHeader(uint8_t *Buf) const {
444 const uint8_t Insn[] = {
445 0xff, 0xb3, 0, 0, 0, 0, // 0: pushl GOTPLT+4(%ebx)
446 0x50, // 6: pushl %eax
447 0x8b, 0x83, 0, 0, 0, 0, // 7: mov GOTPLT+8(%ebx), %eax
448 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
449 0xf3, 0x90, // 12: loop: pause
450 0x0f, 0xae, 0xe8, // 14: lfence
451 0xeb, 0xf9, // 17: jmp loop
452 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
453 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
454 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
455 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
456 0x89, 0xc8, // 2b: mov %ecx, %eax
457 0x59, // 2d: pop %ecx
459 0xcc, // 2f: int3; padding
461 memcpy(Buf, Insn, sizeof(Insn));
// Ebx is the end address of In.Got (VA + size); GotPlt is the displacement of
// .got.plt from that address.
463 uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
464 uint32_t GotPlt = In.GotPlt->getVA() - Ebx;
465 write32le(Buf + 2, GotPlt + 4);
466 write32le(Buf + 9, GotPlt + 8);
// Writes one retpoline PLT entry that addresses the GOT through %ebx. After
// copying the template, five 32-bit fields are patched. Each rel32 is
// "target offset in PLT - (Off + offset of the end of the instruction)",
// where Off is getPltEntryOffset(Index).
469 void RetpolinePic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
470 uint64_t PltEntryAddr, int32_t Index,
471 unsigned RelOff) const {
472 const uint8_t Insn[] = {
474 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
475 0xe8, 0, 0, 0, 0, // call plt+0x20
476 0xe9, 0, 0, 0, 0, // jmp plt+0x12
477 0x68, 0, 0, 0, 0, // pushl $reloc_offset
478 0xe9, 0, 0, 0, 0, // jmp plt+0
479 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
481 memcpy(Buf, Insn, sizeof(Insn));
483 uint32_t Ebx = In.Got->getVA() + In.Got->getSize();
484 unsigned Off = getPltEntryOffset(Index);
// Displacement of the slot from the end of In.Got, used by the mov.
485 write32le(Buf + 3, GotPltEntryAddr - Ebx);
// call ends at entry offset 12; target is PLT offset 32 (0x20).
486 write32le(Buf + 8, -Off - 12 + 32);
// jmp ends at entry offset 17; target is PLT offset 18 (0x12).
487 write32le(Buf + 13, -Off - 17 + 18);
// Immediate of "pushl $reloc_offset".
488 write32le(Buf + 18, RelOff);
// jmp ends at entry offset 27; target is PLT offset 0.
489 write32le(Buf + 23, -Off - 27);
492 RetpolineNoPic::RetpolineNoPic() {
// Initializes S's .got.plt slot to point 16 bytes (0x10) into its PLT entry,
// which is where "pushl $reloc_offset" starts in RetpolineNoPic::writePlt.
497 void RetpolineNoPic::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
498 write32le(Buf, S.getPltVA() + 16);
// Writes the retpoline PLT header that uses absolute .got.plt addresses. The
// offsets in the per-line comments are hexadecimal byte offsets from Buf.
// After copying the template, the two absolute operands are patched: Buf + 2
// (pushl at offset 0) and Buf + 8 (mov at offset 7).
501 void RetpolineNoPic::writePltHeader(uint8_t *Buf) const {
502 const uint8_t Insn[] = {
503 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
504 0x50, // 6: pushl %eax
505 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
506 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
507 0xf3, 0x90, // 11: loop: pause
508 0x0f, 0xae, 0xe8, // 13: lfence
509 0xeb, 0xf9, // 16: jmp loop
510 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
511 0xcc, 0xcc, 0xcc, // 1d: int3; .align 16
512 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
513 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
514 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
515 0x89, 0xc8, // 2b: mov %ecx, %eax
516 0x59, // 2d: pop %ecx
518 0xcc, // 2f: int3; padding
520 memcpy(Buf, Insn, sizeof(Insn));
522 uint32_t GotPlt = In.GotPlt->getVA();
523 write32le(Buf + 2, GotPlt + 4);
524 write32le(Buf + 8, GotPlt + 8);
// Writes one 32-byte retpoline PLT entry that uses the absolute address of the
// symbol's GOT slot. After copying the template, five 32-bit fields are
// patched. Each rel32 is "target offset in PLT - (Off + offset of the end of
// the instruction)", where Off is getPltEntryOffset(Index).
527 void RetpolineNoPic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
528 uint64_t PltEntryAddr, int32_t Index,
529 unsigned RelOff) const {
530 const uint8_t Insn[] = {
531 0x50, // 0: pushl %eax
532 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
533 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
534 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
535 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
536 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
537 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
538 0xcc, // 1f: int3; padding
540 memcpy(Buf, Insn, sizeof(Insn));
542 unsigned Off = getPltEntryOffset(Index);
// Absolute address operand of the mov at offset 1.
543 write32le(Buf + 2, GotPltEntryAddr);
// call ends at entry offset 11; target is PLT offset 32 (0x20).
544 write32le(Buf + 7, -Off - 11 + 32);
// jmp ends at entry offset 16; target is PLT offset 17 (0x11).
545 write32le(Buf + 12, -Off - 16 + 17);
// Immediate of "pushl $reloc_offset".
546 write32le(Buf + 17, RelOff);
// jmp ends at entry offset 26; target is PLT offset 0.
547 write32le(Buf + 22, -Off - 26);
550 TargetInfo *elf::getX86TargetInfo() {
551 if (Config->ZRetpolineplt) {
553 static RetpolinePic T;
556 static RetpolineNoPic T;