1 //===- X86.cpp ------------------------------------------------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "InputFiles.h"
12 #include "SyntheticSections.h"
14 #include "lld/Common/ErrorHandler.h"
15 #include "llvm/Support/Endian.h"
18 using namespace llvm::support::endian;
19 using namespace llvm::ELF;
21 using namespace lld::elf;
// ELF target description for 32-bit x86 (the R_386_* relocation family).
// Overrides the TargetInfo hooks for relocation classification, .got.plt and
// PLT contents, relocation application and TLS relaxation.
24 class X86 : public TargetInfo {
// Relocation classification and decoding of the addend stored in place.
27 RelExpr getRelExpr(RelType Type, const Symbol &S,
28 const uint8_t *Loc) const override;
29 int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const override;
// Contents of .got.plt and of the PLT header/entries.
30 void writeGotPltHeader(uint8_t *Buf) const override;
31 RelType getDynRel(RelType Type) const override;
32 void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
33 void writeIgotPlt(uint8_t *Buf, const Symbol &S) const override;
34 void writePltHeader(uint8_t *Buf) const override;
35 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
36 int32_t Index, unsigned RelOff) const override;
// Range-checks a resolved relocation value for the given type.
37 void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
// TLS relaxations: rewrite the instruction bytes around Loc in place.
39 RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
40 RelExpr Expr) const override;
41 void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
42 void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
43 void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
44 void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
// Relocation types used for the dynamic GOT, PLT, IRELATIVE, RELATIVE and TLS
// entries of this target.
50 GotRel = R_386_GLOB_DAT;
51 PltRel = R_386_JUMP_SLOT;
52 IRelativeRel = R_386_IRELATIVE;
53 RelativeRel = R_386_RELATIVE;
54 TlsGotRel = R_386_TLS_TPOFF;
55 TlsModuleIndexRel = R_386_TLS_DTPMOD32;
56 TlsOffsetRel = R_386_TLS_DTPOFF32;
// Four consecutive one-byte INT3 opcodes.
// NOTE(review): presumably consumed as filler for gaps in executable
// sections -- confirm against the TargetInfo users of TrapInstr.
62 TrapInstr = 0xcccccccc; // 0xcc = INT3
64 // Align to the non-PAE large page size (known as a superpage or huge page).
65 // FreeBSD automatically promotes large, superpage-aligned allocations.
66 DefaultImageBase = 0x400000;
// Reports whether the operand described by a ModRM byte is addressed through
// a base register. Only the mod (bits 7-6) and r/m (bits 2-0) fields matter:
// the combination mod=00, r/m=101 is the "disp32 only" form, which has no
// base register; every other combination counts as having one.
static bool hasBaseReg(uint8_t ModRM) {
  const unsigned Mod = ModRM >> 6;
  const unsigned Rm = ModRM & 0x7;
  const bool IsDisp32Only = (Mod == 0) && (Rm == 5);
  return !IsDisp32Only;
}
// Classifies relocation Type into the RelExpr that tells the generic code how
// to compute its value. As a side effect, sets Config->HasStaticTlsModel when
// one of the LE/IE-style TLS relocations is seen. Loc points at the relocated
// field; the byte just before it (Loc[-1]) is inspected to distinguish
// foo@GOT from foo@GOT(%reg).
71 RelExpr X86::getRelExpr(RelType Type, const Symbol &S,
72 const uint8_t *Loc) const {
73 // There are 4 different TLS variable models with varying degrees of
74 // flexibility and performance. LocalExec and InitialExec models are fast but
75 // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
76 // dynamic section to let runtime know about that.
77 if (Type == R_386_TLS_LE || Type == R_386_TLS_LE_32 || Type == R_386_TLS_IE ||
78 Type == R_386_TLS_GOTIE)
79 Config->HasStaticTlsModel = true;
85 case R_386_TLS_LDO_32:
88 return R_TLSGD_GOT_FROM_END;
90 return R_TLSLD_GOT_FROM_END;
98 return R_GOTONLY_PC_FROM_END;
103 // These relocations are arguably mis-designed because their calculations
104 // depend on the instructions they are applied to. This is bad because we
105 // usually don't care about whether the target section contains valid
106 // machine instructions or not. But this is part of the documented ABI, so
107 // we have to implement them as the standard requires.
109 // x86 does not support PC-relative data access. Therefore, in order to
110 // access GOT contents, a GOT address needs to be known at link-time
111 // (which means non-PIC) or compilers have to emit code to get a GOT
112 // address at runtime (which means code is position-independent but
113 // compilers need to emit extra code for each GOT access.) This decision
114 // is made at compile-time. In the latter case, compilers emit code to
115 // load a GOT address to a register, which is usually %ebx.
117 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
// foo@GOT(%reg).
120 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
121 // find such relocation, we should report an error. foo@GOT is resolved to
122 // an *absolute* address of foo's GOT entry, because both GOT address and
123 // foo's offset are known. In other words, it's G + A.
125 // foo@GOT(%reg) needs to be resolved to a *relative* offset from a GOT to
126 // foo's GOT entry in the table, because GOT address is not known but foo's
127 // offset in the table is known. It's G + A - GOT.
129 // It's unfortunate that compilers emit the same relocation for these
130 // different use cases. In order to distinguish them, we have to read a
131 // machine instruction.
133 // The following code implements it. We assume that Loc[0] is the first
134 // byte of a displacement or an immediate field of a valid machine
135 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
136 // the byte, we can determine whether the instruction is register-relative
137 // (i.e. it was generated for foo@GOT(%reg)) or absolute (i.e. foo@GOT).
138 return hasBaseReg(Loc[-1]) ? R_GOT_FROM_END : R_GOT;
139 case R_386_TLS_GOTIE:
140 return R_GOT_FROM_END;
142 return R_GOTREL_FROM_END;
145 case R_386_TLS_LE_32:
// Replaces the generic TLS relaxation expressions with the variants this
// target uses: GD->IE becomes R_RELAX_TLS_GD_TO_IE_END and GD->LE becomes
// R_RELAX_TLS_GD_TO_LE_NEG.
154 RelExpr X86::adjustRelaxExpr(RelType Type, const uint8_t *Data,
155 RelExpr Expr) const {
159 case R_RELAX_TLS_GD_TO_IE:
160 return R_RELAX_TLS_GD_TO_IE_END;
161 case R_RELAX_TLS_GD_TO_LE:
162 return R_RELAX_TLS_GD_TO_LE_NEG;
// Stores the virtual address of InX::Dynamic as a 32-bit little-endian word
// at Buf.
166 void X86::writeGotPltHeader(uint8_t *Buf) const {
167 write32le(Buf, InX::Dynamic->getVA());
// Writes the initial value of symbol S's .got.plt slot into Buf.
170 void X86::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
171 // Entries in .got.plt initially point back to the corresponding
172 // PLT entries with a fixed offset to skip the first instruction.
// The first instruction of a PLT entry (the indirect jmp written by
// X86::writePlt) is 6 bytes long, hence the + 6.
173 write32le(Buf, S.getPltVA() + 6);
// Writes the slot for symbol S: the 32-bit little-endian value of S.getVA().
176 void X86::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
177 // An x86 entry is the address of the ifunc resolver function.
178 write32le(Buf, S.getVA());
// Maps a relocation type to the type used for the corresponding dynamic
// relocation: R_386_TLS_LE becomes R_386_TLS_TPOFF and R_386_TLS_LE_32
// becomes R_386_TLS_TPOFF32.
181 RelType X86::getDynRel(RelType Type) const {
182 if (Type == R_386_TLS_LE)
183 return R_386_TLS_TPOFF;
184 if (Type == R_386_TLS_LE_32)
185 return R_386_TLS_TPOFF32;
// Emits the 16-byte PLT header into Buf. Two templates are present: one that
// addresses .got.plt relative to %ebx and one that uses absolute addresses.
// In both, the two 32-bit operands at Buf + 2 and Buf + 8 are patched to refer
// to GOTPLT+4 and GOTPLT+8.
189 void X86::writePltHeader(uint8_t *Buf) const {
// %ebx-relative form.
191 const uint8_t V[] = {
192 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl GOTPLT+4(%ebx)
193 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *GOTPLT+8(%ebx)
194 0x90, 0x90, 0x90, 0x90 // nop
196 memcpy(Buf, V, sizeof(V));
// Ebx is the address just past InX::Got; GotPlt is the (32-bit wrapped)
// distance from that address to the start of InX::GotPlt.
198 uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
199 uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
200 write32le(Buf + 2, GotPlt + 4);
201 write32le(Buf + 8, GotPlt + 8);
// Absolute-address form.
205 const uint8_t PltData[] = {
206 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
207 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
208 0x90, 0x90, 0x90, 0x90, // nop
210 memcpy(Buf, PltData, sizeof(PltData));
211 uint32_t GotPlt = InX::GotPlt->getVA();
212 write32le(Buf + 2, GotPlt + 4);
213 write32le(Buf + 8, GotPlt + 8);
// Emits one 16-byte PLT entry into Buf: an indirect jmp through the symbol's
// .got.plt slot (GotPltEntryAddr), a push of RelOff, and a jmp back to the
// start of the PLT.
216 void X86::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
217 uint64_t PltEntryAddr, int32_t Index,
218 unsigned RelOff) const {
219 const uint8_t Inst[] = {
220 0xff, 0x00, 0, 0, 0, 0, // jmp *foo_in_GOT or jmp *foo@GOT(%ebx)
221 0x68, 0, 0, 0, 0, // pushl $reloc_offset
222 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
224 memcpy(Buf, Inst, sizeof(Inst));
227 // jmp *foo@GOT(%ebx)
// Ebx is the address just past InX::Got; the operand is the slot's
// displacement from it.
228 uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
230 write32le(Buf + 2, GotPltEntryAddr - Ebx);
// Absolute form: the operand is the slot address itself.
234 write32le(Buf + 2, GotPltEntryAddr);
// Immediate of the pushl.
237 write32le(Buf + 7, RelOff);
// rel32 of the final jmp. It is relative to the end of this 16-byte entry,
// hence the extra -16.
// NOTE(review): assumes getPltEntryOffset(Index) is this entry's offset from
// the start of the PLT -- confirm.
238 write32le(Buf + 12, -getPltEntryOffset(Index) - 16);
// Returns the addend stored in place at Buf for a relocation of type Type.
// Depending on Type the field is read as 8, 16 or 32 bits (little-endian for
// the multi-byte widths) and sign-extended to 64 bits.
241 int64_t X86::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
245 return SignExtend64<8>(*Buf);
248 return SignExtend64<16>(read16le(Buf));
256 case R_386_TLS_LDO_32:
258 return SignExtend64<32>(read32le(Buf));
// Range-checks the resolved value Val against the field width implied by Type
// and reports an error for relocation types it does not recognize.
// NOTE(review): presumably each case also stores Val at Loc after the check --
// confirm against the full function body.
264 void X86::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
267 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
268 // being used for some 16-bit programs such as boot loaders, so
269 // we want to support them.
270 checkIntUInt(Loc, Val, 8, Type);
274 checkInt(Loc, Val, 8, Type);
278 checkIntUInt(Loc, Val, 16, Type);
282 // R_386_PC16 is normally used with 16 bit code. In that situation
283 // the PC is 16 bits, just like the addend. This means that it can
284 // point from any 16 bit address to any other if the possibility
285 // of wrapping is included.
286 // The only restriction we have to check then is that the destination
287 // address fits in 16 bits. That is impossible to do here. The problem is
288 // that we are passed the final value, which already had the
289 // current location subtracted from it.
290 // We just check that Val fits in 17 bits. This misses some cases, but
291 // should have no false positives.
292 checkInt(Loc, Val, 17, Type);
// The relocation types listed here share a 32-bit range check.
304 case R_386_TLS_DTPMOD32:
305 case R_386_TLS_DTPOFF32:
307 case R_386_TLS_GOTIE:
310 case R_386_TLS_LDO_32:
312 case R_386_TLS_LE_32:
313 case R_386_TLS_TPOFF:
314 case R_386_TLS_TPOFF32:
315 checkInt(Loc, Val, 32, Type);
319 error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
// Relaxes a General Dynamic TLS access to Local Exec by overwriting the
// original leal/call pair with a 12-byte movl/subl sequence and patching Val
// into the subl immediate.
323 void X86::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
325 // leal x@tlsgd(, %ebx, 1),
326 // call __tls_get_addr@plt
329 // subl $x@ntpoff,%eax
330 const uint8_t Inst[] = {
331 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
332 0x81, 0xe8, 0, 0, 0, 0, // subl $Val, %eax
// The replacement starts 3 bytes before the relocated field.
334 memcpy(Loc - 3, Inst, sizeof(Inst));
// The subl immediate sits at offset 8 of Inst, i.e. at Loc - 3 + 8 = Loc + 5.
335 write32le(Loc + 5, Val);
// Relaxes a General Dynamic TLS access to Initial Exec by overwriting the
// original leal/call pair with a 12-byte movl/addl sequence and patching Val
// into the addl displacement.
338 void X86::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
340 // leal x@tlsgd(, %ebx, 1),
341 // call __tls_get_addr@plt
344 // addl x@gotntpoff(%ebx), %eax
345 const uint8_t Inst[] = {
346 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
347 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax
// The replacement starts 3 bytes before the relocated field.
349 memcpy(Loc - 3, Inst, sizeof(Inst));
// The addl displacement sits at offset 8 of Inst, i.e. at Loc + 5.
350 write32le(Loc + 5, Val);
353 // In some conditions, relocations can be optimized to avoid using GOT.
354 // This function does that for the Initial Exec to Local Exec case.
// It patches the opcode/ModRM bytes immediately before Loc so the instruction
// takes its operand directly instead of loading it from memory.
355 void X86::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
356 // Ulrich's document section 6.2 says that @gotntpoff can
357 // be used with MOVL or ADDL instructions.
358 // @indntpoff is similar to @gotntpoff, but for use in
359 // position dependent code.
// Reg is the reg field (bits 5-3) of the byte preceding the relocated field,
// which is the ModRM byte in the 6-byte forms handled below.
360 uint8_t Reg = (Loc[-1] >> 3) & 7;
362 if (Type == R_386_TLS_IE) {
363 if (Loc[-1] == 0xa1) {
364 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
365 // This case is different from the generic case below because
366 // this is a 5 byte instruction while below is 6 bytes.
368 } else if (Loc[-2] == 0x8b) {
369 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
// ModRM with mod=11 (register operand) and r/m=Reg.
371 Loc[-1] = 0xc0 | Reg;
373 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
375 Loc[-1] = 0xc0 | Reg;
378 assert(Type == R_386_TLS_GOTIE);
379 if (Loc[-2] == 0x8b) {
380 // "movl foo@gotntpoff(%ebx),%reg" -> "movl $foo,%reg"
382 Loc[-1] = 0xc0 | Reg;
384 // "addl foo@gotntpoff(%ebx),%reg" -> "leal foo(%reg),%reg"
// ModRM with mod=10 (base + disp32), reg=Reg and r/m=Reg.
386 Loc[-1] = 0x80 | (Reg << 3) | Reg;
// Relaxes a Local Dynamic TLS access to Local Exec. R_386_TLS_LDO_32 is
// handled separately; otherwise the leal/call sequence is overwritten with a
// load of %gs:0 into %eax followed by a 4-byte leal that leaves %esi
// unchanged.
392 void X86::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
393 if (Type == R_386_TLS_LDO_32) {
399 // leal foo(%reg),%eax
400 // call ___tls_get_addr
404 // leal 0(%esi,1),%esi
405 const uint8_t Inst[] = {
406 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
408 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
// The replacement starts 2 bytes before the relocated field.
410 memcpy(Loc - 2, Inst, sizeof(Inst));
// X86 variant whose .got.plt and PLT contents use the retpoline sequences,
// addressing .got.plt relative to %ebx.
414 class RetpolinePic : public X86 {
417 void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
418 void writePltHeader(uint8_t *Buf) const override;
419 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
420 int32_t Index, unsigned RelOff) const override;
// X86 variant whose .got.plt and PLT contents use the retpoline sequences,
// addressing .got.plt by absolute address.
423 class RetpolineNoPic : public X86 {
426 void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
427 void writePltHeader(uint8_t *Buf) const override;
428 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
429 int32_t Index, unsigned RelOff) const override;
// Constructs the %ebx-relative retpoline target.
433 RetpolinePic::RetpolinePic() {
// Writes the initial value of symbol S's .got.plt slot: offset 17 of its PLT
// entry, which is the "pushl $reloc_offset" instruction (RetpolinePic::writePlt
// stores that instruction's immediate at Buf + 18).
438 void RetpolinePic::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
439 write32le(Buf, S.getPltVA() + 17);
// Emits the retpoline PLT header (%ebx-relative form) into Buf, then patches
// the two .got.plt displacements at Buf + 2 and Buf + 9.
442 void RetpolinePic::writePltHeader(uint8_t *Buf) const {
// The leading number in each comment is the hexadecimal byte offset of the
// instruction within the header.
443 const uint8_t Insn[] = {
444 0xff, 0xb3, 0, 0, 0, 0, // 0: pushl GOTPLT+4(%ebx)
445 0x50, // 6: pushl %eax
446 0x8b, 0x83, 0, 0, 0, 0, // 7: mov GOTPLT+8(%ebx), %eax
447 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
448 0xf3, 0x90, // 12: loop: pause
449 0x0f, 0xae, 0xe8, // 14: lfence
450 0xeb, 0xf9, // 17: jmp loop
451 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
452 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
453 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
454 0x89, 0x44, 0x24, 0x04, // 27: mov %eax, 0x4(%esp)
455 0x89, 0xc8, // 2b: mov %ecx, %eax
456 0x59, // 2d: pop %ecx
458 0xcc, // 2f: int3; padding
460 memcpy(Buf, Insn, sizeof(Insn));
// Ebx is the address just past InX::Got; GotPlt is the (32-bit wrapped)
// distance from that address to the start of InX::GotPlt.
462 uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
463 uint32_t GotPlt = InX::GotPlt->getVA() - Ebx;
// Displacement of the pushl at offset 0.
464 write32le(Buf + 2, GotPlt + 4);
// Displacement of the mov at offset 7 (two opcode/ModRM bytes, then disp32).
465 write32le(Buf + 9, GotPlt + 8);
// Emits one retpoline PLT entry (%ebx-relative form) into Buf and patches its
// operands. The rel32 operands of the call/jmp instructions are relative to
// the end of the respective instruction.
// NOTE(review): the arithmetic below assumes getPltEntryOffset(Index) is this
// entry's offset from the start of the PLT -- confirm.
468 void RetpolinePic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
469 uint64_t PltEntryAddr, int32_t Index,
470 unsigned RelOff) const {
471 const uint8_t Insn[] = {
473 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
474 0xe8, 0, 0, 0, 0, // call plt+0x20
475 0xe9, 0, 0, 0, 0, // jmp plt+0x12
476 0x68, 0, 0, 0, 0, // pushl $reloc_offset
477 0xe9, 0, 0, 0, 0, // jmp plt+0
478 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
480 memcpy(Buf, Insn, sizeof(Insn));
// Ebx is the address just past InX::Got.
482 uint32_t Ebx = InX::Got->getVA() + InX::Got->getSize();
483 unsigned Off = getPltEntryOffset(Index);
// disp32 of the mov: the symbol's .got.plt slot relative to Ebx.
484 write32le(Buf + 3, GotPltEntryAddr - Ebx);
// call ends at entry offset 12; its target is PLT offset 32 (0x20).
485 write32le(Buf + 8, -Off - 12 + 32);
// jmp ends at entry offset 17; its target is PLT offset 18 (0x12).
486 write32le(Buf + 13, -Off - 17 + 18);
// Immediate of the pushl.
487 write32le(Buf + 18, RelOff);
// jmp ends at entry offset 27; its target is the start of the PLT.
488 write32le(Buf + 23, -Off - 27);
// Constructs the absolute-address retpoline target.
491 RetpolineNoPic::RetpolineNoPic() {
// Writes the initial value of symbol S's .got.plt slot: offset 16 (0x10) of
// its PLT entry, which is the "pushl $reloc_offset" instruction in
// RetpolineNoPic::writePlt.
496 void RetpolineNoPic::writeGotPlt(uint8_t *Buf, const Symbol &S) const {
497 write32le(Buf, S.getPltVA() + 16);
// Emits the retpoline PLT header (absolute-address form) into Buf, then
// patches the two .got.plt addresses at Buf + 2 and Buf + 8.
500 void RetpolineNoPic::writePltHeader(uint8_t *Buf) const {
// The leading number in each comment is the hexadecimal byte offset of the
// instruction within the header.
501 const uint8_t Insn[] = {
502 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
503 0x50, // 6: pushl %eax
504 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
505 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
506 0xf3, 0x90, // 11: loop: pause
507 0x0f, 0xae, 0xe8, // 13: lfence
508 0xeb, 0xf9, // 16: jmp loop
509 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
510 0xcc, 0xcc, 0xcc, // 1d: int3; .align 16
511 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
512 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
513 0x89, 0x44, 0x24, 0x04, // 27: mov %eax, 0x4(%esp)
514 0x89, 0xc8, // 2b: mov %ecx, %eax
515 0x59, // 2d: pop %ecx
517 0xcc, // 2f: int3; padding
519 memcpy(Buf, Insn, sizeof(Insn));
521 uint32_t GotPlt = InX::GotPlt->getVA();
// Address operand of the pushl at offset 0.
522 write32le(Buf + 2, GotPlt + 4);
// Address operand of the mov at offset 7 (one opcode byte, then the address).
523 write32le(Buf + 8, GotPlt + 8);
// Emits one 32-byte retpoline PLT entry (absolute-address form) into Buf and
// patches its operands. The rel32 operands of the call/jmp instructions are
// relative to the end of the respective instruction.
// NOTE(review): the arithmetic below assumes getPltEntryOffset(Index) is this
// entry's offset from the start of the PLT -- confirm.
526 void RetpolineNoPic::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
527 uint64_t PltEntryAddr, int32_t Index,
528 unsigned RelOff) const {
529 const uint8_t Insn[] = {
530 0x50, // 0: pushl %eax
531 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
532 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
533 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
534 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
535 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
536 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
537 0xcc, // 1f: int3; padding
539 memcpy(Buf, Insn, sizeof(Insn));
541 unsigned Off = getPltEntryOffset(Index);
// Address operand of the mov: the symbol's .got.plt slot.
542 write32le(Buf + 2, GotPltEntryAddr);
// call ends at entry offset 11; its target is PLT offset 32 (0x20).
543 write32le(Buf + 7, -Off - 11 + 32);
// jmp ends at entry offset 16; its target is PLT offset 17 (0x11).
544 write32le(Buf + 12, -Off - 16 + 17);
// Immediate of the pushl.
545 write32le(Buf + 17, RelOff);
// jmp ends at entry offset 26; its target is the start of the PLT.
546 write32le(Buf + 22, -Off - 26);
549 TargetInfo *elf::getX86TargetInfo() {
550 if (Config->ZRetpolineplt) {
552 static RetpolinePic T;
555 static RetpolineNoPic T;