1 //===- X86.cpp ------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputFiles.h"
11 #include "SyntheticSections.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
24 class X86 : public TargetInfo {
27 int getTlsGdRelaxSkip(RelType type) const override;
28 RelExpr getRelExpr(RelType type, const Symbol &s,
29 const uint8_t *loc) const override;
30 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
31 void writeGotPltHeader(uint8_t *buf) const override;
32 RelType getDynRel(RelType type) const override;
33 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
34 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
35 void writePltHeader(uint8_t *buf) const override;
36 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
37 int32_t index, unsigned relOff) const override;
38 void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;
40 RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
41 RelExpr expr) const override;
42 void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
43 void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
44 void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
45 void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
51 gotRel = R_386_GLOB_DAT;
53 pltRel = R_386_JUMP_SLOT;
54 iRelativeRel = R_386_IRELATIVE;
55 relativeRel = R_386_RELATIVE;
56 symbolicRel = R_386_32;
57 tlsGotRel = R_386_TLS_TPOFF;
58 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
59 tlsOffsetRel = R_386_TLS_DTPOFF32;
62 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
64 // Align to the non-PAE large page size (known as a superpage or huge page).
65 // FreeBSD automatically promotes large, superpage-aligned allocations.
66 defaultImageBase = 0x400000;
69 int X86::getTlsGdRelaxSkip(RelType type) const {
73 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
74 const uint8_t *loc) const {
75 // There are 4 different TLS variable models with varying degrees of
76 // flexibility and performance. LocalExec and InitialExec models are fast but
77 // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
78 // dynamic section to let runtime know about that.
79 if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
80 type == R_386_TLS_GOTIE)
81 config->hasStaticTlsModel = true;
88 case R_386_TLS_LDO_32:
91 return R_TLSGD_GOTPLT;
93 return R_TLSLD_GOTPLT;
101 return R_GOTPLTONLY_PC;
106 // These relocations are arguably mis-designed because their calculations
107 // depend on the instructions they are applied to. This is bad because we
108 // usually don't care about whether the target section contains valid
109 // machine instructions or not. But this is part of the documented ABI, so
110 // we have to implement them as the standard requires.
112 // x86 does not support PC-relative data access. Therefore, in order to
113 // access GOT contents, a GOT address needs to be known at link-time
114 // (which means non-PIC) or compilers have to emit code to get a GOT
115 // address at runtime (which means code is position-independent but
116 // compilers need to emit extra code for each GOT access.) This decision
117 // is made at compile-time. In the latter case, compilers emit code to
118 // load a GOT address into a register, which is usually %ebx.
120 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or foo@GOT(%ebx).
123 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
124 // find such relocation, we should report an error. foo@GOT is resolved to
125 // an *absolute* address of foo's GOT entry, because both GOT address and
126 // foo's offset are known. In other words, it's G + A.
128 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
129 // foo's GOT entry in the table, because GOT address is not known but foo's
130 // offset in the table is known. It's G + A - GOT.
132 // It's unfortunate that compilers emit the same relocation for these
133 // different use cases. In order to distinguish them, we have to read a
134 // machine instruction.
136 // The following code implements it. We assume that loc[0] is the first byte
137 // of a displacement or an immediate field of a valid machine
138 // instruction. That means a ModRM byte is at loc[-1]. By taking a look at
139 // the byte, we can determine whether the instruction uses the operand as an
140 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
141 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
142 case R_386_TLS_GOTIE:
148 case R_386_TLS_LE_32:
153 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
154 ") against symbol " + toString(s));
159 RelExpr X86::adjustRelaxExpr(RelType type, const uint8_t *data,
160 RelExpr expr) const {
164 case R_RELAX_TLS_GD_TO_IE:
165 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
166 case R_RELAX_TLS_GD_TO_LE:
167 return R_RELAX_TLS_GD_TO_LE_NEG;
171 void X86::writeGotPltHeader(uint8_t *buf) const {
172 write32le(buf, mainPart->dynamic->getVA());
175 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
176 // Entries in .got.plt initially point back to the corresponding
177 // PLT entries with a fixed offset to skip the first instruction.
178 write32le(buf, s.getPltVA() + 6);
181 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
182 // An x86 entry is the address of the ifunc resolver function.
183 write32le(buf, s.getVA());
186 RelType X86::getDynRel(RelType type) const {
187 if (type == R_386_TLS_LE)
188 return R_386_TLS_TPOFF;
189 if (type == R_386_TLS_LE_32)
190 return R_386_TLS_TPOFF32;
194 void X86::writePltHeader(uint8_t *buf) const {
196 const uint8_t v[] = {
197 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
198 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
199 0x90, 0x90, 0x90, 0x90 // nop
201 memcpy(buf, v, sizeof(v));
205 const uint8_t pltData[] = {
206 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
207 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
208 0x90, 0x90, 0x90, 0x90, // nop
210 memcpy(buf, pltData, sizeof(pltData));
211 uint32_t gotPlt = in.gotPlt->getVA();
212 write32le(buf + 2, gotPlt + 4);
213 write32le(buf + 8, gotPlt + 8);
216 void X86::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
217 uint64_t pltEntryAddr, int32_t index,
218 unsigned relOff) const {
220 const uint8_t inst[] = {
221 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
222 0x68, 0, 0, 0, 0, // pushl $reloc_offset
223 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
225 memcpy(buf, inst, sizeof(inst));
226 write32le(buf + 2, gotPltEntryAddr - in.gotPlt->getVA());
228 const uint8_t inst[] = {
229 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
230 0x68, 0, 0, 0, 0, // pushl $reloc_offset
231 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
233 memcpy(buf, inst, sizeof(inst));
234 write32le(buf + 2, gotPltEntryAddr);
237 write32le(buf + 7, relOff);
238 write32le(buf + 12, -pltHeaderSize - pltEntrySize * index - 16);
241 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
245 return SignExtend64<8>(*buf);
248 return SignExtend64<16>(read16le(buf));
256 case R_386_TLS_LDO_32:
258 return SignExtend64<32>(read32le(buf));
264 void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
267 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
268 // being used for some 16-bit programs such as boot loaders, so
269 // we want to support them.
270 checkIntUInt(loc, val, 8, type);
274 checkInt(loc, val, 8, type);
278 checkIntUInt(loc, val, 16, type);
282 // R_386_PC16 is normally used with 16 bit code. In that situation
283 // the PC is 16 bits, just like the addend. This means that it can
284 // point from any 16 bit address to any other if the possibility
285 // of wrapping is included.
286 // The only restriction we have to check then is that the destination
287 // address fits in 16 bits. That is impossible to do here. The problem is
288 // that we are passed the final value, which already had the
289 // current location subtracted from it.
290 // We just check that Val fits in 17 bits. This misses some cases, but
291 // should have no false positives.
292 checkInt(loc, val, 17, type);
303 case R_386_TLS_DTPMOD32:
304 case R_386_TLS_DTPOFF32:
306 case R_386_TLS_GOTIE:
309 case R_386_TLS_LDO_32:
311 case R_386_TLS_LE_32:
312 case R_386_TLS_TPOFF:
313 case R_386_TLS_TPOFF32:
314 checkInt(loc, val, 32, type);
318 llvm_unreachable("unknown relocation");
322 void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
324 // leal x@tlsgd(, %ebx, 1), %eax
325 // call __tls_get_addr@plt
328 // subl $x@ntpoff,%eax
329 const uint8_t inst[] = {
330 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
331 0x81, 0xe8, 0, 0, 0, 0, // subl $Val, %eax
333 memcpy(loc - 3, inst, sizeof(inst));
334 write32le(loc + 5, val);
337 void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
339 // leal x@tlsgd(, %ebx, 1), %eax
340 // call __tls_get_addr@plt
343 // addl x@gotntpoff(%ebx), %eax
344 const uint8_t inst[] = {
345 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
346 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax
348 memcpy(loc - 3, inst, sizeof(inst));
349 write32le(loc + 5, val);
352 // In some conditions, relocations can be optimized to avoid using GOT.
353 // This function does that for Initial Exec to Local Exec case.
354 void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
355 // Ulrich's document section 6.2 says that @gotntpoff can
356 // be used with MOVL or ADDL instructions.
357 // @indntpoff is similar to @gotntpoff, but for use in
358 // position dependent code.
359 uint8_t reg = (loc[-1] >> 3) & 7;
361 if (type == R_386_TLS_IE) {
362 if (loc[-1] == 0xa1) {
363 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
364 // This case is different from the generic case below because
365 // this is a 5 byte instruction while below is 6 bytes.
367 } else if (loc[-2] == 0x8b) {
368 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
370 loc[-1] = 0xc0 | reg;
372 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
374 loc[-1] = 0xc0 | reg;
377 assert(type == R_386_TLS_GOTIE);
378 if (loc[-2] == 0x8b) {
379 // "movl foo@gotntpoff(%ebx),%reg" -> "movl $foo,%reg"
381 loc[-1] = 0xc0 | reg;
383 // "addl foo@gotntpoff(%ebx),%reg" -> "leal foo(%reg),%reg"
385 loc[-1] = 0x80 | (reg << 3) | reg;
391 void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
392 if (type == R_386_TLS_LDO_32) {
398 // leal foo(%reg),%eax
399 // call ___tls_get_addr
403 // leal 0(%esi,1),%esi
404 const uint8_t inst[] = {
405 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
407 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
409 memcpy(loc - 2, inst, sizeof(inst));
413 class RetpolinePic : public X86 {
416 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
417 void writePltHeader(uint8_t *buf) const override;
418 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
419 int32_t index, unsigned relOff) const override;
422 class RetpolineNoPic : public X86 {
425 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
426 void writePltHeader(uint8_t *buf) const override;
427 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
428 int32_t index, unsigned relOff) const override;
432 RetpolinePic::RetpolinePic() {
437 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
438 write32le(buf, s.getPltVA() + 17);
441 void RetpolinePic::writePltHeader(uint8_t *buf) const {
442 const uint8_t insn[] = {
443 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
444 0x50, // 6: pushl %eax
445 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
446 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
447 0xf3, 0x90, // 12: loop: pause
448 0x0f, 0xae, 0xe8, // 14: lfence
449 0xeb, 0xf9, // 17: jmp loop
450 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
451 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
452 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
453 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
454 0x89, 0xc8, // 2b: mov %ecx, %eax
455 0x59, // 2d: pop %ecx
457 0xcc, // 2f: int3; padding
459 memcpy(buf, insn, sizeof(insn));
462 void RetpolinePic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
463 uint64_t pltEntryAddr, int32_t index,
464 unsigned relOff) const {
465 const uint8_t insn[] = {
467 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
468 0xe8, 0, 0, 0, 0, // call plt+0x20
469 0xe9, 0, 0, 0, 0, // jmp plt+0x12
470 0x68, 0, 0, 0, 0, // pushl $reloc_offset
471 0xe9, 0, 0, 0, 0, // jmp plt+0
472 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
474 memcpy(buf, insn, sizeof(insn));
476 uint32_t ebx = in.gotPlt->getVA();
477 unsigned off = pltHeaderSize + pltEntrySize * index;
478 write32le(buf + 3, gotPltEntryAddr - ebx);
479 write32le(buf + 8, -off - 12 + 32);
480 write32le(buf + 13, -off - 17 + 18);
481 write32le(buf + 18, relOff);
482 write32le(buf + 23, -off - 27);
485 RetpolineNoPic::RetpolineNoPic() {
490 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
491 write32le(buf, s.getPltVA() + 16);
494 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
495 const uint8_t insn[] = {
496 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
497 0x50, // 6: pushl %eax
498 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
499 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
500 0xf3, 0x90, // 11: loop: pause
501 0x0f, 0xae, 0xe8, // 13: lfence
502 0xeb, 0xf9, // 16: jmp loop
503 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
504 0xcc, 0xcc, 0xcc, // 1f: int3; .align 16
505 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
506 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
507 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
508 0x89, 0xc8, // 2b: mov %ecx, %eax
509 0x59, // 2d: pop %ecx
511 0xcc, // 2f: int3; padding
513 memcpy(buf, insn, sizeof(insn));
515 uint32_t gotPlt = in.gotPlt->getVA();
516 write32le(buf + 2, gotPlt + 4);
517 write32le(buf + 8, gotPlt + 8);
520 void RetpolineNoPic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
521 uint64_t pltEntryAddr, int32_t index,
522 unsigned relOff) const {
523 const uint8_t insn[] = {
524 0x50, // 0: pushl %eax
525 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
526 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
527 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
528 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
529 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
530 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
531 0xcc, // 1f: int3; padding
533 memcpy(buf, insn, sizeof(insn));
535 unsigned off = pltHeaderSize + pltEntrySize * index;
536 write32le(buf + 2, gotPltEntryAddr);
537 write32le(buf + 7, -off - 11 + 32);
538 write32le(buf + 12, -off - 16 + 17);
539 write32le(buf + 17, relOff);
540 write32le(buf + 22, -off - 26);
543 TargetInfo *getX86TargetInfo() {
544 if (config->zRetpolineplt) {
546 static RetpolinePic t;
549 static RetpolineNoPic t;