1 //===- X86.cpp ------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "InputFiles.h"
11 #include "SyntheticSections.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
17 using namespace llvm::support::endian;
18 using namespace llvm::ELF;
20 using namespace lld::elf;
// i386 implementation of the TargetInfo interface. Every member function
// visible below is declared `const override`, i.e. it replaces a TargetInfo
// virtual and does not modify the target object.
// NOTE(review): the embedded listing numbers skip values (23 -> 26, 44 -> 50),
// so parts of this declaration (presumably the access specifier, the
// constructor and the closing brace) are not visible in this excerpt.
23 class X86 : public TargetInfo {
26 int getTlsGdRelaxSkip(RelType type) const override;
27 RelExpr getRelExpr(RelType type, const Symbol &s,
28 const uint8_t *loc) const override;
29 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
// PLT / GOT.PLT emitters: each writes its encoding into the caller's buffer.
30 void writeGotPltHeader(uint8_t *buf) const override;
31 RelType getDynRel(RelType type) const override;
32 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
33 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
34 void writePltHeader(uint8_t *buf) const override;
35 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
36 int32_t index, unsigned relOff) const override;
37 void relocateOne(uint8_t *loc, RelType type, uint64_t val) const override;
// TLS relaxation hooks: adjustRelaxExpr refines the relaxation expression and
// the relaxTls* functions rewrite the instruction bytes around `loc`.
39 RelExpr adjustRelaxExpr(RelType type, const uint8_t *data,
40 RelExpr expr) const override;
41 void relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const override;
42 void relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
43 void relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const override;
44 void relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const override;
// Target parameters: the R_386_* relocation types assigned to each *Rel
// field, the trap-instruction filler bytes and the default image base.
// NOTE(review): the enclosing function header is not visible in this excerpt;
// presumably this is the body of the X86 constructor -- confirm.
50 gotRel = R_386_GLOB_DAT;
52 pltRel = R_386_JUMP_SLOT;
53 iRelativeRel = R_386_IRELATIVE;
54 relativeRel = R_386_RELATIVE;
55 symbolicRel = R_386_32;
// TLS-related relocation types.
56 tlsGotRel = R_386_TLS_TPOFF;
57 tlsModuleIndexRel = R_386_TLS_DTPMOD32;
58 tlsOffsetRel = R_386_TLS_DTPOFF32;
61 trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT3
63 // Align to the non-PAE large page size (known as a superpage or huge page).
64 // FreeBSD automatically promotes large, superpage-aligned allocations.
// 0x400000 bytes = 4 MiB.
65 defaultImageBase = 0x400000;
// NOTE(review): only the signature is visible in this excerpt; the body is
// missing, so the returned skip count cannot be documented from here.
68 int X86::getTlsGdRelaxSkip(RelType type) const {
// Returns the RelExpr describing how relocation `type` against symbol `s`,
// applied at `loc`, is to be computed.
//
// Side effect: sets config->hasStaticTlsModel when `type` is one of
// R_386_TLS_LE, R_386_TLS_LE_32, R_386_TLS_IE or R_386_TLS_GOTIE.
// Relocation types that are not recognised are reported through error().
//
// `loc` is dereferenced at loc[-1] on the path that distinguishes R_GOT from
// R_GOTPLT, so on that path it must point at least one byte into a buffer.
// NOTE(review): the embedded listing numbers skip values inside this function;
// the switch header and most case labels are not visible in this excerpt.
72 RelExpr X86::getRelExpr(RelType type, const Symbol &s,
73 const uint8_t *loc) const {
74 // There are 4 different TLS variable models with varying degrees of
75 // flexibility and performance. LocalExec and InitialExec models are fast but
76 // less-flexible models. If they are in use, we set the DF_STATIC_TLS flag in
77 // the dynamic section to let the runtime know about that.
78 if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
79 type == R_386_TLS_GOTIE)
80 config->hasStaticTlsModel = true;
87 case R_386_TLS_LDO_32:
90 return R_TLSGD_GOTPLT;
92 return R_TLSLD_GOTPLT;
100 return R_GOTPLTONLY_PC;
105 // These relocations are arguably mis-designed because their calculations
106 // depend on the instructions they are applied to. This is bad because we
107 // usually don't care about whether the target section contains valid
108 // machine instructions or not. But this is part of the documented ABI, so
109 // we have to implement it as the standard requires.
111 // x86 does not support PC-relative data access. Therefore, in order to
112 // access GOT contents, a GOT address needs to be known at link-time
113 // (which means non-PIC) or compilers have to emit code to get a GOT
114 // address at runtime (which means code is position-independent but
115 // compilers need to emit extra code for each GOT access.) This decision
116 // is made at compile-time. In the latter case, compilers emit code to
117 // load a GOT address to a register, which is usually %ebx.
119 // So, there are two ways to refer to symbol foo's GOT entry: foo@GOT or
122 // foo@GOT is not usable in PIC. If we are creating a PIC output and if we
123 // find such relocation, we should report an error. foo@GOT is resolved to
124 // an *absolute* address of foo's GOT entry, because both GOT address and
125 // foo's offset are known. In other words, it's G + A.
127 // foo@GOT(%ebx) needs to be resolved to a *relative* offset from a GOT to
128 // foo's GOT entry in the table, because GOT address is not known but foo's
129 // offset in the table is known. It's G + A - GOT.
131 // It's unfortunate that compilers emit the same relocation for these
132 // different use cases. In order to distinguish them, we have to read a
133 // machine instruction.
135 // The following code implements it. We assume that Loc[0] is the first byte
136 // of a displacement or an immediate field of a valid machine
137 // instruction. That means a ModRM byte is at Loc[-1]. By taking a look at
138 // the byte, we can determine whether the instruction uses the operand as an
139 // absolute address (R_GOT) or a register-relative address (R_GOTPLT).
// The mask 0xc7 keeps the ModRM mod (bits 7-6) and r/m (bits 2-0) fields;
// the value 0x05 is mod=00, r/m=101, the 32-bit encoding of a bare disp32
// operand with no base register.
140 return (loc[-1] & 0xc7) == 0x5 ? R_GOT : R_GOTPLT;
141 case R_386_TLS_GOTIE:
147 case R_386_TLS_LE_32:
// Unknown relocation type: report it together with the location and symbol.
152 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
153 ") against symbol " + toString(s));
// Replaces a generic TLS relaxation expression with a different variant:
//   R_RELAX_TLS_GD_TO_IE -> R_RELAX_TLS_GD_TO_IE_GOTPLT
//   R_RELAX_TLS_GD_TO_LE -> R_RELAX_TLS_GD_TO_LE_NEG
// NOTE(review): the switch header and the handling of every other `expr`
// value are not visible in this excerpt.
158 RelExpr X86::adjustRelaxExpr(RelType type, const uint8_t *data,
159 RelExpr expr) const {
163 case R_RELAX_TLS_GD_TO_IE:
164 return R_RELAX_TLS_GD_TO_IE_GOTPLT;
165 case R_RELAX_TLS_GD_TO_LE:
166 return R_RELAX_TLS_GD_TO_LE_NEG;
// Writes the .got.plt header: stores the value of mainPart->dynamic->getVA()
// at `buf` with write32le.
170 void X86::writeGotPltHeader(uint8_t *buf) const {
171 write32le(buf, mainPart->dynamic->getVA());
// Writes the initial .got.plt slot for symbol `s` at `buf`.
174 void X86::writeGotPlt(uint8_t *buf, const Symbol &s) const {
175 // Entries in .got.plt initially point back to the corresponding
176 // PLT entries with a fixed offset to skip the first instruction.
// The +6 matches the 6-byte leading jmp of the PLT entries emitted by
// X86::writePlt.
177 write32le(buf, s.getPltVA() + 6);
// Writes the .igot.plt slot for symbol `s` at `buf`: the symbol's own address
// (s.getVA()), stored with write32le.
180 void X86::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
181 // An x86 entry is the address of the ifunc resolver function.
182 write32le(buf, s.getVA());
// Maps a static relocation type to a dynamic relocation type:
//   R_386_TLS_LE    -> R_386_TLS_TPOFF
//   R_386_TLS_LE_32 -> R_386_TLS_TPOFF32
// NOTE(review): the result for all other types is not visible in this
// excerpt.
185 RelType X86::getDynRel(RelType type) const {
186 if (type == R_386_TLS_LE)
187 return R_386_TLS_TPOFF;
188 if (type == R_386_TLS_LE_32)
189 return R_386_TLS_TPOFF32;
// Writes the 16-byte PLT header (PLT0) into `buf`. Two encodings are visible:
//  - `v`: addresses .got.plt through %ebx, so no address needs patching;
//  - `pltData`: uses absolute operands, patched below with GOTPLT+4 and
//    GOTPLT+8, where GOTPLT is in.gotPlt->getVA().
// NOTE(review): the condition that selects between the two variants
// (presumably PIC vs. non-PIC output) and the early return after the first
// memcpy are not visible in this excerpt.
193 void X86::writePltHeader(uint8_t *buf) const {
195 const uint8_t v[] = {
196 0xff, 0xb3, 0x04, 0x00, 0x00, 0x00, // pushl 4(%ebx)
197 0xff, 0xa3, 0x08, 0x00, 0x00, 0x00, // jmp *8(%ebx)
198 0x90, 0x90, 0x90, 0x90 // nop
200 memcpy(buf, v, sizeof(v));
204 const uint8_t pltData[] = {
205 0xff, 0x35, 0, 0, 0, 0, // pushl (GOTPLT+4)
206 0xff, 0x25, 0, 0, 0, 0, // jmp *(GOTPLT+8)
207 0x90, 0x90, 0x90, 0x90, // nop
209 memcpy(buf, pltData, sizeof(pltData));
210 uint32_t gotPlt = in.gotPlt->getVA();
// Offsets 2 and 8 are the 32-bit operands that follow each two-byte opcode.
211 write32le(buf + 2, gotPlt + 4);
212 write32le(buf + 8, gotPlt + 8);
// Writes one 16-byte PLT entry into `buf`.
//   gotPltEntryAddr - address of this entry's .got.plt slot
//   pltEntryAddr    - not used by the visible lines
//   index           - entry index, used to compute the jump back to PLT0
//   relOff          - value pushed as $reloc_offset
// Two encodings are visible: one addresses the slot relative to %ebx (the
// operand is the slot's offset from in.gotPlt->getVA()), the other uses the
// slot's absolute address.
// NOTE(review): the condition that selects between them (presumably PIC vs.
// non-PIC output) is not visible in this excerpt.
215 void X86::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
216 uint64_t pltEntryAddr, int32_t index,
217 unsigned relOff) const {
219 const uint8_t inst[] = {
220 0xff, 0xa3, 0, 0, 0, 0, // jmp *foo@GOT(%ebx)
221 0x68, 0, 0, 0, 0, // pushl $reloc_offset
222 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
224 memcpy(buf, inst, sizeof(inst));
225 write32le(buf + 2, gotPltEntryAddr - in.gotPlt->getVA());
227 const uint8_t inst[] = {
228 0xff, 0x25, 0, 0, 0, 0, // jmp *foo@GOT
229 0x68, 0, 0, 0, 0, // pushl $reloc_offset
230 0xe9, 0, 0, 0, 0, // jmp .PLT0@PC
232 memcpy(buf, inst, sizeof(inst));
233 write32le(buf + 2, gotPltEntryAddr);
// Common tail: the pushl immediate sits at offset 7 and the final jmp's rel32
// at offset 12. The displacement is measured from the end of the 16-byte
// entry, hence the trailing -16.
236 write32le(buf + 7, relOff);
237 write32le(buf + 12, -pltHeaderSize - pltEntrySize * index - 16);
// Reads the addend stored in place at `buf` and sign-extends it to 64 bits.
// The visible returns read 8, 16 or 32 bits depending on `type`.
// NOTE(review): the switch header and most case labels are not visible in this
// excerpt, so the type-to-width mapping cannot be fully documented here.
240 int64_t X86::getImplicitAddend(const uint8_t *buf, RelType type) const {
244 return SignExtend64<8>(*buf);
247 return SignExtend64<16>(read16le(buf));
255 case R_386_TLS_LDO_32:
257 return SignExtend64<32>(read32le(buf));
// Applies relocation `type` with computed value `val` at `loc`.
// The visible lines are the per-width range checks (8, 16, 17 and 32 bits);
// an unhandled type reaches llvm_unreachable.
// NOTE(review): the switch header, several case labels and the statements that
// store the value are not visible in this excerpt.
263 void X86::relocateOne(uint8_t *loc, RelType type, uint64_t val) const {
266 // R_386_{PC,}{8,16} are not part of the i386 psABI, but they are
267 // being used for some 16-bit programs such as boot loaders, so
268 // we want to support them.
269 checkIntUInt(loc, val, 8, type);
273 checkInt(loc, val, 8, type);
277 checkIntUInt(loc, val, 16, type);
281 // R_386_PC16 is normally used with 16 bit code. In that situation
282 // the PC is 16 bits, just like the addend. This means that it can
283 // point from any 16 bit address to any other if the possibility
284 // of wrapping is included.
285 // The only restriction we have to check then is that the destination
286 // address fits in 16 bits. That is impossible to do here. The problem is
287 // that we are passed the final value, which already had the
288 // current location subtracted from it.
289 // We just check that Val fits in 17 bits. This misses some cases, but
290 // should have no false positives.
291 checkInt(loc, val, 17, type);
// The TLS relocation types below share the 32-bit signed range check.
302 case R_386_TLS_DTPMOD32:
303 case R_386_TLS_DTPOFF32:
305 case R_386_TLS_GOTIE:
308 case R_386_TLS_LDO_32:
310 case R_386_TLS_LE_32:
311 case R_386_TLS_TPOFF:
312 case R_386_TLS_TPOFF32:
313 checkInt(loc, val, 32, type);
317 llvm_unreachable("unknown relocation");
// Relaxes a General Dynamic TLS access to Local Exec by overwriting the
// original leal/call sequence with the 12-byte sequence in `inst`.
// The new bytes start 3 bytes before `loc`, and `val` is stored at loc + 5,
// which is offset 8 of `inst`: the imm32 of the second instruction.
// NOTE(review): some lines of the explanatory comment are not visible in this
// excerpt.
321 void X86::relaxTlsGdToLe(uint8_t *loc, RelType type, uint64_t val) const {
323 // leal x@tlsgd(, %ebx, 1),
324 // call __tls_get_addr@plt
327 // subl $x@ntpoff,%eax
328 const uint8_t inst[] = {
329 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
330 0x81, 0xe8, 0, 0, 0, 0, // subl $Val, %eax
332 memcpy(loc - 3, inst, sizeof(inst));
333 write32le(loc + 5, val);
// Relaxes a General Dynamic TLS access to Initial Exec by overwriting the
// original leal/call sequence with the 12-byte sequence in `inst`.
// The new bytes start 3 bytes before `loc`, and `val` is stored at loc + 5,
// which is offset 8 of `inst`: the disp32 of the addl instruction.
// NOTE(review): some lines of the explanatory comment are not visible in this
// excerpt.
336 void X86::relaxTlsGdToIe(uint8_t *loc, RelType type, uint64_t val) const {
338 // leal x@tlsgd(, %ebx, 1),
339 // call __tls_get_addr@plt
342 // addl x@gotntpoff(%ebx), %eax
343 const uint8_t inst[] = {
344 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0, %eax
345 0x03, 0x83, 0, 0, 0, 0, // addl Val(%ebx), %eax
347 memcpy(loc - 3, inst, sizeof(inst));
348 write32le(loc + 5, val);
351 // In some conditions, relocations can be optimized to avoid using GOT.
352 // This function does that for Initial Exec to Local Exec case.
// It patches the byte(s) just before `loc` in place: the visible lines
// rewrite loc[-1] (the ModRM byte), and the branch conditions inspect loc[-1]
// and loc[-2] to identify the instruction being relaxed.
// NOTE(review): the lines that rewrite the opcode byte and store `val` are not
// visible in this excerpt.
353 void X86::relaxTlsIeToLe(uint8_t *loc, RelType type, uint64_t val) const {
354 // Ulrich's document section 6.2 says that @gotntpoff can
355 // be used with MOVL or ADDL instructions.
356 // @indntpoff is similar to @gotntpoff, but for use in
357 // position dependent code.
// Bits 5-3 of the byte before the relocated field: the ModRM reg field.
358 uint8_t reg = (loc[-1] >> 3) & 7;
360 if (type == R_386_TLS_IE) {
361 if (loc[-1] == 0xa1) {
362 // "movl foo@indntpoff,%eax" -> "movl $foo,%eax"
363 // This case is different from the generic case below because
364 // this is a 5 byte instruction while below is 6 bytes.
366 } else if (loc[-2] == 0x8b) {
367 // "movl foo@indntpoff,%reg" -> "movl $foo,%reg"
// New ModRM: mod=11 (register operand), r/m=reg.
369 loc[-1] = 0xc0 | reg;
371 // "addl foo@indntpoff,%reg" -> "addl $foo,%reg"
373 loc[-1] = 0xc0 | reg;
376 assert(type == R_386_TLS_GOTIE);
// NOTE(review): the two comments below mention %rip, which does not exist on
// i386; presumably they mean a GOT base register such as %ebx -- confirm.
377 if (loc[-2] == 0x8b) {
378 // "movl foo@gottpoff(%rip),%reg" -> "movl $foo,%reg"
380 loc[-1] = 0xc0 | reg;
382 // "addl foo@gotntpoff(%rip),%reg" -> "leal foo(%reg),%reg"
// New ModRM: mod=10 (base register + disp32), reg field and r/m both = reg.
384 loc[-1] = 0x80 | (reg << 3) | reg;
// Relaxes a Local Dynamic TLS access to Local Exec.
// R_386_TLS_LDO_32 is handled in its own branch. The other visible path
// overwrites the original leal/call sequence with the bytes in `inst`,
// starting 2 bytes before `loc`.
// NOTE(review): the body of the R_386_TLS_LDO_32 branch, parts of the
// explanatory comment and at least one row of `inst` are not visible in this
// excerpt.
390 void X86::relaxTlsLdToLe(uint8_t *loc, RelType type, uint64_t val) const {
391 if (type == R_386_TLS_LDO_32) {
397 // leal foo(%reg),%eax
398 // call ___tls_get_addr
402 // leal 0(%esi,1),%esi
403 const uint8_t inst[] = {
404 0x65, 0xa1, 0x00, 0x00, 0x00, 0x00, // movl %gs:0,%eax
406 0x8d, 0x74, 0x26, 0x00, // leal 0(%esi,1),%esi
408 memcpy(loc - 2, inst, sizeof(inst));
// X86 variant that overrides the .got.plt and PLT emitters with retpoline
// sequences that address .got.plt through %ebx (see the definitions).
// NOTE(review): the access specifier, constructor declaration and closing
// brace are not visible in this excerpt.
412 class RetpolinePic : public X86 {
415 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
416 void writePltHeader(uint8_t *buf) const override;
417 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
418 int32_t index, unsigned relOff) const override;
// X86 variant that overrides the .got.plt and PLT emitters with retpoline
// sequences that use absolute .got.plt addresses (see the definitions).
// NOTE(review): the access specifier, constructor declaration and closing
// brace are not visible in this excerpt.
421 class RetpolineNoPic : public X86 {
424 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
425 void writePltHeader(uint8_t *buf) const override;
426 void writePlt(uint8_t *buf, uint64_t gotPltEntryAddr, uint64_t pltEntryAddr,
427 int32_t index, unsigned relOff) const override;
// NOTE(review): only the constructor header is visible in this excerpt; its
// body (presumably PLT size settings -- confirm) is missing.
431 RetpolinePic::RetpolinePic() {
// Writes the initial .got.plt slot for symbol `s`: the address 17 bytes into
// its PLT entry (s.getPltVA() + 17), stored with write32le.
// NOTE(review): 17 presumably is the offset of the `pushl $reloc_offset`
// instruction in RetpolinePic::writePlt -- confirm against the full array.
436 void RetpolinePic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
437 write32le(buf, s.getPltVA() + 17);
// Writes the retpoline PLT header that addresses .got.plt through %ebx.
// The sequence is copied verbatim; nothing is patched afterwards because all
// operands are %ebx-relative constants. The hexadecimal number at the start
// of each row's comment is the byte offset of that row within the header.
// NOTE(review): the row at offset 0x2e (between `pop %ecx` and the final
// padding byte) is not visible in this excerpt.
440 void RetpolinePic::writePltHeader(uint8_t *buf) const {
441 const uint8_t insn[] = {
442 0xff, 0xb3, 4, 0, 0, 0, // 0: pushl 4(%ebx)
443 0x50, // 6: pushl %eax
444 0x8b, 0x83, 8, 0, 0, 0, // 7: mov 8(%ebx), %eax
445 0xe8, 0x0e, 0x00, 0x00, 0x00, // d: call next
446 0xf3, 0x90, // 12: loop: pause
447 0x0f, 0xae, 0xe8, // 14: lfence
448 0xeb, 0xf9, // 17: jmp loop
449 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 16
450 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
451 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
452 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
453 0x89, 0xc8, // 2b: mov %ecx, %eax
454 0x59, // 2d: pop %ecx
456 0xcc, // 2f: int3; padding
458 memcpy(buf, insn, sizeof(insn));
// Writes one retpoline PLT entry that addresses .got.plt through %ebx.
//   gotPltEntryAddr - address of this entry's .got.plt slot
//   pltEntryAddr    - not used by the visible lines
//   index           - entry index, used to compute `off`
//   relOff          - value pushed as $reloc_offset
// After the template is copied, five 32-bit fields are patched.
// NOTE(review): the first row of `insn` is not visible in this excerpt. The
// patch offsets (3, 8, 13, 18, 23) imply a single byte before the `mov`,
// presumably `pushl %eax` -- confirm.
461 void RetpolinePic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
462 uint64_t pltEntryAddr, int32_t index,
463 unsigned relOff) const {
464 const uint8_t insn[] = {
466 0x8b, 0x83, 0, 0, 0, 0, // mov foo@GOT(%ebx), %eax
467 0xe8, 0, 0, 0, 0, // call plt+0x20
468 0xe9, 0, 0, 0, 0, // jmp plt+0x12
469 0x68, 0, 0, 0, 0, // pushl $reloc_offset
470 0xe9, 0, 0, 0, 0, // jmp plt+0
471 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // int3; padding
473 memcpy(buf, insn, sizeof(insn));
// `ebx` is the .got.plt base that %ebx holds at run time; `off` is the byte
// offset used for this entry when computing the rel32 branches back into the
// header.
475 uint32_t ebx = in.gotPlt->getVA();
476 unsigned off = pltHeaderSize + pltEntrySize * index;
477 write32le(buf + 3, gotPltEntryAddr - ebx);
// Each rel32 is target minus end-of-instruction: targets are header offsets
// 32 (0x20), 18 (0x12) and 0; the instructions end at entry offsets 12, 17
// and 27.
478 write32le(buf + 8, -off - 12 + 32);
479 write32le(buf + 13, -off - 17 + 18);
480 write32le(buf + 18, relOff);
481 write32le(buf + 23, -off - 27);
// NOTE(review): only the constructor header is visible in this excerpt; its
// body (presumably PLT size settings -- confirm) is missing.
484 RetpolineNoPic::RetpolineNoPic() {
// Writes the initial .got.plt slot for symbol `s`: the address 16 bytes into
// its PLT entry. Offset 0x10 is the `pushl $reloc_offset` instruction of the
// entry emitted by RetpolineNoPic::writePlt.
489 void RetpolineNoPic::writeGotPlt(uint8_t *buf, const Symbol &s) const {
490 write32le(buf, s.getPltVA() + 16);
// Writes the retpoline PLT header that uses absolute .got.plt addresses.
// After the template is copied, the operands of the first pushl (offset 2)
// and of the mov (offset 8) are patched with GOTPLT+4 and GOTPLT+8, where
// GOTPLT is in.gotPlt->getVA(). The hexadecimal number at the start of each
// row's comment is the byte offset of that row within the header.
// NOTE(review): the row at offset 0x2e (between `pop %ecx` and the final
// padding byte) is not visible in this excerpt.
493 void RetpolineNoPic::writePltHeader(uint8_t *buf) const {
494 const uint8_t insn[] = {
495 0xff, 0x35, 0, 0, 0, 0, // 0: pushl GOTPLT+4
496 0x50, // 6: pushl %eax
497 0xa1, 0, 0, 0, 0, // 7: mov GOTPLT+8, %eax
498 0xe8, 0x0f, 0x00, 0x00, 0x00, // c: call next
499 0xf3, 0x90, // 11: loop: pause
500 0x0f, 0xae, 0xe8, // 13: lfence
501 0xeb, 0xf9, // 16: jmp loop
502 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 18: int3
503 0xcc, 0xcc, 0xcc, // 1d: int3; .align 16
504 0x89, 0x0c, 0x24, // 20: next: mov %ecx, (%esp)
505 0x8b, 0x4c, 0x24, 0x04, // 23: mov 0x4(%esp), %ecx
506 0x89, 0x44, 0x24, 0x04, // 27: mov %eax ,0x4(%esp)
507 0x89, 0xc8, // 2b: mov %ecx, %eax
508 0x59, // 2d: pop %ecx
510 0xcc, // 2f: int3; padding
512 memcpy(buf, insn, sizeof(insn));
514 uint32_t gotPlt = in.gotPlt->getVA();
515 write32le(buf + 2, gotPlt + 4);
516 write32le(buf + 8, gotPlt + 8);
// Writes one 32-byte retpoline PLT entry that uses absolute addresses.
//   gotPltEntryAddr - address of this entry's .got.plt slot
//   pltEntryAddr    - not used by the visible lines
//   index           - entry index, used to compute `off`
//   relOff          - value pushed as $reloc_offset
// After the template is copied, five 32-bit fields are patched. The
// hexadecimal number at the start of each row's comment is the byte offset of
// that row within the entry.
519 void RetpolineNoPic::writePlt(uint8_t *buf, uint64_t gotPltEntryAddr,
520 uint64_t pltEntryAddr, int32_t index,
521 unsigned relOff) const {
522 const uint8_t insn[] = {
523 0x50, // 0: pushl %eax
524 0xa1, 0, 0, 0, 0, // 1: mov foo_in_GOT, %eax
525 0xe8, 0, 0, 0, 0, // 6: call plt+0x20
526 0xe9, 0, 0, 0, 0, // b: jmp plt+0x11
527 0x68, 0, 0, 0, 0, // 10: pushl $reloc_offset
528 0xe9, 0, 0, 0, 0, // 15: jmp plt+0
529 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding
530 0xcc, // 1f: int3; padding
532 memcpy(buf, insn, sizeof(insn));
// `off` is the byte offset used for this entry when computing the rel32
// branches back into the header.
534 unsigned off = pltHeaderSize + pltEntrySize * index;
535 write32le(buf + 2, gotPltEntryAddr);
// Each rel32 is target minus end-of-instruction: targets are header offsets
// 32 (0x20), 17 (0x11) and 0; the instructions end at entry offsets 11, 16
// and 26.
536 write32le(buf + 7, -off - 11 + 32);
537 write32le(buf + 12, -off - 16 + 17);
538 write32le(buf + 17, relOff);
539 write32le(buf + 22, -off - 26);
// Returns the TargetInfo object for the i386 target. When
// config->zRetpolineplt is set, one of the function-local static retpoline
// variants (RetpolinePic or RetpolineNoPic) is used.
// NOTE(review): the condition that chooses between the two variants
// (presumably PIC vs. non-PIC output), the return statements and the
// non-retpoline path are not visible in this excerpt.
542 TargetInfo *elf::getX86TargetInfo() {
543 if (config->zRetpolineplt) {
545 static RetpolinePic t;
548 static RetpolineNoPic t;