1 //===- ARM.cpp ------------------------------------------------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "InputFiles.h"
12 #include "SyntheticSections.h"
15 #include "lld/Common/ErrorHandler.h"
16 #include "llvm/Object/ELF.h"
17 #include "llvm/Support/Endian.h"
20 using namespace llvm::support::endian;
21 using namespace llvm::ELF;
23 using namespace lld::elf;
// TargetInfo implementation for the ARM target. Every member function visible
// here overrides a TargetInfo hook; the definitions follow later in this file.
26 class ARM final : public TargetInfo {
// Computes the ELF header e_flags value (EABI version plus float ABI).
29 uint32_t calcEFlags() const override;
// Maps a relocation type to the RelExpr describing how its value is computed.
30 RelExpr getRelExpr(RelType Type, const Symbol &S,
31 const uint8_t *Loc) const override;
32 RelType getDynRel(RelType Type) const override;
// Decodes the addend stored in the instruction/data at Buf (REL-style
// relocations keep the addend in place).
33 int64_t getImplicitAddend(const uint8_t *Buf, RelType Type) const override;
// Writers for the GOT/PLT contents.
34 void writeGotPlt(uint8_t *Buf, const Symbol &S) const override;
35 void writeIgotPlt(uint8_t *Buf, const Symbol &S) const override;
36 void writePltHeader(uint8_t *Buf) const override;
37 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
38 int32_t Index, unsigned RelOff) const override;
// Add the "$a"/"$d" synthetic local symbols for PLT entries and the header.
39 void addPltSymbols(InputSection &IS, uint64_t Off) const override;
40 void addPltHeaderSymbols(InputSection &ISD) const override;
// Thunk support: interworking and branch-range checks.
41 bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
42 uint64_t BranchAddr, const Symbol &S) const override;
43 bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
// Applies a relocation of the given type at Loc using the value Val.
44 void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
// NOTE(review): the enclosing function header is not visible in this view;
// these assignments presumably form the ARM target constructor body - confirm.
// Dynamic relocation types this target uses for each generic role.
50 RelativeRel = R_ARM_RELATIVE;
51 IRelativeRel = R_ARM_IRELATIVE;
52 GotRel = R_ARM_GLOB_DAT;
53 PltRel = R_ARM_JUMP_SLOT;
54 TlsGotRel = R_ARM_TLS_TPOFF32;
55 TlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
56 TlsOffsetRel = R_ARM_TLS_DTPOFF32;
57 GotBaseSymInGotPlt = false;
// Filler word; the PLT writers in this file use it to pad entries.
62 TrapInstr = 0xd4d4d4d4;
63 // ARM uses Variant 1 TLS
67 // The placing of pre-created ThunkSections is controlled by the
68 // ThunkSectionSpacing parameter. The aim is to place the
69 // ThunkSection such that all branches from the InputSections prior to the
70 // ThunkSection can reach a Thunk placed at the end of the ThunkSection.
72 // | up to ThunkSectionSpacing .text input sections |
74 // | up to ThunkSectionSpacing .text input sections |
77 // Pre-created ThunkSections are spaced roughly 16MiB apart on ARM. This is to
78 // match the most common expected case of a Thumb 2 encoded BL, BLX or B.W:
79 // ARM B, BL, BLX range +/- 32MiB
80 // Thumb B.W, BL, BLX range +/- 16MiB
81 // Thumb B<cc>.W range +/- 1MiB
82 // If a branch cannot reach a pre-created ThunkSection a new one will be
83 // created so we can handle the rare cases of a Thumb 2 conditional branch.
84 // We intentionally use a lower size for ThunkSectionSpacing than the maximum
85 // branch range so the end of the ThunkSection is more likely to be within
86 // range of the branch instruction that is furthest away. The value we shorten
87 // ThunkSectionSpacing by is set conservatively to allow us to create 16,384
88 // 12 byte Thunks at any offset in a ThunkSection without risk of a branch to
89 // one of the Thunks going out of range.
91 // FIXME: lld assumes that the Thumb BL and BLX encoding permits the J1 and
92 // J2 bits to be used to extend the branch range. On earlier Architectures
93 // such as ARMv4, ARMv5 and ARMv6 (except ARMv6T2) the range is +/- 4MiB. If
94 // support for the earlier encodings is added then when they are used the
95 // ThunkSectionSpacing will need lowering.
// 0x1000000 is 16MiB; 0x30000 is 16,384 * 12 bytes, the Thunk allowance
// described above.
96 ThunkSectionSpacing = 0x1000000 - 0x30000;
// Returns the ELF header flags for the output: EF_ARM_EABI_VER5 combined with
// the float ABI flag selected by Config->ARMVFPArgs.
99 uint32_t ARM::calcEFlags() const {
100 // The ABIFloatType is used by loaders to detect the floating point calling
// Stays 0 when ARMVFPArgs is none of Base, Default or VFP.
102 uint32_t ABIFloatType = 0;
103 if (Config->ARMVFPArgs == ARMVFPArgKind::Base ||
104 Config->ARMVFPArgs == ARMVFPArgKind::Default)
105 ABIFloatType = EF_ARM_ABI_FLOAT_SOFT;
106 else if (Config->ARMVFPArgs == ARMVFPArgKind::VFP)
107 ABIFloatType = EF_ARM_ABI_FLOAT_HARD;
109 // We don't currently use any features incompatible with EF_ARM_EABI_VER5,
110 // but we don't have any firm guarantees of conformance. Linux AArch64
111 // kernels (as of 2016) require an EABI version to be set.
112 return EF_ARM_EABI_VER5 | ABIFloatType;
// Maps a relocation type to a RelExpr. Only part of the dispatch is visible in
// this view; the visible pieces show that the result for some types depends on
// the Config->Target1Rel and Config->Target2 settings.
115 RelExpr ARM::getRelExpr(RelType Type, const Symbol &S,
116 const uint8_t *Loc) const {
118 case R_ARM_THM_JUMP11:
125 case R_ARM_THM_JUMP19:
126 case R_ARM_THM_JUMP24:
133 // GOT(S) + A - GOT_ORG
// PC-relative when Target1Rel is set, absolute otherwise.
142 return Config->Target1Rel ? R_PC : R_ABS;
144 if (Config->Target2 == Target2Policy::Rel)
146 if (Config->Target2 == Target2Policy::Abs)
151 case R_ARM_TLS_LDM32:
153 case R_ARM_BASE_PREL:
155 // FIXME: currently B(S) assumed to be .got, this may not hold for all
158 case R_ARM_MOVW_PREL_NC:
159 case R_ARM_MOVT_PREL:
161 case R_ARM_THM_MOVW_PREL_NC:
162 case R_ARM_THM_MOVT_PREL:
// Selects the dynamic relocation type for Type. The visible condition singles
// out R_ARM_ABS32, and R_ARM_TARGET1 when Config->Target1Rel is not set; the
// values returned are not visible in this view.
173 RelType ARM::getDynRel(RelType Type) const {
174 if ((Type == R_ARM_ABS32) || (Type == R_ARM_TARGET1 && !Config->Target1Rel))
// Stores the address of InX::Plt into Buf as a 32-bit little-endian word. The
// symbol argument is unused: the same value is written for every symbol.
179 void ARM::writeGotPlt(uint8_t *Buf, const Symbol &) const {
180 write32le(Buf, InX::Plt->getVA());
// Stores the address of symbol S into Buf as a 32-bit little-endian word.
183 void ARM::writeIgotPlt(uint8_t *Buf, const Symbol &S) const {
184 // An ARM entry is the address of the ifunc resolver function.
185 write32le(Buf, S.getVA());
188 // Long form PLT Header that does not have any restrictions on the displacement
189 // of the .plt from the .got.plt.
// Copies a fixed 32-byte template into Buf, then patches the literal word at
// offset 16 (label L2) with the displacement from L1 to the .got.plt.
190 static void writePltHeaderLong(uint8_t *Buf) {
191 const uint8_t PltData[] = {
192 0x04, 0xe0, 0x2d, 0xe5, // str lr, [sp,#-4]!
193 0x04, 0xe0, 0x9f, 0xe5, // ldr lr, L2
194 0x0e, 0xe0, 0x8f, 0xe0, // L1: add lr, pc, lr
195 0x08, 0xf0, 0xbe, 0xe5, // ldr pc, [lr, #8]
196 0x00, 0x00, 0x00, 0x00, // L2: .word &(.got.plt) - L1 - 8
197 0xd4, 0xd4, 0xd4, 0xd4, // Pad to 32-byte boundary
198 0xd4, 0xd4, 0xd4, 0xd4, // Pad to 32-byte boundary
199 0xd4, 0xd4, 0xd4, 0xd4};
200 memcpy(Buf, PltData, sizeof(PltData));
201 uint64_t GotPlt = InX::GotPlt->getVA();
// L1 is the third instruction of the header, 8 bytes from the start of .plt.
202 uint64_t L1 = InX::Plt->getVA() + 8;
// The extra -8 matches the template comment for L2: in ARM state the pc read
// by the add at L1 is 8 bytes ahead of L1.
203 write32le(Buf + 16, GotPlt - L1 - 8);
206 // The default PLT header requires the .got.plt to be within 128 MiB of the
207 // .plt in the positive direction.
// Writes the 32-byte PLT header into Buf. The displacement to the .got.plt is
// split across the immediates of three instructions; when it does not fit in
// 27 unsigned bits the long form is used instead.
208 void ARM::writePltHeader(uint8_t *Buf) const {
209 // Use a similar sequence to that in writePlt(), the difference is the calling
210 // conventions mean we use lr instead of ip. The PLT entry is responsible for
211 // saving lr on the stack, the dynamic loader is responsible for reloading
213 const uint32_t PltData[] = {
214 0xe52de004, // L1: str lr, [sp,#-4]!
215 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
216 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
217 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
220 uint64_t Offset = InX::GotPlt->getVA() - InX::Plt->getVA() - 4;
// A .got.plt placed below the .plt wraps to a huge unsigned value and is
// rejected by this check as well.
221 if (!llvm::isUInt<27>(Offset)) {
222 // We cannot encode the Offset, use the long form.
223 writePltHeaderLong(Buf);
226 write32le(Buf + 0, PltData[0]);
// Offset bits [27:20], [19:12] and [11:0] go into the three immediates.
227 write32le(Buf + 4, PltData[1] | ((Offset >> 20) & 0xff));
228 write32le(Buf + 8, PltData[2] | ((Offset >> 12) & 0xff));
229 write32le(Buf + 12, PltData[3] | (Offset & 0xfff));
230 write32le(Buf + 16, TrapInstr); // Pad to 32-byte boundary
231 write32le(Buf + 20, TrapInstr);
232 write32le(Buf + 24, TrapInstr);
233 write32le(Buf + 28, TrapInstr);
// Adds two synthetic local symbols to the PLT header in IS: "$a" at offset 0,
// where the instructions start, and "$d" at offset 16, where both header forms
// hold non-instruction words (the literal in the long form, padding otherwise).
236 void ARM::addPltHeaderSymbols(InputSection &IS) const {
237 addSyntheticLocal("$a", STT_NOTYPE, 0, 0, IS);
238 addSyntheticLocal("$d", STT_NOTYPE, 16, 0, IS);
241 // Long form PLT entries that do not have any restrictions on the displacement
242 // of the .plt from the .got.plt.
// Copies a fixed template into Buf and patches the literal word at offset 12
// (label L2) with the displacement from L1 to the entry at GotPltEntryAddr.
243 static void writePltLong(uint8_t *Buf, uint64_t GotPltEntryAddr,
244 uint64_t PltEntryAddr, int32_t Index,
246 const uint8_t PltData[] = {
247 0x04, 0xc0, 0x9f, 0xe5, // ldr ip, L2
248 0x0f, 0xc0, 0x8c, 0xe0, // L1: add ip, ip, pc
249 0x00, 0xf0, 0x9c, 0xe5, // ldr pc, [ip]
250 0x00, 0x00, 0x00, 0x00, // L2: .word Offset(&(.got.plt entry) - L1 - 8)
252 memcpy(Buf, PltData, sizeof(PltData));
// L1 is the second instruction of the entry, 4 bytes from its start.
253 uint64_t L1 = PltEntryAddr + 4;
254 write32le(Buf + 12, GotPltEntryAddr - L1 - 8);
257 // The default PLT entries require the .got.plt to be within 128 MiB of the
258 // .plt in the positive direction.
// Writes one 16-byte PLT entry into Buf. The displacement from the entry to
// GotPltEntryAddr is split across the immediates of three instructions; when
// it does not fit in 27 unsigned bits the long form is used instead.
259 void ARM::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
260 uint64_t PltEntryAddr, int32_t Index,
261 unsigned RelOff) const {
262 // The PLT entry is similar to the example given in Appendix A of ELF for
263 // the Arm Architecture. Instead of using the Group Relocations to find the
264 // optimal rotation for the 8-bit immediate used in the add instructions we
265 // hard code the most compact rotations for simplicity. This saves a load
266 // instruction over the long plt sequences.
267 const uint32_t PltData[] = {
268 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt entry) - L1 - 8)
269 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt entry) - L1 - 8)
270 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt entry) - L1 - 8)
273 uint64_t Offset = GotPltEntryAddr - PltEntryAddr - 8;
274 if (!llvm::isUInt<27>(Offset)) {
275 // We cannot encode the Offset, use the long form.
276 writePltLong(Buf, GotPltEntryAddr, PltEntryAddr, Index, RelOff);
// Offset bits [27:20], [19:12] and [11:0] go into the three immediates.
279 write32le(Buf + 0, PltData[0] | ((Offset >> 20) & 0xff));
280 write32le(Buf + 4, PltData[1] | ((Offset >> 12) & 0xff));
281 write32le(Buf + 8, PltData[2] | (Offset & 0xfff));
282 write32le(Buf + 12, TrapInstr); // Pad to 16-byte boundary
// Adds two synthetic local symbols for the PLT entry at offset Off in IS:
// "$a" at the start of the entry and "$d" at Off + 12, the entry's last word
// (the literal in the long form, padding otherwise).
285 void ARM::addPltSymbols(InputSection &IS, uint64_t Off) const {
286 addSyntheticLocal("$a", STT_NOTYPE, Off, 0, IS);
287 addSyntheticLocal("$d", STT_NOTYPE, Off + 12, 0, IS);
// Decides whether the branch at BranchAddr with relocation Type to symbol S
// must go through a thunk. The visible logic covers two reasons: an ARM/Thumb
// state change that the branch cannot perform itself, and a destination that
// inBranchRange() reports as unreachable. For R_PLT_PC expressions the
// destination is the symbol's PLT entry rather than the symbol itself.
290 bool ARM::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
291 uint64_t BranchAddr, const Symbol &S) const {
292 // If S is an undefined weak symbol and does not have a PLT entry then it
293 // will be resolved as a branch to the next instruction.
294 if (S.isUndefWeak() && !S.isInPlt())
296 // A state change from ARM to Thumb and vice versa must go through an
297 // interworking thunk if the relocation type is not R_ARM_CALL or
303 // Source is ARM, all PLT entries are ARM so no interworking required.
304 // Otherwise we need to interwork if Symbol has bit 0 set (Thumb).
305 if (Expr == R_PC && ((S.getVA() & 1) == 1))
309 uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA();
310 return !inBranchRange(Type, BranchAddr, Dst);
312 case R_ARM_THM_JUMP19:
313 case R_ARM_THM_JUMP24:
314 // Source is Thumb, all PLT entries are ARM so interworking is required.
315 // Otherwise we need to interwork if Symbol has bit 0 clear (ARM).
316 if (Expr == R_PLT_PC || ((S.getVA() & 1) == 0))
319 case R_ARM_THM_CALL: {
320 uint64_t Dst = (Expr == R_PLT_PC) ? S.getPltVA() : S.getVA();
321 return !inBranchRange(Type, BranchAddr, Dst);
// Returns true when a branch of relocation Type located at Src can reach Dst,
// i.e. when the absolute distance between them does not exceed Range. How
// Range is derived per relocation type is not visible in this view.
327 bool ARM::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
339 case R_ARM_THM_JUMP19:
343 case R_ARM_THM_JUMP24:
351 // PC at Src is 2 instructions ahead, immediate of branch is signed
353 Range -= 2 * InstrSize;
357 if ((Dst & 0x1) == 0)
358 // Destination is ARM, if ARM caller then Src is already 4-byte aligned.
359 // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure
360 // destination will be 4 byte aligned.
363 // Bit 0 == 1 denotes Thumb state, it is not part of the range
// Unsigned absolute difference; avoids a negative intermediate.
366 uint64_t Distance = (Src > Dst) ? Src - Dst : Dst - Src;
367 return Distance <= Range;
// Applies the relocation Type at Loc, patching the data word or instruction in
// place with the already-computed value Val. Branch and MOVT values are passed
// to checkInt() with the bit width of the encoding before being written. An
// unrecognized relocation type is reported through error().
370 void ARM::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
373 case R_ARM_BASE_PREL:
385 case R_ARM_TLS_LDM32:
386 case R_ARM_TLS_LDO32:
388 case R_ARM_TLS_TPOFF32:
389 case R_ARM_TLS_DTPOFF32:
392 case R_ARM_TLS_DTPMOD32:
396 checkInt(Loc, Val, 31, Type);
// Bit 31 of the existing word is preserved; only the low 31 bits are replaced.
397 write32le(Loc, (read32le(Loc) & 0x80000000) | (Val & ~0x80000000));
400 // R_ARM_CALL is used for BL and BLX instructions, depending on the
401 // value of bit 0 of Val, we must select a BL or BLX instruction
403 // If bit 0 of Val is 1 the target is Thumb, we must select a BLX.
404 // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1'
405 checkInt(Loc, Val, 26, Type);
406 write32le(Loc, 0xfa000000 | // opcode
407 ((Val & 2) << 23) | // H
408 ((Val >> 2) & 0x00ffffff)); // imm24
411 if ((read32le(Loc) & 0xfe000000) == 0xfa000000)
412 // BLX (always unconditional) instruction to an ARM Target, select an
414 write32le(Loc, 0xeb000000 | (read32le(Loc) & 0x00ffffff));
415 // fall through as BL encoding is shared with B
420 checkInt(Loc, Val, 26, Type);
421 write32le(Loc, (read32le(Loc) & ~0x00ffffff) | ((Val >> 2) & 0x00ffffff));
423 case R_ARM_THM_JUMP11:
424 checkInt(Loc, Val, 12, Type);
// The instruction is 16 bits wide, so read exactly two bytes. A 32-bit read
// would touch two bytes past the instruction while the mask keeps only the
// low half anyway.
425 write16le(Loc, (read16le(Loc) & 0xf800) | ((Val >> 1) & 0x07ff));
427 case R_ARM_THM_JUMP19:
428 // Encoding T3: Val = S:J2:J1:imm6:imm11:0
429 checkInt(Loc, Val, 21, Type);
431 (read16le(Loc) & 0xfbc0) | // opcode cond
432 ((Val >> 10) & 0x0400) | // S
433 ((Val >> 12) & 0x003f)); // imm6
436 ((Val >> 8) & 0x0800) | // J2
437 ((Val >> 5) & 0x2000) | // J1
438 ((Val >> 1) & 0x07ff)); // imm11
441 // R_ARM_THM_CALL is used for BL and BLX instructions, depending on the
442 // value of bit 0 of Val, we must select a BL or BLX instruction
443 if ((Val & 1) == 0) {
444 // Ensure BLX destination is 4-byte aligned. As BLX instruction may
445 // only be two byte aligned. This must be done before overflow check
446 Val = alignTo(Val, 4);
448 // Bit 12 is 0 for BLX, 1 for BL
449 write16le(Loc + 2, (read16le(Loc + 2) & ~0x1000) | (Val & 1) << 12);
450 // Fall through as rest of encoding is the same as B.W
452 case R_ARM_THM_JUMP24:
453 // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
454 // FIXME: Use of I1 and I2 require v6T2ops
455 checkInt(Loc, Val, 25, Type);
458 ((Val >> 14) & 0x0400) | // S
459 ((Val >> 12) & 0x03ff)); // imm10
461 (read16le(Loc + 2) & 0xd000) | // opcode
462 (((~(Val >> 10)) ^ (Val >> 11)) & 0x2000) | // J1
463 (((~(Val >> 11)) ^ (Val >> 13)) & 0x0800) | // J2
464 ((Val >> 1) & 0x07ff)); // imm11
466 case R_ARM_MOVW_ABS_NC:
467 case R_ARM_MOVW_PREL_NC:
468 write32le(Loc, (read32le(Loc) & ~0x000f0fff) | ((Val & 0xf000) << 4) |
472 case R_ARM_MOVT_PREL:
473 checkInt(Loc, Val, 32, Type);
474 write32le(Loc, (read32le(Loc) & ~0x000f0fff) |
475 (((Val >> 16) & 0xf000) << 4) | ((Val >> 16) & 0xfff));
477 case R_ARM_THM_MOVT_ABS:
478 case R_ARM_THM_MOVT_PREL:
479 // Encoding T1: A = imm4:i:imm3:imm8
480 checkInt(Loc, Val, 32, Type);
483 ((Val >> 17) & 0x0400) | // i
484 ((Val >> 28) & 0x000f)); // imm4
486 (read16le(Loc + 2) & 0x8f00) | // opcode
487 ((Val >> 12) & 0x7000) | // imm3
488 ((Val >> 16) & 0x00ff)); // imm8
490 case R_ARM_THM_MOVW_ABS_NC:
491 case R_ARM_THM_MOVW_PREL_NC:
492 // Encoding T3: A = imm4:i:imm3:imm8
495 ((Val >> 1) & 0x0400) | // i
496 ((Val >> 12) & 0x000f)); // imm4
498 (read16le(Loc + 2) & 0x8f00) | // opcode
499 ((Val << 4) & 0x7000) | // imm3
500 (Val & 0x00ff)); // imm8
503 error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
// Decodes the addend stored in place at Buf for relocation Type and returns
// it sign-extended to 64 bits. Each case reassembles the immediate fields of
// the data word or instruction and sign-extends from the full width of the
// reassembled value, so the top (S) bit is always the sign bit.
507 int64_t ARM::getImplicitAddend(const uint8_t *Buf, RelType Type) const {
512 case R_ARM_BASE_PREL:
520 case R_ARM_TLS_LDM32:
521 case R_ARM_TLS_LDO32:
524 return SignExtend64<32>(read32le(Buf));
526 return SignExtend64<31>(read32le(Buf));
531 return SignExtend64<26>(read32le(Buf) << 2);
532 case R_ARM_THM_JUMP11:
533 return SignExtend64<12>(read16le(Buf) << 1);
534 case R_ARM_THM_JUMP19: {
535 // Encoding T3: A = S:J2:J1:imm6:imm11:0
536 uint16_t Hi = read16le(Buf);
537 uint16_t Lo = read16le(Buf + 2);
// The value is 21 bits wide with S at bit 20. Extending from 20 bits would
// drop S and treat J2 as the sign, mis-decoding addends of 512KiB or more in
// magnitude.
538 return SignExtend64<21>(((Hi & 0x0400) << 10) | // S
539 ((Lo & 0x0800) << 8) | // J2
540 ((Lo & 0x2000) << 5) | // J1
541 ((Hi & 0x003f) << 12) | // imm6
542 ((Lo & 0x07ff) << 1)); // imm11:0
545 case R_ARM_THM_JUMP24: {
546 // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
547 // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
548 // FIXME: I1 and I2 require v6T2ops
549 uint16_t Hi = read16le(Buf);
550 uint16_t Lo = read16le(Buf + 2);
// The value is 25 bits wide with S at bit 24. Extending from 24 bits would
// drop S and treat I1 as the sign, mis-decoding addends of 8MiB or more in
// magnitude.
551 return SignExtend64<25>(((Hi & 0x0400) << 14) | // S
552 (~((Lo ^ (Hi << 3)) << 10) & 0x00800000) | // I1
553 (~((Lo ^ (Hi << 1)) << 11) & 0x00400000) | // I2
554 ((Hi & 0x003ff) << 12) | // imm10
555 ((Lo & 0x007ff) << 1)); // imm11:0
557 // ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and
558 // MOVT is in the range -32768 <= A < 32768
559 case R_ARM_MOVW_ABS_NC:
561 case R_ARM_MOVW_PREL_NC:
562 case R_ARM_MOVT_PREL: {
// imm4 occupies bits [19:16] and imm12 bits [11:0] of the instruction.
563 uint64_t Val = read32le(Buf) & 0x000f0fff;
564 return SignExtend64<16>(((Val & 0x000f0000) >> 4) | (Val & 0x00fff));
566 case R_ARM_THM_MOVW_ABS_NC:
567 case R_ARM_THM_MOVT_ABS:
568 case R_ARM_THM_MOVW_PREL_NC:
569 case R_ARM_THM_MOVT_PREL: {
570 // Encoding T3: A = imm4:i:imm3:imm8
571 uint16_t Hi = read16le(Buf);
572 uint16_t Lo = read16le(Buf + 2);
573 return SignExtend64<16>(((Hi & 0x000f) << 12) | // imm4
574 ((Hi & 0x0400) << 1) | // i
575 ((Lo & 0x7000) >> 4) | // imm3
576 (Lo & 0x00ff)); // imm8
581 TargetInfo *elf::getARMTargetInfo() {