1 //===- PPC64.cpp ----------------------------------------------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "SyntheticSections.h"
13 #include "lld/Common/ErrorHandler.h"
14 #include "llvm/Support/Endian.h"
17 using namespace llvm::object;
18 using namespace llvm::support::endian;
19 using namespace llvm::ELF;
21 using namespace lld::elf;
// Bias added to the start of the TOC to form the TOC base, and subtracted
// again when a TOC-relative relocation is resolved. Never modified, so it is
// declared const.
static const uint64_t PPC64TocOffset = 0x8000;
// Bias subtracted when resolving relocations that are relative to
// dtv[dtpmod]. Never modified, so it is declared const.
static const uint64_t DynamicThreadPointerOffset = 0x8000;
26 // The instruction encoding of bits 21-30 from the ISA for the Xform and Dform
27 // instructions that can be used as part of the initial exec TLS sequence.
63 uint64_t elf::getPPC64TocBase() {
64 // The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The
65 // TOC starts where the first of these sections starts. We always create a
66 // .got when we see a relocation that uses it, so for us the start is always
68 uint64_t TocVA = In.Got->getVA();
70 // Per the ppc64-elf-linux ABI, the TOC base is the TOC value plus 0x8000
71 // thus permitting a full 64 Kbytes segment. Note that the glibc startup
72 // code (crt1.o) assumes that you can get from the TOC base to the
73 // start of the .toc section with only a single (signed) 16-bit relocation.
74 return TocVA + PPC64TocOffset;
77 unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t StOther) {
78 // The offset is encoded into the 3 most significant bits of the st_other
79 // field, with some special values described in section 3.4.1 of the ABI:
80 // 0 --> Zero offset between the GEP and LEP, and the function does NOT use
81 // the TOC pointer (r2). r2 will hold the same value on returning from
82 // the function as it did on entering the function.
83 // 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a
84 // caller-saved register for all callers.
85 // 2-6 --> The binary logarithm of the offset eg:
86 // 2 --> 2^2 = 4 bytes --> 1 instruction.
87 // 6 --> 2^6 = 64 bytes --> 16 instructions.
89 uint8_t GepToLep = (StOther >> 5) & 7;
93 // The value encoded in the st_other bits is the
94 // log-base-2(offset).
98 error("reserved value of 7 in the 3 most-significant-bits of st_other");
103 class PPC64 final : public TargetInfo {
106 uint32_t calcEFlags() const override;
107 RelExpr getRelExpr(RelType Type, const Symbol &S,
108 const uint8_t *Loc) const override;
109 void writePltHeader(uint8_t *Buf) const override;
110 void writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr, uint64_t PltEntryAddr,
111 int32_t Index, unsigned RelOff) const override;
112 void relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const override;
113 void writeGotHeader(uint8_t *Buf) const override;
114 bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
115 uint64_t BranchAddr, const Symbol &S) const override;
116 bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
117 RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
118 RelExpr Expr) const override;
119 void relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
120 void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
121 void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
122 void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
124 bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
125 uint8_t StOther) const override;
129 // Relocation masks following the #lo(value), #hi(value), #ha(value),
130 // #higher(value), #highera(value), #highest(value), and #highesta(value)
131 // macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi
// #lo(value): the least-significant 16 bits of V.
static uint16_t lo(uint64_t V) {
  return static_cast<uint16_t>(V & 0xFFFF);
}
// #hi(value): bits 16-31 of V, with no rounding adjustment.
static uint16_t hi(uint64_t V) {
  const uint64_t Shifted = V >> 16;
  return static_cast<uint16_t>(Shifted);
}
// #ha(value): bits 16-31 of V after adding 0x8000, which compensates for the
// low 16 bits being treated as a signed quantity.
static uint16_t ha(uint64_t V) {
  const uint64_t Adjusted = V + 0x8000;
  return static_cast<uint16_t>(Adjusted >> 16);
}
// #higher(value): bits 32-47 of V, with no rounding adjustment.
static uint16_t higher(uint64_t V) {
  const uint64_t Shifted = V >> 32;
  return static_cast<uint16_t>(Shifted);
}
// #highera(value): bits 32-47 of V after adding 0x8000.
static uint16_t highera(uint64_t V) {
  const uint64_t Adjusted = V + 0x8000;
  return static_cast<uint16_t>(Adjusted >> 32);
}
// #highest(value): the most-significant 16 bits (48-63) of V.
static uint16_t highest(uint64_t V) {
  const uint64_t Shifted = V >> 48;
  return static_cast<uint16_t>(Shifted);
}
// #highesta(value): the most-significant 16 bits (48-63) of V after adding
// 0x8000.
static uint16_t highesta(uint64_t V) {
  const uint64_t Adjusted = V + 0x8000;
  return static_cast<uint16_t>(Adjusted >> 48);
}
141 // Extracts the 'PO' field of an instruction encoding.
// Returns the top six bits of a 32-bit instruction word, i.e. its 'PO'
// (primary opcode) field.
static uint8_t getPrimaryOpCode(uint32_t Encoding) {
  const uint32_t PrimaryOp = Encoding >> 26;
  return static_cast<uint8_t>(PrimaryOp);
}
144 static bool isDQFormInstruction(uint32_t Encoding) {
145 switch (getPrimaryOpCode(Encoding)) {
149 // The only instruction with a primary opcode of 56 is `lq`.
152 // There are both DS and DQ instruction forms with this primary opcode.
153 // Namely `lxv` and `stxv` are the DQ-forms that use it.
154 // The DS 'XO' bits being set to 01 is restricted to DQ form.
155 return (Encoding & 3) == 0x1;
159 static bool isInstructionUpdateForm(uint32_t Encoding) {
160 switch (getPrimaryOpCode(Encoding)) {
175 // LWA has the same opcode as LD, and the DS bits is what differentiates
176 // between LD/LDU/LWA
179 return (Encoding & 3) == 1;
183 // There are a number of places where we either want to read or write an
184 // instruction when handling a half16 relocation type. On big-endian the buffer
185 // pointer is pointing into the middle of the word we want to extract, and on
186 // little-endian it is pointing to the start of the word. These 2 helpers are to
187 // simplify reading and writing in that context.
188 static void writeInstrFromHalf16(uint8_t *Loc, uint32_t Instr) {
189 write32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0), Instr);
192 static uint32_t readInstrFromHalf16(const uint8_t *Loc) {
193 return read32(Loc - (Config->EKind == ELF64BEKind ? 2 : 0));
197 GotRel = R_PPC64_GLOB_DAT;
198 NoneRel = R_PPC64_NONE;
199 PltRel = R_PPC64_JMP_SLOT;
200 RelativeRel = R_PPC64_RELATIVE;
201 IRelativeRel = R_PPC64_IRELATIVE;
205 GotBaseSymInGotPlt = false;
206 GotBaseSymOff = 0x8000;
207 GotHeaderEntriesNum = 1;
208 GotPltHeaderEntriesNum = 2;
212 TlsModuleIndexRel = R_PPC64_DTPMOD64;
213 TlsOffsetRel = R_PPC64_DTPREL64;
215 TlsGotRel = R_PPC64_TPREL64;
217 NeedsMoreStackNonSplit = false;
219 // We need 64K pages (at least under glibc/Linux, the loader won't
220 // set different permissions on a finer granularity than that).
221 DefaultMaxPageSize = 65536;
223 // The PPC64 ELF ABI v1 spec says:
225 // It is normally desirable to put segments with different characteristics
226 // in separate 256 Mbyte portions of the address space, to give the
227 // operating system full paging flexibility in the 64-bit address space.
229 // And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers
230 // use 0x10000000 as the starting address.
231 DefaultImageBase = 0x10000000;
233 write32(TrapInstr.data(), 0x7fe00008);
236 static uint32_t getEFlags(InputFile *File) {
237 if (Config->EKind == ELF64BEKind)
238 return cast<ObjFile<ELF64BE>>(File)->getObj().getHeader()->e_flags;
239 return cast<ObjFile<ELF64LE>>(File)->getObj().getHeader()->e_flags;
242 // This file implements v2 ABI. This function makes sure that all
243 // object files have v2 or an unspecified version as an ABI version.
244 uint32_t PPC64::calcEFlags() const {
245 for (InputFile *F : ObjectFiles) {
246 uint32_t Flag = getEFlags(F);
248 error(toString(F) + ": ABI version 1 is not supported");
250 error(toString(F) + ": unrecognized e_flags: " + Twine(Flag));
255 void PPC64::relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
256 // Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement.
257 // The general dynamic code sequence for a global `x` will look like:
258 // Instruction Relocation Symbol
259 // addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
260 // addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
261 // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
262 // R_PPC64_REL24 __tls_get_addr
265 // Relaxing to local exec entails converting:
266 // addis r3, r2, x@got@tlsgd@ha into nop
267 // addi r3, r3, x@got@tlsgd@l into addis r3, r13, x@tprel@ha
268 // bl __tls_get_addr(x@tlsgd) into nop
269 // nop into addi r3, r3, x@tprel@l
272 case R_PPC64_GOT_TLSGD16_HA:
273 writeInstrFromHalf16(Loc, 0x60000000); // nop
275 case R_PPC64_GOT_TLSGD16:
276 case R_PPC64_GOT_TLSGD16_LO:
277 writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13
278 relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
281 write32(Loc, 0x60000000); // nop
282 write32(Loc + 4, 0x38630000); // addi r3, r3
283 // Since we are relocating a half16 type relocation and Loc + 4 points to
284 // the start of an instruction we need to advance the buffer by an extra
286 relocateOne(Loc + 4 + (Config->EKind == ELF64BEKind ? 2 : 0),
287 R_PPC64_TPREL16_LO, Val);
290 llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
294 void PPC64::relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
295 // Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement.
296 // The local dynamic code sequence for a global `x` will look like:
297 // Instruction Relocation Symbol
298 // addis r3, r2, x@got@tlsld@ha R_PPC64_GOT_TLSLD16_HA x
299 // addi r3, r3, x@got@tlsld@l R_PPC64_GOT_TLSLD16_LO x
300 // bl __tls_get_addr(x@tlsld) R_PPC64_TLSLD x
301 // R_PPC64_REL24 __tls_get_addr
304 // Relaxing to local exec entails converting:
305 // addis r3, r2, x@got@tlsld@ha into nop
306 // addi r3, r3, x@got@tlsld@l into addis r3, r13, 0
307 // bl __tls_get_addr(x@tlsld) into nop
308 // nop into addi r3, r3, 4096
311 case R_PPC64_GOT_TLSLD16_HA:
312 writeInstrFromHalf16(Loc, 0x60000000); // nop
314 case R_PPC64_GOT_TLSLD16_LO:
315 writeInstrFromHalf16(Loc, 0x3c6d0000); // addis r3, r13, 0
318 write32(Loc, 0x60000000); // nop
319 write32(Loc + 4, 0x38631000); // addi r3, r3, 4096
321 case R_PPC64_DTPREL16:
322 case R_PPC64_DTPREL16_HA:
323 case R_PPC64_DTPREL16_HI:
324 case R_PPC64_DTPREL16_DS:
325 case R_PPC64_DTPREL16_LO:
326 case R_PPC64_DTPREL16_LO_DS:
327 case R_PPC64_GOT_DTPREL16_HA:
328 case R_PPC64_GOT_DTPREL16_LO_DS:
329 case R_PPC64_GOT_DTPREL16_DS:
330 case R_PPC64_GOT_DTPREL16_HI:
331 relocateOne(Loc, Type, Val);
334 llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");
338 static unsigned getDFormOp(unsigned SecondaryOp) {
339 switch (SecondaryOp) {
359 error("unrecognized instruction for IE to LE R_PPC64_TLS");
364 void PPC64::relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const {
365 // The initial exec code sequence for a global `x` will look like:
366 // Instruction Relocation Symbol
367 // addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x
368 // ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x
369 // add r9, r9, x@tls R_PPC64_TLS x
371 // Relaxing to local exec entails converting:
372 // addis r9, r2, x@got@tprel@ha into nop
373 // ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha
374 // add r9, r9, x@tls into addi r9, r9, x@tprel@l
376 // x@tls R_PPC64_TLS is a relocation which does not compute anything,
377 // it is replaced with r13 (thread pointer).
379 // The add instruction in the initial exec sequence has multiple variations
380 // that need to be handled. If we are building an address it will use an add
381 // instruction, if we are accessing memory it will use any of the X-form
382 // indexed load or store instructions.
384 unsigned Offset = (Config->EKind == ELF64BEKind) ? 2 : 0;
386 case R_PPC64_GOT_TPREL16_HA:
387 write32(Loc - Offset, 0x60000000); // nop
389 case R_PPC64_GOT_TPREL16_LO_DS:
390 case R_PPC64_GOT_TPREL16_DS: {
391 uint32_t RegNo = read32(Loc - Offset) & 0x03E00000; // bits 6-10
392 write32(Loc - Offset, 0x3C0D0000 | RegNo); // addis RegNo, r13
393 relocateOne(Loc, R_PPC64_TPREL16_HA, Val);
397 uint32_t PrimaryOp = getPrimaryOpCode(read32(Loc));
399 error("unrecognized instruction for IE to LE R_PPC64_TLS");
400 uint32_t SecondaryOp = (read32(Loc) & 0x000007FE) >> 1; // bits 21-30
401 uint32_t DFormOp = getDFormOp(SecondaryOp);
402 write32(Loc, ((DFormOp << 26) | (read32(Loc) & 0x03FFFFFF)));
403 relocateOne(Loc + Offset, R_PPC64_TPREL16_LO, Val);
407 llvm_unreachable("unknown relocation for IE to LE");
412 RelExpr PPC64::getRelExpr(RelType Type, const Symbol &S,
413 const uint8_t *Loc) const {
416 case R_PPC64_GOT16_DS:
417 case R_PPC64_GOT16_HA:
418 case R_PPC64_GOT16_HI:
419 case R_PPC64_GOT16_LO:
420 case R_PPC64_GOT16_LO_DS:
423 case R_PPC64_TOC16_DS:
424 case R_PPC64_TOC16_HA:
425 case R_PPC64_TOC16_HI:
426 case R_PPC64_TOC16_LO:
427 case R_PPC64_TOC16_LO_DS:
433 return R_PPC_CALL_PLT;
434 case R_PPC64_REL16_LO:
435 case R_PPC64_REL16_HA:
439 case R_PPC64_GOT_TLSGD16:
440 case R_PPC64_GOT_TLSGD16_HA:
441 case R_PPC64_GOT_TLSGD16_HI:
442 case R_PPC64_GOT_TLSGD16_LO:
444 case R_PPC64_GOT_TLSLD16:
445 case R_PPC64_GOT_TLSLD16_HA:
446 case R_PPC64_GOT_TLSLD16_HI:
447 case R_PPC64_GOT_TLSLD16_LO:
449 case R_PPC64_GOT_TPREL16_HA:
450 case R_PPC64_GOT_TPREL16_LO_DS:
451 case R_PPC64_GOT_TPREL16_DS:
452 case R_PPC64_GOT_TPREL16_HI:
454 case R_PPC64_GOT_DTPREL16_HA:
455 case R_PPC64_GOT_DTPREL16_LO_DS:
456 case R_PPC64_GOT_DTPREL16_DS:
457 case R_PPC64_GOT_DTPREL16_HI:
458 return R_TLSLD_GOT_OFF;
459 case R_PPC64_TPREL16:
460 case R_PPC64_TPREL16_HA:
461 case R_PPC64_TPREL16_LO:
462 case R_PPC64_TPREL16_HI:
463 case R_PPC64_TPREL16_DS:
464 case R_PPC64_TPREL16_LO_DS:
465 case R_PPC64_TPREL16_HIGHER:
466 case R_PPC64_TPREL16_HIGHERA:
467 case R_PPC64_TPREL16_HIGHEST:
468 case R_PPC64_TPREL16_HIGHESTA:
470 case R_PPC64_DTPREL16:
471 case R_PPC64_DTPREL16_DS:
472 case R_PPC64_DTPREL16_HA:
473 case R_PPC64_DTPREL16_HI:
474 case R_PPC64_DTPREL16_HIGHER:
475 case R_PPC64_DTPREL16_HIGHERA:
476 case R_PPC64_DTPREL16_HIGHEST:
477 case R_PPC64_DTPREL16_HIGHESTA:
478 case R_PPC64_DTPREL16_LO:
479 case R_PPC64_DTPREL16_LO_DS:
480 case R_PPC64_DTPREL64:
483 return R_TLSDESC_CALL;
493 void PPC64::writeGotHeader(uint8_t *Buf) const {
494 write64(Buf, getPPC64TocBase());
497 void PPC64::writePltHeader(uint8_t *Buf) const {
498 // The generic resolver stub goes first.
499 write32(Buf + 0, 0x7c0802a6); // mflr r0
500 write32(Buf + 4, 0x429f0005); // bcl 20,4*cr7+so,8 <_glink+0x8>
501 write32(Buf + 8, 0x7d6802a6); // mflr r11
502 write32(Buf + 12, 0x7c0803a6); // mtlr r0
503 write32(Buf + 16, 0x7d8b6050); // subf r12, r11, r12
504 write32(Buf + 20, 0x380cffcc); // subi r0,r12,52
505 write32(Buf + 24, 0x7800f082); // srdi r0,r0,62,2
506 write32(Buf + 28, 0xe98b002c); // ld r12,44(r11)
507 write32(Buf + 32, 0x7d6c5a14); // add r11,r12,r11
508 write32(Buf + 36, 0xe98b0000); // ld r12,0(r11)
509 write32(Buf + 40, 0xe96b0008); // ld r11,8(r11)
510 write32(Buf + 44, 0x7d8903a6); // mtctr r12
511 write32(Buf + 48, 0x4e800420); // bctr
513 // The 'bcl' instruction will set the link register to the address of the
514 // following instruction ('mflr r11'). Here we store the offset from that
515 // instruction to the first entry in the GotPlt section.
516 int64_t GotPltOffset = In.GotPlt->getVA() - (In.Plt->getVA() + 8);
517 write64(Buf + 52, GotPltOffset);
520 void PPC64::writePlt(uint8_t *Buf, uint64_t GotPltEntryAddr,
521 uint64_t PltEntryAddr, int32_t Index,
522 unsigned RelOff) const {
523 int32_t Offset = PltHeaderSize + Index * PltEntrySize;
524 // bl __glink_PLTresolve
525 write32(Buf, 0x48000000 | ((-Offset) & 0x03FFFFFc));
528 static std::pair<RelType, uint64_t> toAddr16Rel(RelType Type, uint64_t Val) {
529 // Relocations relative to the toc-base need to be adjusted by the Toc offset.
530 uint64_t TocBiasedVal = Val - PPC64TocOffset;
531 // Relocations relative to dtv[dtpmod] need to be adjusted by the DTP offset.
532 uint64_t DTPBiasedVal = Val - DynamicThreadPointerOffset;
535 // TOC biased relocation.
537 case R_PPC64_GOT_TLSGD16:
538 case R_PPC64_GOT_TLSLD16:
540 return {R_PPC64_ADDR16, TocBiasedVal};
541 case R_PPC64_GOT16_DS:
542 case R_PPC64_TOC16_DS:
543 case R_PPC64_GOT_TPREL16_DS:
544 case R_PPC64_GOT_DTPREL16_DS:
545 return {R_PPC64_ADDR16_DS, TocBiasedVal};
546 case R_PPC64_GOT16_HA:
547 case R_PPC64_GOT_TLSGD16_HA:
548 case R_PPC64_GOT_TLSLD16_HA:
549 case R_PPC64_GOT_TPREL16_HA:
550 case R_PPC64_GOT_DTPREL16_HA:
551 case R_PPC64_TOC16_HA:
552 return {R_PPC64_ADDR16_HA, TocBiasedVal};
553 case R_PPC64_GOT16_HI:
554 case R_PPC64_GOT_TLSGD16_HI:
555 case R_PPC64_GOT_TLSLD16_HI:
556 case R_PPC64_GOT_TPREL16_HI:
557 case R_PPC64_GOT_DTPREL16_HI:
558 case R_PPC64_TOC16_HI:
559 return {R_PPC64_ADDR16_HI, TocBiasedVal};
560 case R_PPC64_GOT16_LO:
561 case R_PPC64_GOT_TLSGD16_LO:
562 case R_PPC64_GOT_TLSLD16_LO:
563 case R_PPC64_TOC16_LO:
564 return {R_PPC64_ADDR16_LO, TocBiasedVal};
565 case R_PPC64_GOT16_LO_DS:
566 case R_PPC64_TOC16_LO_DS:
567 case R_PPC64_GOT_TPREL16_LO_DS:
568 case R_PPC64_GOT_DTPREL16_LO_DS:
569 return {R_PPC64_ADDR16_LO_DS, TocBiasedVal};
571 // Dynamic Thread pointer biased relocation types.
572 case R_PPC64_DTPREL16:
573 return {R_PPC64_ADDR16, DTPBiasedVal};
574 case R_PPC64_DTPREL16_DS:
575 return {R_PPC64_ADDR16_DS, DTPBiasedVal};
576 case R_PPC64_DTPREL16_HA:
577 return {R_PPC64_ADDR16_HA, DTPBiasedVal};
578 case R_PPC64_DTPREL16_HI:
579 return {R_PPC64_ADDR16_HI, DTPBiasedVal};
580 case R_PPC64_DTPREL16_HIGHER:
581 return {R_PPC64_ADDR16_HIGHER, DTPBiasedVal};
582 case R_PPC64_DTPREL16_HIGHERA:
583 return {R_PPC64_ADDR16_HIGHERA, DTPBiasedVal};
584 case R_PPC64_DTPREL16_HIGHEST:
585 return {R_PPC64_ADDR16_HIGHEST, DTPBiasedVal};
586 case R_PPC64_DTPREL16_HIGHESTA:
587 return {R_PPC64_ADDR16_HIGHESTA, DTPBiasedVal};
588 case R_PPC64_DTPREL16_LO:
589 return {R_PPC64_ADDR16_LO, DTPBiasedVal};
590 case R_PPC64_DTPREL16_LO_DS:
591 return {R_PPC64_ADDR16_LO_DS, DTPBiasedVal};
592 case R_PPC64_DTPREL64:
593 return {R_PPC64_ADDR64, DTPBiasedVal};
600 static bool isTocOptType(RelType Type) {
602 case R_PPC64_GOT16_HA:
603 case R_PPC64_GOT16_LO_DS:
604 case R_PPC64_TOC16_HA:
605 case R_PPC64_TOC16_LO_DS:
606 case R_PPC64_TOC16_LO:
613 void PPC64::relocateOne(uint8_t *Loc, RelType Type, uint64_t Val) const {
614 // We need to save the original relocation type to use in diagnostics, and
615 // use the original type to determine if we should toc-optimize the
616 // instructions being relocated.
617 RelType OriginalType = Type;
618 bool ShouldTocOptimize = isTocOptType(Type);
619 // For dynamic thread pointer relative, toc-relative, and got-indirect
620 // relocations, proceed in terms of the corresponding ADDR16 relocation type.
621 std::tie(Type, Val) = toAddr16Rel(Type, Val);
624 case R_PPC64_ADDR14: {
625 checkAlignment(Loc, Val, 4, Type);
626 // Preserve the AA/LK bits in the branch instruction
627 uint8_t AALK = Loc[3];
628 write16(Loc + 2, (AALK & 3) | (Val & 0xfffc));
632 case R_PPC64_TPREL16:
633 checkInt(Loc, Val, 16, OriginalType);
636 case R_PPC64_ADDR16_DS:
637 case R_PPC64_TPREL16_DS: {
638 checkInt(Loc, Val, 16, OriginalType);
639 // DQ-form instructions use bits 28-31 as part of the instruction encoding
640 // DS-form instructions only use bits 30-31.
641 uint16_t Mask = isDQFormInstruction(readInstrFromHalf16(Loc)) ? 0xF : 0x3;
642 checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
643 write16(Loc, (read16(Loc) & Mask) | lo(Val));
645 case R_PPC64_ADDR16_HA:
646 case R_PPC64_REL16_HA:
647 case R_PPC64_TPREL16_HA:
648 if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0)
649 writeInstrFromHalf16(Loc, 0x60000000);
651 write16(Loc, ha(Val));
653 case R_PPC64_ADDR16_HI:
654 case R_PPC64_REL16_HI:
655 case R_PPC64_TPREL16_HI:
656 write16(Loc, hi(Val));
658 case R_PPC64_ADDR16_HIGHER:
659 case R_PPC64_TPREL16_HIGHER:
660 write16(Loc, higher(Val));
662 case R_PPC64_ADDR16_HIGHERA:
663 case R_PPC64_TPREL16_HIGHERA:
664 write16(Loc, highera(Val));
666 case R_PPC64_ADDR16_HIGHEST:
667 case R_PPC64_TPREL16_HIGHEST:
668 write16(Loc, highest(Val));
670 case R_PPC64_ADDR16_HIGHESTA:
671 case R_PPC64_TPREL16_HIGHESTA:
672 write16(Loc, highesta(Val));
674 case R_PPC64_ADDR16_LO:
675 case R_PPC64_REL16_LO:
676 case R_PPC64_TPREL16_LO:
677 // When the high-adjusted part of a toc relocation evaluates to 0, it is
678 // changed into a nop. The lo part then needs to be updated to use the
679 // toc-pointer register r2, as the base register.
680 if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
681 uint32_t Instr = readInstrFromHalf16(Loc);
682 if (isInstructionUpdateForm(Instr))
683 error(getErrorLocation(Loc) +
684 "can't toc-optimize an update instruction: 0x" +
686 Instr = (Instr & 0xFFE00000) | 0x00020000;
687 writeInstrFromHalf16(Loc, Instr);
689 write16(Loc, lo(Val));
691 case R_PPC64_ADDR16_LO_DS:
692 case R_PPC64_TPREL16_LO_DS: {
693 // DQ-form instructions use bits 28-31 as part of the instruction encoding
694 // DS-form instructions only use bits 30-31.
695 uint32_t Inst = readInstrFromHalf16(Loc);
696 uint16_t Mask = isDQFormInstruction(Inst) ? 0xF : 0x3;
697 checkAlignment(Loc, lo(Val), Mask + 1, OriginalType);
698 if (Config->TocOptimize && ShouldTocOptimize && ha(Val) == 0) {
699 // When the high-adjusted part of a toc relocation evaluates to 0, it is
700 // changed into a nop. The lo part then needs to be updated to use the toc
701 // pointer register r2, as the base register.
702 if (isInstructionUpdateForm(Inst))
703 error(getErrorLocation(Loc) +
704 "Can't toc-optimize an update instruction: 0x" +
705 Twine::utohexstr(Inst));
706 Inst = (Inst & 0xFFE0000F) | 0x00020000;
707 writeInstrFromHalf16(Loc, Inst);
709 write16(Loc, (read16(Loc) & Mask) | lo(Val));
713 checkInt(Loc, Val, 32, Type);
721 case R_PPC64_REL14: {
722 uint32_t Mask = 0x0000FFFC;
723 checkInt(Loc, Val, 16, Type);
724 checkAlignment(Loc, Val, 4, Type);
725 write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
728 case R_PPC64_REL24: {
729 uint32_t Mask = 0x03FFFFFC;
730 checkInt(Loc, Val, 26, Type);
731 checkAlignment(Loc, Val, 4, Type);
732 write32(Loc, (read32(Loc) & ~Mask) | (Val & Mask));
735 case R_PPC64_DTPREL64:
736 write64(Loc, Val - DynamicThreadPointerOffset);
739 error(getErrorLocation(Loc) + "unrecognized reloc " + Twine(Type));
743 bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
744 uint64_t BranchAddr, const Symbol &S) const {
745 if (Type != R_PPC64_REL14 && Type != R_PPC64_REL24)
748 // If a function is in the Plt it needs to be called with a call-stub.
752 // If a symbol is a weak undefined and we are compiling an executable
753 // it doesn't need a range-extending thunk since it can't be called.
754 if (S.isUndefWeak() && !Config->Shared)
757 // If the offset exceeds the range of the branch type then it will need
758 // a range-extending thunk.
759 return !inBranchRange(Type, BranchAddr, S.getVA());
762 bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
763 int64_t Offset = Dst - Src;
764 if (Type == R_PPC64_REL14)
765 return isInt<16>(Offset);
766 if (Type == R_PPC64_REL24)
767 return isInt<26>(Offset);
768 llvm_unreachable("unsupported relocation type used in branch");
771 RelExpr PPC64::adjustRelaxExpr(RelType Type, const uint8_t *Data,
772 RelExpr Expr) const {
773 if (Expr == R_RELAX_TLS_GD_TO_IE)
774 return R_RELAX_TLS_GD_TO_IE_GOT_OFF;
775 if (Expr == R_RELAX_TLS_LD_TO_LE)
776 return R_RELAX_TLS_LD_TO_LE_ABS;
780 // Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement.
781 // The general dynamic code sequence for a global `x` uses 4 instructions.
782 // Instruction Relocation Symbol
783 // addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x
784 // addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x
785 // bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x
786 // R_PPC64_REL24 __tls_get_addr
789 // Relaxing to initial-exec entails:
790 // 1) Convert the addis/addi pair that builds the address of the tls_index
791 // struct for 'x' to an addis/ld pair that loads an offset from a got-entry.
792 // 2) Convert the call to __tls_get_addr to a nop.
793 // 3) Convert the nop following the call to an add of the loaded offset to the
795 // Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is
796 // used as the relaxation hint for both steps 2 and 3.
797 void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
799 case R_PPC64_GOT_TLSGD16_HA:
800 // This is relaxed from addis rT, r2, sym@got@tlsgd@ha to
801 // addis rT, r2, sym@got@tprel@ha.
802 relocateOne(Loc, R_PPC64_GOT_TPREL16_HA, Val);
804 case R_PPC64_GOT_TLSGD16_LO: {
805 // Relax from addi r3, rA, sym@got@tlsgd@l to
806 // ld r3, sym@got@tprel@l(rA)
807 uint32_t InputRegister = (readInstrFromHalf16(Loc) & (0x1f << 16));
808 writeInstrFromHalf16(Loc, 0xE8600000 | InputRegister);
809 relocateOne(Loc, R_PPC64_GOT_TPREL16_LO_DS, Val);
813 write32(Loc, 0x60000000); // bl __tls_get_addr(sym@tlsgd) --> nop
814 write32(Loc + 4, 0x7c636A14); // nop --> add r3, r3, r13
817 llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
821 // The prologue for a split-stack function is expected to look roughly
823 // .Lglobal_entry_point:
824 // # TOC pointer initialization.
826 // .Llocal_entry_point:
827 // # load the __private_ss member of the thread's tcbhead.
828 // ld r0,-0x7000-64(r13)
829 // # subtract the function's stack size from the stack pointer.
830 // addis r12, r1, ha(-stack-frame size)
831 // addi r12, r12, l(-stack-frame size)
832 // # compare needed to actual and branch to allocate_more_stack if more
833 // # space is needed, otherwise fallthrough to 'normal' function body.
835 // blt- cr7, .Lallocate_more_stack
837 // -) The allocate_more_stack block might be placed after the split-stack
838 // prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
840 // -) If either the addis or addi is not needed due to the stack size being
841 // smaller than 32K or a multiple of 64K they will be replaced with a nop,
842 // but there will always be 2 instructions the linker can overwrite for the
843 // adjusted stack size.
845 // The linker's job here is to increase the stack size used in the addis/addi
846 // pair by split-stack-size-adjust.
847 // addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
848 // addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
849 bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
850 uint8_t StOther) const {
851 // If the caller has a global entry point adjust the buffer past it. The start
852 // of the split-stack prologue will be at the local entry point.
853 Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther);
855 // At the very least we expect to see a load of some split-stack data from the
856 // tcb, and 2 instructions that calculate the ending stack address this
857 // function will require. If there is not enough room for at least 3
858 // instructions it can't be a split-stack prologue.
862 // First instruction must be `ld r0, -0x7000-64(r13)`
863 if (read32(Loc) != 0xe80d8fc0)
868 // First instruction can be either an addis if the frame size is larger than
869 // 32K, or an addi if the size is less than 32K.
870 int32_t FirstInstr = read32(Loc + 4);
871 if (getPrimaryOpCode(FirstInstr) == 15) {
872 HiImm = FirstInstr & 0xFFFF;
873 } else if (getPrimaryOpCode(FirstInstr) == 14) {
874 LoImm = FirstInstr & 0xFFFF;
879 // Second instruction is either an addi or a nop. If the first instruction was
880 // an addi then LoImm is set and the second instruction must be a nop.
881 uint32_t SecondInstr = read32(Loc + 8);
882 if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) {
883 LoImm = SecondInstr & 0xFFFF;
884 } else if (SecondInstr != 0x60000000) {
888 // The register operands of the first instruction should be the stack-pointer
889 // (r1) as the input (RA) and r12 as the output (RT). If the second
890 // instruction is not a nop, then it should use r12 as both input and output.
891 auto CheckRegOperands = [](uint32_t Instr, uint8_t ExpectedRT,
892 uint8_t ExpectedRA) {
893 return ((Instr & 0x3E00000) >> 21 == ExpectedRT) &&
894 ((Instr & 0x1F0000) >> 16 == ExpectedRA);
896 if (!CheckRegOperands(FirstInstr, 12, 1))
898 if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12))
901 int32_t StackFrameSize = (HiImm * 65536) + LoImm;
902 // Check that the adjusted size doesn't overflow what we can represent with 2
904 if (StackFrameSize < Config->SplitStackAdjustSize + INT32_MIN) {
905 error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows");
909 int32_t AdjustedStackFrameSize =
910 StackFrameSize - Config->SplitStackAdjustSize;
912 LoImm = AdjustedStackFrameSize & 0xFFFF;
913 HiImm = (AdjustedStackFrameSize + 0x8000) >> 16;
915 write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm);
916 // If the low immediate is zero the second instruction will be a nop.
917 SecondInstr = LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000;
918 write32(Loc + 8, SecondInstr);
921 write32(Loc + 4, (0x39810000) | (uint16_t)LoImm);
922 write32(Loc + 8, 0x60000000);
928 TargetInfo *elf::getPPC64TargetInfo() {