1 //===- InputSection.cpp ---------------------------------------------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "InputSection.h"
13 #include "InputFiles.h"
14 #include "LinkerScript.h"
15 #include "OutputSections.h"
16 #include "Relocations.h"
17 #include "SymbolTable.h"
19 #include "SyntheticSections.h"
22 #include "lld/Common/ErrorHandler.h"
23 #include "lld/Common/Memory.h"
24 #include "llvm/Object/Decompressor.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/Compression.h"
27 #include "llvm/Support/Endian.h"
28 #include "llvm/Support/Threading.h"
29 #include "llvm/Support/xxhash.h"
36 using namespace llvm::ELF;
37 using namespace llvm::object;
38 using namespace llvm::support;
39 using namespace llvm::support::endian;
40 using namespace llvm::sys;
43 using namespace lld::elf;
45 std::vector<InputSectionBase *> elf::InputSections;
47 // Returns a string to construct an error message. The result has the
// form "<file>:(<section name>)", where <file> is toString(Sec->File).
48 std::string lld::toString(const InputSectionBase *Sec) {
49 return (toString(Sec->File) + ":(" + Sec->Name + ")").str();
// Returns the contents of the section described by Hdr in File.
// For an SHT_NOBITS section no bytes are read from the file: the result is
// an ArrayRef with a null data pointer and a size of sh_size, so it carries
// only a length and must not be dereferenced.
53 static ArrayRef<uint8_t> getSectionContents(ObjFile<ELFT> &File,
54 const typename ELFT::Shdr &Hdr) {
55 if (Hdr.sh_type == SHT_NOBITS)
56 return makeArrayRef<uint8_t>(nullptr, Hdr.sh_size);
// The object reader's result is unwrapped by check().
57 return check(File.getObj().getSectionContents(&Hdr));
60 InputSectionBase::InputSectionBase(InputFile *File, uint64_t Flags,
61 uint32_t Type, uint64_t Entsize,
62 uint32_t Link, uint32_t Info,
63 uint32_t Alignment, ArrayRef<uint8_t> Data,
64 StringRef Name, Kind SectionKind)
65 : SectionBase(SectionKind, Name, Flags, Entsize, Alignment, Type, Info,
67 File(File), Data(Data) {
68 // In order to reduce memory allocation, we assume that mergeable
69 // sections are smaller than 4 GiB, which is not an unreasonable
70 // assumption as of 2017.
71 if (SectionKind == SectionBase::Merge && Data.size() > UINT32_MAX)
72 error(toString(this) + ": section too large");
75 AreRelocsRela = false;
77 // The ELF spec states that a value of 0 means the section has
78 // no alignment constraints.
79 uint32_t V = std::max<uint64_t>(Alignment, 1);
80 if (!isPowerOf2_64(V))
81 fatal(toString(File) + ": section sh_addralign is not a power of 2");
85 // Drop SHF_GROUP bit unless we are producing a re-linkable object file.
86 // SHF_GROUP is a marker that a section belongs to some comdat group.
87 // That flag doesn't make sense in an executable.
// SHF_INFO_LINK is cleared unconditionally, whatever the output kind is.
88 static uint64_t getFlags(uint64_t Flags) {
89 Flags &= ~(uint64_t)SHF_INFO_LINK;
90 if (!Config->Relocatable)
91 Flags &= ~(uint64_t)SHF_GROUP;
95 // GNU assembler 2.24 and LLVM 4.0.0's MC (the newest release as of
96 // March 2017) fail to infer section types for sections starting with
97 // ".init_array." or ".fini_array.". They set SHT_PROGBITS instead of
98 // SHT_{INIT,FINI}_ARRAY. As a result, the following assembler directive
99 // creates ".init_array.100" with SHT_PROGBITS, for example.
101 // .section .init_array.100, "aw"
103 // This function forces SHT_{INIT,FINI}_ARRAY so that we can handle
104 // incorrect inputs as if they were correct from the beginning.
// Only SHT_PROGBITS sections are remapped, and the name prefix that is
// matched includes the trailing dot.
105 static uint64_t getType(uint64_t Type, StringRef Name) {
106 if (Type == SHT_PROGBITS && Name.startswith(".init_array."))
107 return SHT_INIT_ARRAY;
108 if (Type == SHT_PROGBITS && Name.startswith(".fini_array."))
109 return SHT_FINI_ARRAY;
113 template <class ELFT>
114 InputSectionBase::InputSectionBase(ObjFile<ELFT> &File,
115 const typename ELFT::Shdr &Hdr,
116 StringRef Name, Kind SectionKind)
117 : InputSectionBase(&File, getFlags(Hdr.sh_flags),
118 getType(Hdr.sh_type, Name), Hdr.sh_entsize, Hdr.sh_link,
119 Hdr.sh_info, Hdr.sh_addralign,
120 getSectionContents(File, Hdr), Name, SectionKind) {
121 // We reject object files having insanely large alignments even though
122 // they are allowed by the spec. I think 4GB is a reasonable limitation.
123 // We might want to relax this in the future.
124 if (Hdr.sh_addralign > UINT32_MAX)
125 fatal(toString(&File) + ": section sh_addralign is too large");
128 size_t InputSectionBase::getSize() const {
129 if (auto *S = dyn_cast<SyntheticSection>(this))
// Returns the distance in bytes from the start of the input file's memory
// buffer (File->MB) to the first byte of this section's Data.
// NOTE(review): the result is only meaningful while Data still points into
// File->MB; maybeDecompress() repoints Data at a separate buffer -- confirm
// callers never use this on decompressed sections.
135 uint64_t InputSectionBase::getOffsetInFile() const {
136 const uint8_t *FileStart = (const uint8_t *)File->MB.getBufferStart();
137 const uint8_t *SecStart = Data.begin();
138 return SecStart - FileStart;
141 uint64_t SectionBase::getOffset(uint64_t Offset) const {
144 auto *OS = cast<OutputSection>(this);
145 // For output sections we treat offset -1 as the end of the section.
146 return Offset == uint64_t(-1) ? OS->Size : Offset;
150 return cast<InputSection>(this)->getOffset(Offset);
152 // The file crtbeginT.o has relocations pointing to the start of an empty
153 // .eh_frame that is known to be the first in the link. It does that to
154 // identify the start of the output .eh_frame.
157 const MergeInputSection *MS = cast<MergeInputSection>(this);
158 if (InputSection *IS = MS->getParent())
159 return IS->getOffset(MS->getParentOffset(Offset));
160 return MS->getParentOffset(Offset);
162 llvm_unreachable("invalid section kind");
// Returns the address of the output section plus getOffset(Offset).
// If this section has no output section, the base address is taken as 0,
// so the result is just getOffset(Offset).
165 uint64_t SectionBase::getVA(uint64_t Offset) const {
166 const OutputSection *Out = getOutputSection();
167 return (Out ? Out->Addr : 0) + getOffset(Offset);
170 OutputSection *SectionBase::getOutputSection() {
172 if (auto *IS = dyn_cast<InputSection>(this))
174 else if (auto *MS = dyn_cast<MergeInputSection>(this))
175 Sec = MS->getParent();
176 else if (auto *EH = dyn_cast<EhInputSection>(this))
177 Sec = EH->getParent();
179 return cast<OutputSection>(this);
180 return Sec ? Sec->getParent() : nullptr;
183 // Decompress section contents if required. Note that this function
184 // is called from parallelForEach, so it must be thread-safe.
// On success Data points at the decompressed bytes held in DecompressBuf,
// SHF_COMPRESSED is cleared from Flags, and a ".zdebug*" name is rewritten
// to the corresponding ".debug*" name.
185 void InputSectionBase::maybeDecompress() {
// A section counts as compressed if it has SHF_COMPRESSED or if its name
// starts with ".zdebug".
188 if (!(Flags & SHF_COMPRESSED) && !Name.startswith(".zdebug"))
191 // Decompress a section.
192 Decompressor Dec = check(Decompressor::create(Name, toStringRef(Data),
193 Config->IsLE, Config->Is64));
195 size_t Size = Dec.getDecompressedSize();
// The buffer holds the decompressed data followed by Name.size() spare,
// zero-initialized bytes; the tail is used below to store the restored name.
196 DecompressBuf.reset(new char[Size + Name.size()]());
197 if (Error E = Dec.decompress({DecompressBuf.get(), Size}))
198 fatal(toString(this) +
199 ": decompress failed: " + llvm::toString(std::move(E)));
201 Data = makeArrayRef((uint8_t *)DecompressBuf.get(), Size);
202 Flags &= ~(uint64_t)SHF_COMPRESSED;
204 // A section name may have been altered if compressed. If that's
205 // the case, restore the original name. (i.e. ".zdebug_" -> ".debug_")
206 if (Name.startswith(".zdebug")) {
// Write '.' right after the data, then the old name without its leading
// ".z". The new name is one character shorter than the old one and lives
// in DecompressBuf, so it stays valid as long as that buffer does.
207 DecompressBuf[Size] = '.';
208 memcpy(&DecompressBuf[Size + 1], Name.data() + 2, Name.size() - 2);
209 Name = StringRef(&DecompressBuf[Size], Name.size() - 1);
// Returns the section at index Link in this file's section list, cast to
// InputSection. Callers must only use this on sections that have
// SHF_LINK_ORDER set; that precondition is asserted.
213 InputSection *InputSectionBase::getLinkOrderDep() const {
215 assert(Flags & SHF_LINK_ORDER);
216 return cast<InputSection>(File->getSections()[Link]);
219 // Find a function symbol that encloses a given location.
220 template <class ELFT>
221 Defined *InputSectionBase::getEnclosingFunction(uint64_t Offset) {
222 for (Symbol *B : File->getSymbols())
223 if (Defined *D = dyn_cast<Defined>(B))
224 if (D->Section == this && D->Type == STT_FUNC && D->Value <= Offset &&
225 Offset < D->Value + D->Size)
230 // Returns a source location string. Used to construct an error message.
231 template <class ELFT>
232 std::string InputSectionBase::getLocation(uint64_t Offset) {
233 // We don't have file for synthetic sections.
234 if (getFile<ELFT>() == nullptr)
235 return (Config->OutputFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")")
238 // First check if we can get desired values from debugging information.
239 if (Optional<DILineInfo> Info = getFile<ELFT>()->getDILineInfo(this, Offset))
240 return Info->FileName + ":" + std::to_string(Info->Line);
242 // File->SourceFile contains STT_FILE symbol that contains a
243 // source file name. If it's missing, we use an object file name.
244 std::string SrcFile = getFile<ELFT>()->SourceFile;
246 SrcFile = toString(File);
248 if (Defined *D = getEnclosingFunction<ELFT>(Offset))
249 return SrcFile + ":(function " + toString(*D) + ")";
251 // If there's no symbol, print out the offset in the section.
252 return (SrcFile + ":(" + Name + "+0x" + utohexstr(Offset) + ")").str();
255 // This function is intended to be used for constructing an error message.
256 // The returned message looks like this:
258 // foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42)
260 // Returns an empty string if there's no way to get line info.
261 std::string InputSectionBase::getSrcMsg(const Symbol &Sym, uint64_t Offset) {
262 // Synthetic sections don't have input files.
265 return File->getSrcMsg(Sym, *this, Offset);
268 // Returns a filename string along with an optional section name. This
269 // function is intended to be used for constructing an error
270 // message. The returned message looks like this:
272 // path/to/foo.o:(function bar)
276 // path/to/foo.o:(function bar) in archive path/to/bar.a
277 std::string InputSectionBase::getObjMsg(uint64_t Off) {
278 // Synthetic sections don't have input files.
280 return ("<internal>:(" + Name + "+0x" + utohexstr(Off) + ")").str();
281 std::string Filename = File->getName();
284 if (!File->ArchiveName.empty())
285 Archive = " in archive " + File->ArchiveName;
287 // Find a symbol that encloses a given location.
288 for (Symbol *B : File->getSymbols())
289 if (auto *D = dyn_cast<Defined>(B))
290 if (D->Section == this && D->Value <= Off && Off < D->Value + D->Size)
291 return Filename + ":(" + toString(*D) + ")" + Archive;
293 // If there's no symbol, print out the offset in the section.
294 return (Filename + ":(" + Name + "+0x" + utohexstr(Off) + ")" + Archive)
298 InputSection InputSection::Discarded(nullptr, 0, 0, 0, ArrayRef<uint8_t>(), "");
300 InputSection::InputSection(InputFile *F, uint64_t Flags, uint32_t Type,
301 uint32_t Alignment, ArrayRef<uint8_t> Data,
302 StringRef Name, Kind K)
303 : InputSectionBase(F, Flags, Type,
304 /*Entsize*/ 0, /*Link*/ 0, /*Info*/ 0, Alignment, Data,
307 template <class ELFT>
308 InputSection::InputSection(ObjFile<ELFT> &F, const typename ELFT::Shdr &Header,
310 : InputSectionBase(F, Header, Name, InputSectionBase::Regular) {}
// Type test for InputSection: true if S's kind is Regular or Synthetic,
// so synthetic sections are also treated as InputSections.
312 bool InputSection::classof(const SectionBase *S) {
313 return S->kind() == SectionBase::Regular ||
314 S->kind() == SectionBase::Synthetic;
// Returns Parent as an OutputSection. cast_or_null lets a null Parent
// through, so the result is null when no parent has been assigned.
317 OutputSection *InputSection::getParent() const {
318 return cast_or_null<OutputSection>(Parent);
321 // Copy SHT_GROUP section contents. Used only for the -r option.
322 template <class ELFT> void InputSection::copyShtGroup(uint8_t *Buf) {
323 // ELFT::Word is the 32-bit integral type in the target endianness.
324 typedef typename ELFT::Word u32;
325 ArrayRef<u32> From = getDataAs<u32>();
326 auto *To = reinterpret_cast<u32 *>(Buf);
328 // The first entry is not a section number but a flag.
331 // Adjust section numbers because section numbers in input object
332 // files are different from those in the output.
333 ArrayRef<InputSectionBase *> Sections = File->getSections();
334 for (uint32_t Idx : From.slice(1))
335 *To++ = Sections[Idx]->getOutputSection()->SectionIndex;
// For a relocation section (SHT_RELA or SHT_REL) that belongs to a file,
// returns the section at index Info in that file's section list, i.e. the
// section this one's relocations apply to. Sections with no file, or of
// any other type, take the early-exit branch instead.
338 InputSectionBase *InputSection::getRelocatedSection() const {
339 if (!File || (Type != SHT_RELA && Type != SHT_REL))
341 ArrayRef<InputSectionBase *> Sections = File->getSections();
342 return Sections[Info];
345 // This is used for -r and --emit-relocs. We can't use memcpy to copy
346 // relocations because we need to update symbol table offset and section index
347 // for each relocation. So we copy relocations one by one.
348 template <class ELFT, class RelTy>
349 void InputSection::copyRelocations(uint8_t *Buf, ArrayRef<RelTy> Rels) {
350 InputSectionBase *Sec = getRelocatedSection();
352 for (const RelTy &Rel : Rels) {
353 RelType Type = Rel.getType(Config->IsMips64EL);
354 Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel);
356 auto *P = reinterpret_cast<typename ELFT::Rela *>(Buf);
357 Buf += sizeof(RelTy);
360 P->r_addend = getAddend<ELFT>(Rel);
362 // Output section VA is zero for -r, so r_offset is an offset within the
363 // section, but for --emit-relocs it is a virtual address.
364 P->r_offset = Sec->getVA(Rel.r_offset);
365 P->setSymbolAndType(InX::SymTab->getSymbolIndex(&Sym), Type,
368 if (Sym.Type == STT_SECTION) {
369 // We combine multiple section symbols into only one per
370 // section. This means we have to update the addend. That is
371 // trivial for Elf_Rela, but for Elf_Rel we have to write to the
372 // section data. We do that by adding to the Relocation vector.
374 // .eh_frame is horribly special and can reference discarded sections. To
375 // avoid having to parse and recreate .eh_frame, we just replace any
376 // relocation in it pointing to discarded sections with R_*_NONE, which
377 // hopefully creates a frame that is ignored at runtime.
378 auto *D = dyn_cast<Defined>(&Sym);
380 error("STT_SECTION symbol should be defined");
383 SectionBase *Section = D->Section;
384 if (Section == &InputSection::Discarded) {
385 P->setSymbolAndType(0, 0, false);
389 int64_t Addend = getAddend<ELFT>(Rel);
390 const uint8_t *BufLoc = Sec->Data.begin() + Rel.r_offset;
392 Addend = Target->getImplicitAddend(BufLoc, Type);
394 if (Config->EMachine == EM_MIPS && Config->Relocatable &&
395 Target->getRelExpr(Type, Sym, BufLoc) == R_MIPS_GOTREL) {
396 // Some MIPS relocations depend on "gp" value. By default,
397 // this value has 0x7ff0 offset from a .got section. But
398 // relocatable files produced by a compiler or a linker
399 // might redefine this default value and we must use it
400 // for a calculation of the relocation result. When we
401 // generate EXE or DSO it's trivial. Generating a relocatable
402 // output is more difficult case because the linker does
403 // not calculate relocations in this mode and loses
404 // individual "gp" values used by each input object file.
405 // As a workaround we add the "gp" value to the relocation
406 // addend and save it back to the file.
407 Addend += Sec->getFile<ELFT>()->MipsGp0;
411 P->r_addend = Sym.getVA(Addend) - Section->getOutputSection()->Addr;
412 else if (Config->Relocatable)
413 Sec->Relocations.push_back({R_ABS, Type, Rel.r_offset, Addend, &Sym});
418 // The ARM and AArch64 ABI handle pc-relative relocations to undefined weak
419 // references specially. The general rule is that the value of the symbol in
420 // this context is the address of the place P. A further special case is that
421 // branch relocations to an undefined weak reference resolve to the next
// instruction.
423 static uint32_t getARMUndefinedRelativeWeakVA(RelType Type, uint32_t A,
426 // Unresolved branch relocations to weak references resolve to next
427 // instruction, this will be either 2 or 4 bytes on from P.
428 case R_ARM_THM_JUMP11:
435 case R_ARM_THM_JUMP19:
436 case R_ARM_THM_JUMP24:
439 // We don't want an interworking BLX to ARM
441 // Unresolved non branch pc-relative relocations
442 // R_ARM_TARGET2 which can be resolved relatively is not present as it never
443 // targets a weak-reference.
444 case R_ARM_MOVW_PREL_NC:
445 case R_ARM_MOVT_PREL:
447 case R_ARM_THM_MOVW_PREL_NC:
448 case R_ARM_THM_MOVT_PREL:
451 llvm_unreachable("ARM pc-relative relocation expected\n");
454 // The comment above getARMUndefinedRelativeWeakVA applies to this function.
455 static uint64_t getAArch64UndefinedRelativeWeakVA(uint64_t Type, uint64_t A,
458 // Unresolved branch relocations to weak references resolve to next
459 // instruction, this is 4 bytes on from P.
460 case R_AARCH64_CALL26:
461 case R_AARCH64_CONDBR19:
462 case R_AARCH64_JUMP26:
463 case R_AARCH64_TSTBR14:
465 // Unresolved non branch pc-relative relocations
466 case R_AARCH64_PREL16:
467 case R_AARCH64_PREL32:
468 case R_AARCH64_PREL64:
469 case R_AARCH64_ADR_PREL_LO21:
470 case R_AARCH64_LD_PREL_LO19:
473 llvm_unreachable("AArch64 pc-relative relocation expected\n");
476 // ARM SBREL relocations are of the form S + A - B where B is the static base.
477 // The ARM ABI defines base to be "addressing origin of the output segment
478 // defining the symbol S". We defined the "addressing origin"/static base to be
479 // the base of the PT_LOAD segment containing the Sym.
480 // The procedure call standard only defines a Read Write Position Independent
481 // RWPI variant so in practice we should expect the static base to be the base
482 // of the RW segment.
// Returns the address of the first section of the PT_LOAD that holds Sym's
// output section. Reports a fatal error if Sym has no output section, that
// section has no PT_LOAD, or the PT_LOAD has no first section.
483 static uint64_t getARMStaticBase(const Symbol &Sym) {
484 OutputSection *OS = Sym.getOutputSection();
485 if (!OS || !OS->PtLoad || !OS->PtLoad->FirstSec)
486 fatal("SBREL relocation to " + Sym.getName() + " without static base");
487 return OS->PtLoad->FirstSec->Addr;
490 static uint64_t getRelocTargetVA(const InputFile *File, RelType Type, int64_t A,
491 uint64_t P, const Symbol &Sym, RelExpr Expr) {
496 case R_RELAX_TLS_LD_TO_LE_ABS:
497 case R_RELAX_GOT_PC_NOPIC:
502 return Sym.getVA(A) - getARMStaticBase(Sym);
504 case R_RELAX_TLS_GD_TO_IE_ABS:
505 return Sym.getGotVA() + A;
507 return InX::Got->getVA() + A - P;
508 case R_GOTONLY_PC_FROM_END:
509 return InX::Got->getVA() + A - P + InX::Got->getSize();
511 return Sym.getVA(A) - InX::Got->getVA();
512 case R_GOTREL_FROM_END:
513 return Sym.getVA(A) - InX::Got->getVA() - InX::Got->getSize();
515 case R_RELAX_TLS_GD_TO_IE_END:
516 return Sym.getGotOffset() + A - InX::Got->getSize();
517 case R_TLSLD_GOT_OFF:
519 case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
520 return Sym.getGotOffset() + A;
522 case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
523 return getAArch64Page(Sym.getGotVA() + A) - getAArch64Page(P);
525 case R_RELAX_TLS_GD_TO_IE:
526 return Sym.getGotVA() + A - P;
531 llvm_unreachable("cannot relocate hint relocs");
533 return Sym.getVA(A) - InX::MipsGot->getGp(File);
535 return InX::MipsGot->getGp(File) + A;
536 case R_MIPS_GOT_GP_PC: {
537 // R_MIPS_LO16 expression has R_MIPS_GOT_GP_PC type iff the target
538 // is _gp_disp symbol. In that case we should use the following
539 // formula for calculation "AHL + GP - P + 4". For details see p. 4-19 at
540 // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
541 // microMIPS variants of these relocations use slightly different
542 // expressions: AHL + GP - P + 3 for %lo() and AHL + GP - P - 1 for %hi()
543 // to correctly handle the least-significant bit of the microMIPS symbol.
544 uint64_t V = InX::MipsGot->getGp(File) + A - P;
545 if (Type == R_MIPS_LO16 || Type == R_MICROMIPS_LO16)
547 if (Type == R_MICROMIPS_LO16 || Type == R_MICROMIPS_HI16)
551 case R_MIPS_GOT_LOCAL_PAGE:
552 // If relocation against MIPS local symbol requires GOT entry, this entry
553 // should be initialized by 'page address'. This address is high 16-bits
554 // of sum the symbol's value and the addend.
555 return InX::MipsGot->getVA() +
556 InX::MipsGot->getPageEntryOffset(File, Sym, A) -
557 InX::MipsGot->getGp(File);
559 case R_MIPS_GOT_OFF32:
560 // In case of MIPS if a GOT relocation has non-zero addend this addend
561 // should be applied to the GOT entry content not to the GOT entry offset.
562 // That is why we use separate expression type.
563 return InX::MipsGot->getVA() +
564 InX::MipsGot->getSymEntryOffset(File, Sym, A) -
565 InX::MipsGot->getGp(File);
567 return InX::MipsGot->getVA() + InX::MipsGot->getGlobalDynOffset(File, Sym) -
568 InX::MipsGot->getGp(File);
570 return InX::MipsGot->getVA() + InX::MipsGot->getTlsIndexOffset(File) -
571 InX::MipsGot->getGp(File);
573 case R_PLT_PAGE_PC: {
575 if (Sym.isUndefWeak())
576 Dest = getAArch64Page(A);
578 Dest = getAArch64Page(Sym.getVA(A));
579 return Dest - getAArch64Page(P);
583 if (Sym.isUndefWeak()) {
584 // On ARM and AArch64 a branch to an undefined weak resolves to the
585 // next instruction, otherwise the place.
586 if (Config->EMachine == EM_ARM)
587 Dest = getARMUndefinedRelativeWeakVA(Type, A, P);
588 else if (Config->EMachine == EM_AARCH64)
589 Dest = getAArch64UndefinedRelativeWeakVA(Type, A, P);
598 return Sym.getPltVA() + A;
601 return Sym.getPltVA() + A - P;
603 uint64_t SymVA = Sym.getVA(A);
604 // If we have an undefined weak symbol, we might get here with a symbol
605 // address of zero. That could overflow, but the code must be unreachable,
606 // so don't bother doing anything at all.
610 // PPC64 V2 ABI describes two entry points to a function. The global entry
611 // point sets up the TOC base pointer. When calling a local function, the
612 // call should branch to the local entry point rather than the global entry
613 // point. Section 3.4.1 describes using the 3 most significant bits of the
614 // st_other field to find out how many instructions there are between the
615 // local and global entry point.
616 uint8_t StOther = (Sym.StOther >> 5) & 7;
617 if (StOther == 0 || StOther == 1)
620 return SymVA - P + (1LL << StOther);
623 return getPPC64TocBase() + A;
625 return Sym.getVA(A) - P;
626 case R_RELAX_TLS_GD_TO_LE:
627 case R_RELAX_TLS_IE_TO_LE:
628 case R_RELAX_TLS_LD_TO_LE:
630 // A weak undefined TLS symbol resolves to the base of the TLS
631 // block, i.e. gets a value of zero. If we pass --gc-sections to
632 // lld and .tbss is not referenced, it gets reclaimed and we don't
633 // create a TLS program header. Therefore, we resolve this
634 // statically to zero.
635 if (Sym.isTls() && Sym.isUndefWeak())
638 // For TLS variant 1 the TCB is a fixed size, whereas for TLS variant 2 the
639 // TCB is of unspecified size and content. Targets that implement variant 1
640 // should set TcbSize.
641 if (Target->TcbSize) {
642 // PPC64 V2 ABI has the thread pointer offset into the middle of the TLS
643 // storage area by TlsTpOffset for efficient addressing TCB and up to
644 // 4KB – 8 B of other thread library information (placed before the TCB).
645 // Subtracting this offset will get the address of the first TLS block.
646 if (Target->TlsTpOffset)
647 return Sym.getVA(A) - Target->TlsTpOffset;
649 // If thread pointer is not offset into the middle, the first thing in the
650 // TLS storage area is the TCB. Add the TcbSize to get the address of the
// first TLS block.
652 return Sym.getVA(A) + alignTo(Target->TcbSize, Out::TlsPhdr->p_align);
654 return Sym.getVA(A) - Out::TlsPhdr->p_memsz;
655 case R_RELAX_TLS_GD_TO_LE_NEG:
657 return Out::TlsPhdr->p_memsz - Sym.getVA(A);
659 return Sym.getSize() + A;
661 return InX::Got->getGlobalDynAddr(Sym) + A;
663 return getAArch64Page(InX::Got->getGlobalDynAddr(Sym) + A) -
666 return InX::Got->getGlobalDynOffset(Sym) + A;
667 case R_TLSGD_GOT_FROM_END:
668 return InX::Got->getGlobalDynOffset(Sym) + A - InX::Got->getSize();
670 return InX::Got->getGlobalDynAddr(Sym) + A - P;
671 case R_TLSLD_GOT_FROM_END:
672 return InX::Got->getTlsIndexOff() + A - InX::Got->getSize();
674 return InX::Got->getTlsIndexOff() + A;
676 return InX::Got->getTlsIndexVA() + A - P;
678 llvm_unreachable("Invalid expression");
681 // This function applies relocations to sections without SHF_ALLOC bit.
682 // Such sections are never mapped to memory at runtime. Debug sections are
683 // an example. Relocations in non-alloc sections are much easier to
684 // handle than in allocated sections because they never need complex
685 // treatment such as GOT or PLT (because at runtime nothing refers to them).
686 // So, we handle relocations for non-alloc sections directly in this
687 // function as a performance optimization.
688 template <class ELFT, class RelTy>
689 void InputSection::relocateNonAlloc(uint8_t *Buf, ArrayRef<RelTy> Rels) {
690 const unsigned Bits = sizeof(typename ELFT::uint) * 8;
692 for (const RelTy &Rel : Rels) {
693 RelType Type = Rel.getType(Config->IsMips64EL);
695 // GCC 8.0 or earlier have a bug that they emit R_386_GOTPC relocations
696 // against _GLOBAL_OFFSET_TABLE_ for .debug_info. The bug has been fixed
697 // in 2017 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630), but we
698 // need to keep this bug-compatible code for a while.
699 if (Config->EMachine == EM_386 && Type == R_386_GOTPC)
702 uint64_t Offset = getOffset(Rel.r_offset);
703 uint8_t *BufLoc = Buf + Offset;
704 int64_t Addend = getAddend<ELFT>(Rel);
706 Addend += Target->getImplicitAddend(BufLoc, Type);
708 Symbol &Sym = getFile<ELFT>()->getRelocTargetSym(Rel);
709 RelExpr Expr = Target->getRelExpr(Type, Sym, BufLoc);
714 std::string Msg = getLocation<ELFT>(Offset) +
715 ": has non-ABS relocation " + toString(Type) +
716 " against symbol '" + toString(Sym) + "'";
722 // If the control reaches here, we found a PC-relative relocation in a
723 // non-ALLOC section. Since non-ALLOC section is not loaded into memory
724 // at runtime, the notion of PC-relative doesn't make sense here. So,
725 // this is a usage error. However, GNU linkers historically accept such
726 // relocations without any errors and relocate them as if they were at
727 // address 0. For bug-compatibility, we accept them with warnings. We
728 // know Steel Bank Common Lisp as of 2018 has this bug.
730 Target->relocateOne(BufLoc, Type,
731 SignExtend64<Bits>(Sym.getVA(Addend - Offset)));
735 if (Sym.isTls() && !Out::TlsPhdr)
736 Target->relocateOne(BufLoc, Type, 0);
738 Target->relocateOne(BufLoc, Type, SignExtend64<Bits>(Sym.getVA(Addend)));
742 // This is used when '-r' is given.
743 // For REL targets, InputSection::copyRelocations() may store artificial
744 // relocations aimed to update addends. They are handled in relocateAlloc()
745 // for allocatable sections, and this function does the same for
746 // non-allocatable sections, such as sections with debug information.
// Sec is the section whose Relocations list is applied; each patch location
// is computed relative to Buf.
747 static void relocateNonAllocForRelocatable(InputSection *Sec, uint8_t *Buf) {
// Width used for sign extension: 64 for 64-bit targets, 32 otherwise.
748 const unsigned Bits = Config->Is64 ? 64 : 32;
750 for (const Relocation &Rel : Sec->Relocations) {
751 // InputSection::copyRelocations() adds only R_ABS relocations.
752 assert(Rel.Expr == R_ABS);
// Rel.Offset is relative to Sec; adding Sec->OutSecOff makes it an offset
// from Buf.
753 uint8_t *BufLoc = Buf + Rel.Offset + Sec->OutSecOff;
// The value written is the symbol's VA with the addend applied,
// sign-extended from Bits bits.
754 uint64_t TargetVA = SignExtend64(Rel.Sym->getVA(Rel.Addend), Bits);
755 Target->relocateOne(BufLoc, Rel.Type, TargetVA);
759 template <class ELFT>
760 void InputSectionBase::relocate(uint8_t *Buf, uint8_t *BufEnd) {
761 if (Flags & SHF_EXECINSTR)
762 adjustSplitStackFunctionPrologues<ELFT>(Buf, BufEnd);
764 if (Flags & SHF_ALLOC) {
765 relocateAlloc(Buf, BufEnd);
769 auto *Sec = cast<InputSection>(this);
770 if (Config->Relocatable)
771 relocateNonAllocForRelocatable(Sec, Buf);
772 else if (Sec->AreRelocsRela)
773 Sec->relocateNonAlloc<ELFT>(Buf, Sec->template relas<ELFT>());
775 Sec->relocateNonAlloc<ELFT>(Buf, Sec->template rels<ELFT>());
778 void InputSectionBase::relocateAlloc(uint8_t *Buf, uint8_t *BufEnd) {
779 assert(Flags & SHF_ALLOC);
780 const unsigned Bits = Config->Wordsize * 8;
782 for (const Relocation &Rel : Relocations) {
783 uint64_t Offset = Rel.Offset;
784 if (auto *Sec = dyn_cast<InputSection>(this))
785 Offset += Sec->OutSecOff;
786 uint8_t *BufLoc = Buf + Offset;
787 RelType Type = Rel.Type;
789 uint64_t AddrLoc = getOutputSection()->Addr + Offset;
790 RelExpr Expr = Rel.Expr;
791 uint64_t TargetVA = SignExtend64(
792 getRelocTargetVA(File, Type, Rel.Addend, AddrLoc, *Rel.Sym, Expr),
797 case R_RELAX_GOT_PC_NOPIC:
798 Target->relaxGot(BufLoc, TargetVA);
800 case R_RELAX_TLS_IE_TO_LE:
801 Target->relaxTlsIeToLe(BufLoc, Type, TargetVA);
803 case R_RELAX_TLS_LD_TO_LE:
804 case R_RELAX_TLS_LD_TO_LE_ABS:
805 Target->relaxTlsLdToLe(BufLoc, Type, TargetVA);
807 case R_RELAX_TLS_GD_TO_LE:
808 case R_RELAX_TLS_GD_TO_LE_NEG:
809 Target->relaxTlsGdToLe(BufLoc, Type, TargetVA);
811 case R_RELAX_TLS_GD_TO_IE:
812 case R_RELAX_TLS_GD_TO_IE_ABS:
813 case R_RELAX_TLS_GD_TO_IE_GOT_OFF:
814 case R_RELAX_TLS_GD_TO_IE_PAGE_PC:
815 case R_RELAX_TLS_GD_TO_IE_END:
816 Target->relaxTlsGdToIe(BufLoc, Type, TargetVA);
819 // If this is a call to __tls_get_addr, it may be part of a TLS
820 // sequence that has been relaxed and turned into a nop. In this
821 // case, we don't want to handle it as a call.
822 if (read32(BufLoc) == 0x60000000) // nop
825 // Patch a nop (0x60000000) to a ld.
826 if (Rel.Sym->NeedsTocRestore) {
827 if (BufLoc + 8 > BufEnd || read32(BufLoc + 4) != 0x60000000) {
828 error(getErrorLocation(BufLoc) + "call lacks nop, can't restore toc");
831 write32(BufLoc + 4, 0xe8410018); // ld %r2, 24(%r1)
833 Target->relocateOne(BufLoc, Type, TargetVA);
836 Target->relocateOne(BufLoc, Type, TargetVA);
842 // For each function-defining prologue, find any calls to __morestack,
843 // and replace them with calls to __morestack_non_split.
844 static void switchMorestackCallsToMorestackNonSplit(
845 DenseSet<Defined *> &Prologues, std::vector<Relocation *> &MorestackCalls) {
847 // If the target adjusted a function's prologue, all calls to
848 // __morestack inside that function should be switched to
849 // __morestack_non_split.
850 Symbol *MoreStackNonSplit = Symtab->find("__morestack_non_split");
852 // Sort both collections to compare addresses efficiently.
853 llvm::sort(MorestackCalls.begin(), MorestackCalls.end(),
854 [](const Relocation *L, const Relocation *R) {
855 return L->Offset < R->Offset;
857 std::vector<Defined *> Functions(Prologues.begin(), Prologues.end());
859 Functions.begin(), Functions.end(),
860 [](const Defined *L, const Defined *R) { return L->Value < R->Value; });
862 auto It = MorestackCalls.begin();
863 for (Defined *F : Functions) {
864 // Find the first call to __morestack within the function.
865 while (It != MorestackCalls.end() && (*It)->Offset < F->Value)
867 // Adjust all calls inside the function.
868 while (It != MorestackCalls.end() && (*It)->Offset < F->Value + F->Size) {
869 (*It)->Sym = MoreStackNonSplit;
// Checks whether Offset lies inside some function in Prologues, i.e. within
// the half-open range [F->Value, F->Value + F->Size) of one of its entries.
// The set is scanned linearly.
875 static bool enclosingPrologueAdjusted(uint64_t Offset,
876 const DenseSet<Defined *> &Prologues) {
877 for (Defined *F : Prologues)
878 if (F->Value <= Offset && Offset < F->Value + F->Size)
883 // If a function compiled for split stack calls a function not
884 // compiled for split stack, then the caller needs its prologue
885 // adjusted to ensure that the called function will have enough stack
886 // available. Find those functions, and adjust their prologues.
887 template <class ELFT>
888 void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
890 if (!getFile<ELFT>()->SplitStack)
892 DenseSet<Defined *> AdjustedPrologues;
893 std::vector<Relocation *> MorestackCalls;
895 for (Relocation &Rel : Relocations) {
896 // Local symbols can't possibly be cross-calls, and should have been
897 // resolved long before this line.
898 if (Rel.Sym->isLocal())
901 Defined *D = dyn_cast<Defined>(Rel.Sym);
902 // A reference to an undefined symbol was an error, and should not
903 // have gotten to this point.
907 // Ignore calls into the split-stack api.
908 if (D->getName().startswith("__morestack")) {
909 if (D->getName().equals("__morestack"))
910 MorestackCalls.push_back(&Rel);
914 // A relocation to non-function isn't relevant. Sometimes
915 // __morestack is not marked as a function, so this check comes
916 // after the name check.
917 if (D->Type != STT_FUNC)
920 if (enclosingPrologueAdjusted(Rel.Offset, AdjustedPrologues))
923 if (Defined *F = getEnclosingFunction<ELFT>(Rel.Offset)) {
924 if (Target->adjustPrologueForCrossSplitStack(Buf + F->Value, End)) {
925 AdjustedPrologues.insert(F);
929 if (!getFile<ELFT>()->SomeNoSplitStack)
930 error("function call at " + getErrorLocation(Buf + Rel.Offset) +
931 "crosses a split-stack boundary, but unable " +
932 "to adjust the enclosing function's prologue");
934 switchMorestackCallsToMorestackNonSplit(AdjustedPrologues, MorestackCalls);
// Writes this section into the output buffer. Every visible write targets
// Buf + OutSecOff. The section type selects the path:
//   - SHT_NOBITS is tested first (its guarded statement is not visible),
//   - a SyntheticSection delegates to its own writeTo,
//   - SHT_RELA / SHT_REL go through copyRelocations,
//   - SHT_GROUP goes through copyShtGroup,
//   - anything else is copied from Data and then relocated.
// NOTE(review): listing lines 939, 943-944, 950-951, 954-955 and 960-961 are
// absent from this text; each presumably ends its branch with a return,
// otherwise the paths would fall through into one another -- confirm.
937 template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {
938 if (Type == SHT_NOBITS)
941 if (auto *S = dyn_cast<SyntheticSection>(this)) {
942 S->writeTo(Buf + OutSecOff);
946 // If -r or --emit-relocs is given, then an InputSection
947 // may be a relocation section.
948 if (Type == SHT_RELA) {
949 copyRelocations<ELFT>(Buf + OutSecOff, getDataAs<typename ELFT::Rela>());
952 if (Type == SHT_REL) {
953 copyRelocations<ELFT>(Buf + OutSecOff, getDataAs<typename ELFT::Rel>());
957 // If -r is given, we may have a SHT_GROUP section.
958 if (Type == SHT_GROUP) {
959 copyShtGroup<ELFT>(Buf + OutSecOff);
963 // Copy section contents from source object file to output file
964 // and then apply relocations.
965 memcpy(Buf + OutSecOff, Data.data(), Data.size());
// BufEnd is one past the last byte just copied. relocate() is given the
// unshifted Buf, not Buf + OutSecOff.
966 uint8_t *BufEnd = Buf + OutSecOff + Data.size();
967 relocate<ELFT>(Buf, BufEnd);
// Part of replacing Other with this section. The only effect visible here is
// raising this section's Alignment to the larger of the two alignments.
// NOTE(review): listing lines 972-974 are absent from this text, so any
// further updates applied to Other cannot be confirmed from here.
970 void InputSection::replace(InputSection *Other) {
971 Alignment = std::max(Alignment, Other->Alignment);
// Constructs an EhInputSection by forwarding F, Header and Name to
// InputSectionBase with the section kind InputSectionBase::EHFrame. The
// constructor body is empty.
// NOTE(review): the third parameter (used as Name) is declared on listing
// line 979, which is absent from this text; the explicit instantiations at
// the end of the file give its type as StringRef.
976 template <class ELFT>
977 EhInputSection::EhInputSection(ObjFile<ELFT> &F,
978 const typename ELFT::Shdr &Header,
980 : InputSectionBase(F, Header, Name, InputSectionBase::EHFrame) {}
// Returns Parent viewed as a SyntheticSection. cast_or_null is used so that a
// null Parent is passed through as null instead of being dereferenced.
982 SyntheticSection *EhInputSection::getParent() const {
983 return cast_or_null<SyntheticSection>(Parent);
986 // Returns the index of the first relocation that points to a region between
987 // Begin and Begin+Size.
//
// RelocI is both the starting index and the search cursor: the loop advances
// it in place, skipping relocations whose r_offset lies before Begin.
// NOTE(review): the last parameter (used as RelocI) is declared on listing
// line 990, which is absent from this text. The loop increments it and
// EhInputSection::split passes the same variable on every call, so it is
// presumably taken by reference -- confirm.
// NOTE(review): listing lines 996-997 and 999-1004 are absent as well, so the
// values returned for a hit and for a miss are not visible here.
988 template <class IntTy, class RelTy>
989 static unsigned getReloc(IntTy Begin, IntTy Size, const ArrayRef<RelTy> &Rels,
991 // Start search from RelocI for fast access. That works because the
992 // relocations are sorted in .eh_frame.
993 for (unsigned N = Rels.size(); RelocI < N; ++RelocI) {
994 const RelTy &Rel = Rels[RelocI];
// Relocation lies before the region.
995 if (Rel.r_offset < Begin)
// Relocation lies inside [Begin, Begin + Size).
998 if (Rel.r_offset < Begin + Size)
1005 // .eh_frame is a sequence of CIE or FDE records.
1006 // This function splits an input section into records and stores them in
// Pieces (the work is done by the ArrayRef overload, which this function
// calls with either relas<ELFT>() or rels<ELFT>()).
// NOTE(review): the condition that chooses between the two calls sits on
// listing lines 1008 and 1010, which are absent from this text.
1007 template <class ELFT> void EhInputSection::split() {
1009 split<ELFT>(relas<ELFT>());
1011 split<ELFT>(rels<ELFT>());
// Walks Data from offset 0 until the offset equals Data.size(). For each
// record it reads the size with readEhRecordSize and appends a piece built
// from (offset, this section, size, result of getReloc) to Pieces.
// Rels is the relocation list that getReloc searches.
// NOTE(review): the declaration of RelI (listing line 1016) and the loop tail
// that handles the end marker and advances Off (lines 1021-1025) are absent
// from this text.
1014 template <class ELFT, class RelTy>
1015 void EhInputSection::split(ArrayRef<RelTy> Rels) {
// The for statement has no increment clause; Off is advanced in the body.
1017 for (size_t Off = 0, End = Data.size(); Off != End;) {
1018 size_t Size = readEhRecordSize(this, Off);
1019 Pieces.emplace_back(Off, this, Size, getReloc(Off, Size, Rels, RelI));
1020 // The empty record is the end marker.
// Searches S, in steps of EntSize bytes, for a unit whose bytes are all zero.
// Returns StringRef::npos when the loop finishes without finding one.
// NOTE(review): the fast path announced by the comment below (listing lines
// 1029-1030) and the value returned on a match (line 1035, presumably the
// byte offset I) are absent from this text.
// NOTE(review): the loop stops only when I lands exactly on S.size(); confirm
// that callers always pass a length that is a multiple of EntSize, otherwise
// I steps past N and the scan reads beyond S.
1027 static size_t findNull(StringRef S, size_t EntSize) {
1028 // Optimize the common case.
1032 for (unsigned I = 0, N = S.size(); I != N; I += EntSize) {
// B points at the start of the current EntSize-byte unit.
1033 const char *B = S.begin() + I;
1034 if (std::all_of(B, B + EntSize, [](char C) { return C == 0; }))
1037 return StringRef::npos;
// Returns Parent viewed as a SyntheticSection. cast_or_null is used so that a
// null Parent is passed through as null instead of being dereferenced.
1040 SyntheticSection *MergeInputSection::getParent() const {
1041 return cast_or_null<SyntheticSection>(Parent);
1044 // Split SHF_STRINGS section. Such section is a sequence of
1045 // null-terminated strings.
//
// Data is the raw section contents; EntSize is the width in bytes of one
// character, and therefore of the terminator that findNull looks for.
// Appends one entry to Pieces per string and calls fatal() when the remaining
// data contains no terminator.
// NOTE(review): the declaration of Off (listing line 1047) and the statements
// that advance S and Off after each piece (lines 1058-1059) are absent from
// this text.
1046 void MergeInputSection::splitStrings(ArrayRef<uint8_t> Data, size_t EntSize) {
1048 bool IsAlloc = Flags & SHF_ALLOC;
1049 StringRef S = toStringRef(Data);
1051 while (!S.empty()) {
1052 size_t End = findNull(S, EntSize);
1053 if (End == StringRef::npos)
1054 fatal(toString(this) + ": string is not null terminated");
// Size covers the string plus its terminator.
1055 size_t Size = End + EntSize;
// The hash is taken over the string including its terminator. The third
// argument is true only for sections without SHF_ALLOC; its meaning is set
// by the SectionPiece constructor, which is outside this view.
1057 Pieces.emplace_back(Off, xxHash64(S.substr(0, Size)), !IsAlloc);
1063 // Split non-SHF_STRINGS section. Such section is a sequence of
1064 // fixed size records.
//
// Appends one entry to Pieces per EntSize-byte record of Data, keyed by the
// record offset and the xxHash64 of the record bytes.
// NOTE(review): the second parameter (used as EntSize) is declared on listing
// line 1066, which is absent from this text.
1065 void MergeInputSection::splitNonStrings(ArrayRef<uint8_t> Data,
1067 size_t Size = Data.size();
// The loop below relies on this: it stops only when I equals Size exactly.
1068 assert((Size % EntSize) == 0);
1069 bool IsAlloc = Flags & SHF_ALLOC;
1071 for (size_t I = 0; I != Size; I += EntSize)
1072 Pieces.emplace_back(I, xxHash64(Data.slice(I, EntSize)), !IsAlloc);
// Constructs a MergeInputSection by forwarding F, Header and Name to
// InputSectionBase with the section kind InputSectionBase::Merge. The
// constructor body is empty.
// NOTE(review): the third parameter (used as Name) is declared on listing
// line 1078, which is absent from this text; the explicit instantiations at
// the end of the file give its type as StringRef.
1075 template <class ELFT>
1076 MergeInputSection::MergeInputSection(ObjFile<ELFT> &F,
1077 const typename ELFT::Shdr &Header,
1079 : InputSectionBase(F, Header, Name, InputSectionBase::Merge) {}
// Constructs a MergeInputSection with no backing input file: the File
// argument passed to InputSectionBase is nullptr, Link and Info are 0, and
// Entsize is passed a second time as the alignment.
// NOTE(review): the last parameter (used as Name) is declared on listing line
// 1083, which is absent from this text.
1081 MergeInputSection::MergeInputSection(uint64_t Flags, uint32_t Type,
1082 uint64_t Entsize, ArrayRef<uint8_t> Data,
1084 : InputSectionBase(nullptr, Flags, Type, Entsize, /*Link*/ 0, /*Info*/ 0,
1085 /*Alignment*/ Entsize, Data, Name, SectionBase::Merge) {}
1087 // This function is called after we obtain a complete list of input sections
1088 // that need to be linked. It is responsible for splitting section contents
1089 // into small chunks for further processing.
1091 // Note that this function is called from parallelForEach. This must be
1092 // thread-safe (i.e. no memory allocation from the pools).
//
// NOTE(review): listing line 1098 (presumably the else that separates the two
// split calls) is absent from this text.
1093 void MergeInputSection::splitIntoPieces() {
// Splitting is expected to happen once per section.
1094 assert(Pieces.empty());
1096 if (Flags & SHF_STRINGS)
1097 splitStrings(Data, Entsize);
1099 splitNonStrings(Data, Entsize);
// Index every piece by its input offset. getSectionPiece and getParentOffset
// consult this map first for offsets that fall exactly on a piece start.
1101 OffsetMap.reserve(Pieces.size());
1102 for (size_t I = 0, E = Pieces.size(); I != E; ++I)
1103 OffsetMap[Pieces[I].InputOff] = I;
// Halving search over [First, Last). Each step probes the midpoint
// MI = First + H and either keeps First or advances it by H, chosen with a
// conditional expression on Comp(Value, *MI). After the final comparison the
// result is First when Comp(Value, *First) holds and First + 1 otherwise.
// The iterator type must support First + H, i.e. random access.
// NOTE(review): the loop header (listing lines 1109-1110) and the update of
// Size (line 1113) are absent from this text, so the handling of an empty
// range cannot be confirmed from here.
1106 template <class It, class T, class Compare>
1107 static It fastUpperBound(It First, It Last, const T &Value, Compare Comp) {
1108 size_t Size = std::distance(First, Last);
1111 size_t H = Size / 2;
1112 const It MI = First + H;
1114 First = Comp(Value, *MI) ? First : First + H;
1116 return Comp(Value, *First) ? First : First + 1;
1119 // Do binary search to get a section piece at a given input offset.
//
// Calls fatal() when Offset is not smaller than the size of the section data.
// NOTE(review): the statements that turn I into the returned pointer (listing
// lines 1128-1129) are absent from this text; presumably the piece just
// before I is returned -- confirm.
1120 static SectionPiece *findSectionPiece(MergeInputSection *Sec, uint64_t Offset) {
1121 if (Sec->Data.size() <= Offset)
1122 fatal(toString(Sec) + ": entry is past the end of the section");
1124 // Find the element this offset points to.
// The comparator orders the offset before a piece when it is smaller than the
// piece start, so the search looks for a piece that starts after Offset.
1125 auto I = fastUpperBound(
1126 Sec->Pieces.begin(), Sec->Pieces.end(), Offset,
1127 [](const uint64_t &A, const SectionPiece &B) { return A < B.InputOff; });
// Returns the piece that contains the input offset Offset. Offsets that fall
// exactly on a piece start are answered from OffsetMap; all others fall back
// to findSectionPiece.
1132 SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) {
1133 // Find a piece starting at a given offset.
1134 auto It = OffsetMap.find(Offset);
1135 if (It != OffsetMap.end())
// The map stores indices into Pieces.
1136 return &Pieces[It->second];
1138 // If Offset is not at beginning of a section piece, it is not in the map.
1139 // In that case we need to search from the original section piece vector.
1140 return findSectionPiece(this, Offset);
1143 // Returns the offset in an output section for a given input offset.
1144 // Because the contents of a mergeable section are not contiguous in output,
1145 // it is not just an addition to a base output offset.
1146 uint64_t MergeInputSection::getParentOffset(uint64_t Offset) const {
1147 // Find a string starting at a given offset.
1148 auto It = OffsetMap.find(Offset);
1149 if (It != OffsetMap.end())
1150 return Pieces[It->second].OutputOff;
1152 // If Offset is not at beginning of a section piece, it is not in the map.
1153 // In that case we need to search from the original section piece vector.
// findSectionPiece takes a non-const section, hence the const_cast; the piece
// is only read here.
1154 const SectionPiece &Piece =
1155 *findSectionPiece(const_cast<MergeInputSection *>(this), Offset);
// Addend is the distance of Offset from the start of its piece; the same
// distance is applied to the piece's output offset.
1156 uint64_t Addend = Offset - Piece.InputOff;
1157 return Piece.OutputOff + Addend;
// Explicit instantiations of the templates defined in this file, one per ELF
// flavour: ELF32LE, ELF32BE, ELF64LE and ELF64BE.
// NOTE(review): the continuation lines of the InputSection constructor
// instantiations (listing lines 1161, 1163, 1165 and 1167) are absent from
// this text.

// InputSection constructors.
1160 template InputSection::InputSection(ObjFile<ELF32LE> &, const ELF32LE::Shdr &,
1162 template InputSection::InputSection(ObjFile<ELF32BE> &, const ELF32BE::Shdr &,
1164 template InputSection::InputSection(ObjFile<ELF64LE> &, const ELF64LE::Shdr &,
1166 template InputSection::InputSection(ObjFile<ELF64BE> &, const ELF64BE::Shdr &,
// InputSectionBase::getLocation.
1169 template std::string InputSectionBase::getLocation<ELF32LE>(uint64_t);
1170 template std::string InputSectionBase::getLocation<ELF32BE>(uint64_t);
1171 template std::string InputSectionBase::getLocation<ELF64LE>(uint64_t);
1172 template std::string InputSectionBase::getLocation<ELF64BE>(uint64_t);
// InputSection::writeTo.
1174 template void InputSection::writeTo<ELF32LE>(uint8_t *);
1175 template void InputSection::writeTo<ELF32BE>(uint8_t *);
1176 template void InputSection::writeTo<ELF64LE>(uint8_t *);
1177 template void InputSection::writeTo<ELF64BE>(uint8_t *);
// MergeInputSection constructors taking an object file and a section header.
1179 template MergeInputSection::MergeInputSection(ObjFile<ELF32LE> &,
1180 const ELF32LE::Shdr &, StringRef);
1181 template MergeInputSection::MergeInputSection(ObjFile<ELF32BE> &,
1182 const ELF32BE::Shdr &, StringRef);
1183 template MergeInputSection::MergeInputSection(ObjFile<ELF64LE> &,
1184 const ELF64LE::Shdr &, StringRef);
1185 template MergeInputSection::MergeInputSection(ObjFile<ELF64BE> &,
1186 const ELF64BE::Shdr &, StringRef);
// EhInputSection constructors.
1188 template EhInputSection::EhInputSection(ObjFile<ELF32LE> &,
1189 const ELF32LE::Shdr &, StringRef);
1190 template EhInputSection::EhInputSection(ObjFile<ELF32BE> &,
1191 const ELF32BE::Shdr &, StringRef);
1192 template EhInputSection::EhInputSection(ObjFile<ELF64LE> &,
1193 const ELF64LE::Shdr &, StringRef);
1194 template EhInputSection::EhInputSection(ObjFile<ELF64BE> &,
1195 const ELF64BE::Shdr &, StringRef);
// EhInputSection::split.
1197 template void EhInputSection::split<ELF32LE>();
1198 template void EhInputSection::split<ELF32BE>();
1199 template void EhInputSection::split<ELF64LE>();
1200 template void EhInputSection::split<ELF64BE>();