1 //===- Chunks.h -------------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_COFF_CHUNKS_H
10 #define LLD_COFF_CHUNKS_H
13 #include "InputFiles.h"
14 #include "lld/Common/LLVM.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/PointerIntPair.h"
17 #include "llvm/ADT/iterator.h"
18 #include "llvm/ADT/iterator_range.h"
19 #include "llvm/MC/StringTableBuilder.h"
20 #include "llvm/Object/COFF.h"
27 using llvm::COFF::ImportDirectoryTableEntry;
28 using llvm::object::COFFSymbolRef;
29 using llvm::object::SectionRef;
30 using llvm::object::coff_relocation;
31 using llvm::object::coff_section;
35 class DefinedImportData;
39 class RuntimePseudoReloc;
// Section characteristic bits that describe permissions (discardable,
// writable, readable, executable, etc). These are the upper seven bits.
constexpr uint32_t permMask = 0xFE000000;

// Section characteristic bits that describe the section type (code, data,
// bss). These are bits 5 through 7.
constexpr uint32_t typeMask = 0x000000E0;

// Log base 2 of the largest section alignment: log2(8192) == 13.
enum : unsigned { Log2MaxSectionAlignment = 13 };
51 // A Chunk represents a chunk of data that will occupy space in the
52 // output (if the resolver chose that). It may or may not be backed by
// a section of an input file. It could be linker-created data, or it
// might not even have actual data (if common or bss).
// Discriminator identifying the category of a chunk; stored in one byte.
enum Kind : uint8_t {
  SectionKind,
  OtherKind,
  ImportThunkKind
};
58 Kind kind() const { return chunkKind; }
// Returns the size of this chunk (even if this is a common or BSS chunk).
61 size_t getSize() const;
// Returns chunk alignment in power of two form. Valid values are powers of
// two from 1 to 8192.
65 uint32_t getAlignment() const { return 1U << p2Align; }
67 // Update the chunk section alignment measured in bytes. Internally alignment
69 void setAlignment(uint32_t align) {
70 // Treat zero byte alignment as 1 byte alignment.
71 align = align ? align : 1;
72 assert(llvm::isPowerOf2_32(align) && "alignment is not a power of 2");
73 p2Align = llvm::Log2_32(align);
74 assert(p2Align <= Log2MaxSectionAlignment &&
75 "impossible requested alignment");
78 // Write this chunk to a mmap'ed file, assuming Buf is pointing to
79 // beginning of the file. Because this function may use RVA values
80 // of other chunks for relocations, you need to set them properly
81 // before calling this function.
82 void writeTo(uint8_t *buf) const;
84 // The writer sets and uses the addresses. In practice, PE images cannot be
85 // larger than 2GB. Chunks are always laid as part of the image, so Chunk RVAs
86 // can be stored with 32 bits.
87 uint32_t getRVA() const { return rva; }
88 void setRVA(uint64_t v) {
90 assert(rva == v && "RVA truncated");
93 // Returns readable/writable/executable bits.
94 uint32_t getOutputCharacteristics() const;
96 // Returns the section name if this is a section chunk.
97 // It is illegal to call this function on non-section chunks.
98 StringRef getSectionName() const;
100 // An output section has pointers to chunks in the section, and each
101 // chunk has a back pointer to an output section.
102 void setOutputSectionIdx(uint16_t o) { osidx = o; }
103 uint16_t getOutputSectionIdx() const { return osidx; }
104 OutputSection *getOutputSection() const;
107 // Collect all locations that contain absolute addresses for base relocations.
108 void getBaserels(std::vector<Baserel> *res);
110 // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
111 // bytes, so this is used only for logging or debugging.
112 StringRef getDebugName() const;
114 // Return true if this file has the hotpatch flag set to true in the
115 // S_COMPILE3 record in codeview debug info. Also returns true for some thunks
116 // synthesized by the linker.
117 bool isHotPatchable() const;
120 Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {}
122 const Kind chunkKind;
125 // Returns true if this has non-zero data. BSS chunks return
126 // false. If false is returned, the space occupied by this chunk
127 // will be filled with zeros. Corresponds to the
128 // IMAGE_SCN_CNT_UNINITIALIZED_DATA section characteristic bit.
132 // The alignment of this chunk, stored in log2 form. The writer uses the
136 // The output section index for this chunk. The first valid section number is
140 // The RVA of this chunk in the output. The writer sets a value.
144 class NonSectionChunk : public Chunk {
146 virtual ~NonSectionChunk() = default;
// Returns the size of this chunk (even if this is a common or BSS chunk).
149 virtual size_t getSize() const = 0;
151 virtual uint32_t getOutputCharacteristics() const { return 0; }
153 // Write this chunk to a mmap'ed file, assuming Buf is pointing to
154 // beginning of the file. Because this function may use RVA values
155 // of other chunks for relocations, you need to set them properly
156 // before calling this function.
157 virtual void writeTo(uint8_t *buf) const {}
159 // Returns the section name if this is a section chunk.
160 // It is illegal to call this function on non-section chunks.
161 virtual StringRef getSectionName() const {
162 llvm_unreachable("unimplemented getSectionName");
166 // Collect all locations that contain absolute addresses for base relocations.
167 virtual void getBaserels(std::vector<Baserel> *res) {}
169 // Returns a human-readable name of this chunk. Chunks are unnamed chunks of
170 // bytes, so this is used only for logging or debugging.
171 virtual StringRef getDebugName() const { return ""; }
173 static bool classof(const Chunk *c) { return c->kind() != SectionKind; }
176 NonSectionChunk(Kind k = OtherKind) : Chunk(k) {}
// A chunk corresponding to a section of an input file.
180 class SectionChunk final : public Chunk {
181 // Identical COMDAT Folding feature accesses section internal data.
185 class symbol_iterator : public llvm::iterator_adaptor_base<
186 symbol_iterator, const coff_relocation *,
187 std::random_access_iterator_tag, Symbol *> {
192 symbol_iterator(ObjFile *file, const coff_relocation *i)
193 : symbol_iterator::iterator_adaptor_base(i), file(file) {}
196 symbol_iterator() = default;
198 Symbol *operator*() const { return file->getSymbol(I->SymbolTableIndex); }
201 SectionChunk(ObjFile *file, const coff_section *header);
202 static bool classof(const Chunk *c) { return c->kind() == SectionKind; }
203 size_t getSize() const { return header->SizeOfRawData; }
204 ArrayRef<uint8_t> getContents() const;
205 void writeTo(uint8_t *buf) const;
207 uint32_t getOutputCharacteristics() const {
208 return header->Characteristics & (permMask | typeMask);
210 StringRef getSectionName() const {
211 return StringRef(sectionNameData, sectionNameSize);
213 void getBaserels(std::vector<Baserel> *res);
214 bool isCOMDAT() const;
215 void applyRelX64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
217 void applyRelX86(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
219 void applyRelARM(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
221 void applyRelARM64(uint8_t *off, uint16_t type, OutputSection *os, uint64_t s,
224 void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &res);
226 // Called if the garbage collector decides to not include this chunk
227 // in a final output. It's supposed to print out a log message to stdout.
228 void printDiscardedMessage() const;
230 // Adds COMDAT associative sections to this COMDAT section. A chunk
231 // and its children are treated as a group by the garbage collector.
232 void addAssociative(SectionChunk *child);
234 StringRef getDebugName() const;
236 // True if this is a codeview debug info chunk. These will not be laid out in
237 // the image. Instead they will end up in the PDB, if one is requested.
238 bool isCodeView() const {
239 return getSectionName() == ".debug" || getSectionName().startswith(".debug$");
242 // True if this is a DWARF debug info or exception handling chunk.
243 bool isDWARF() const {
244 return getSectionName().startswith(".debug_") || getSectionName() == ".eh_frame";
247 // Allow iteration over the bodies of this chunk's relocated symbols.
248 llvm::iterator_range<symbol_iterator> symbols() const {
249 return llvm::make_range(symbol_iterator(file, relocsData),
250 symbol_iterator(file, relocsData + relocsSize));
253 ArrayRef<coff_relocation> getRelocs() const {
254 return llvm::makeArrayRef(relocsData, relocsSize);
257 // Reloc setter used by ARM range extension thunk insertion.
258 void setRelocs(ArrayRef<coff_relocation> newRelocs) {
259 relocsData = newRelocs.data();
260 relocsSize = newRelocs.size();
261 assert(relocsSize == newRelocs.size() && "reloc size truncation");
// Singly linked list iterator for associated comdat children.
265 class AssociatedIterator
266 : public llvm::iterator_facade_base<
267 AssociatedIterator, std::forward_iterator_tag, SectionChunk> {
269 AssociatedIterator() = default;
270 AssociatedIterator(SectionChunk *head) : cur(head) {}
271 AssociatedIterator &operator=(const AssociatedIterator &r) {
275 bool operator==(const AssociatedIterator &r) const { return cur == r.cur; }
276 const SectionChunk &operator*() const { return *cur; }
277 SectionChunk &operator*() { return *cur; }
278 AssociatedIterator &operator++() {
279 cur = cur->assocChildren;
284 SectionChunk *cur = nullptr;
287 // Allow iteration over the associated child chunks for this section.
288 llvm::iterator_range<AssociatedIterator> children() const {
289 return llvm::make_range(AssociatedIterator(assocChildren),
290 AssociatedIterator(nullptr));
293 // The section ID this chunk belongs to in its Obj.
294 uint32_t getSectionNumber() const;
296 ArrayRef<uint8_t> consumeDebugMagic();
298 static ArrayRef<uint8_t> consumeDebugMagic(ArrayRef<uint8_t> data,
299 StringRef sectionName);
301 static SectionChunk *findByName(ArrayRef<SectionChunk *> sections,
304 // The file that this chunk was created from.
307 // Pointer to the COFF section header in the input file.
308 const coff_section *header;
310 // The COMDAT leader symbol if this is a COMDAT chunk.
311 DefinedRegular *sym = nullptr;
313 // The CRC of the contents as described in the COFF spec 4.5.5.
314 // Auxiliary Format 5: Section Definitions. Used for ICF.
315 uint32_t checksum = 0;
317 // Used by the garbage collector.
320 // Whether this section needs to be kept distinct from other sections during
321 // ICF. This is set by the driver using address-significance tables.
322 bool keepUnique = false;
324 // The COMDAT selection if this is a COMDAT chunk.
325 llvm::COFF::COMDATType selection = (llvm::COFF::COMDATType)0;
327 // A pointer pointing to a replacement for this chunk.
328 // Initially it points to "this" object. If this chunk is merged
329 // with other chunk by ICF, it points to another chunk,
330 // and this chunk is considered as dead.
334 SectionChunk *assocChildren = nullptr;
336 // Used for ICF (Identical COMDAT Folding)
337 void replace(SectionChunk *other);
338 uint32_t eqClass[2] = {0, 0};
340 // Relocations for this section. Size is stored below.
341 const coff_relocation *relocsData;
343 // Section name string. Size is stored below.
344 const char *sectionNameData;
346 uint32_t relocsSize = 0;
347 uint32_t sectionNameSize = 0;
350 // Inline methods to implement faux-virtual dispatch for SectionChunk.
352 inline size_t Chunk::getSize() const {
353 if (isa<SectionChunk>(this))
354 return static_cast<const SectionChunk *>(this)->getSize();
356 return static_cast<const NonSectionChunk *>(this)->getSize();
359 inline uint32_t Chunk::getOutputCharacteristics() const {
360 if (isa<SectionChunk>(this))
361 return static_cast<const SectionChunk *>(this)->getOutputCharacteristics();
363 return static_cast<const NonSectionChunk *>(this)
364 ->getOutputCharacteristics();
367 inline void Chunk::writeTo(uint8_t *buf) const {
368 if (isa<SectionChunk>(this))
369 static_cast<const SectionChunk *>(this)->writeTo(buf);
371 static_cast<const NonSectionChunk *>(this)->writeTo(buf);
374 inline StringRef Chunk::getSectionName() const {
375 if (isa<SectionChunk>(this))
376 return static_cast<const SectionChunk *>(this)->getSectionName();
378 return static_cast<const NonSectionChunk *>(this)->getSectionName();
381 inline void Chunk::getBaserels(std::vector<Baserel> *res) {
382 if (isa<SectionChunk>(this))
383 static_cast<SectionChunk *>(this)->getBaserels(res);
385 static_cast<NonSectionChunk *>(this)->getBaserels(res);
388 inline StringRef Chunk::getDebugName() const {
389 if (isa<SectionChunk>(this))
390 return static_cast<const SectionChunk *>(this)->getDebugName();
392 return static_cast<const NonSectionChunk *>(this)->getDebugName();
395 // This class is used to implement an lld-specific feature (not implemented in
396 // MSVC) that minimizes the output size by finding string literals sharing tail
397 // parts and merging them.
399 // If string tail merging is enabled and a section is identified as containing a
400 // string literal, it is added to a MergeChunk with an appropriate alignment.
401 // The MergeChunk then tail merges the strings using the StringTableBuilder
402 // class and assigns RVAs and section offsets to each of the member chunks based
403 // on the offsets assigned by the StringTableBuilder.
404 class MergeChunk : public NonSectionChunk {
406 MergeChunk(uint32_t alignment);
407 static void addSection(SectionChunk *c);
408 void finalizeContents();
409 void assignSubsectionRVAs();
411 uint32_t getOutputCharacteristics() const override;
412 StringRef getSectionName() const override { return ".rdata"; }
413 size_t getSize() const override;
414 void writeTo(uint8_t *buf) const override;
416 static MergeChunk *instances[Log2MaxSectionAlignment + 1];
417 std::vector<SectionChunk *> sections;
420 llvm::StringTableBuilder builder;
421 bool finalized = false;
424 // A chunk for common symbols. Common chunks don't have actual data.
425 class CommonChunk : public NonSectionChunk {
427 CommonChunk(const COFFSymbolRef sym);
428 size_t getSize() const override { return sym.getValue(); }
429 uint32_t getOutputCharacteristics() const override;
430 StringRef getSectionName() const override { return ".bss"; }
433 const COFFSymbolRef sym;
436 // A chunk for linker-created strings.
437 class StringChunk : public NonSectionChunk {
439 explicit StringChunk(StringRef s) : str(s) {}
440 size_t getSize() const override { return str.size() + 1; }
441 void writeTo(uint8_t *buf) const override;
447 static const uint8_t importThunkX86[] = {
448 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
451 static const uint8_t importThunkARM[] = {
452 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0
453 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0
454 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip]
457 static const uint8_t importThunkARM64[] = {
458 0x10, 0x00, 0x00, 0x90, // adrp x16, #0
459 0x10, 0x02, 0x40, 0xf9, // ldr x16, [x16]
460 0x00, 0x02, 0x1f, 0xd6, // br x16
464 // A chunk for DLL import jump table entry. In a final output, its
465 // contents will be a JMP instruction to some __imp_ symbol.
466 class ImportThunkChunk : public NonSectionChunk {
468 ImportThunkChunk(Defined *s)
469 : NonSectionChunk(ImportThunkKind), impSymbol(s) {}
470 static bool classof(const Chunk *c) { return c->kind() == ImportThunkKind; }
476 class ImportThunkChunkX64 : public ImportThunkChunk {
478 explicit ImportThunkChunkX64(Defined *s);
479 size_t getSize() const override { return sizeof(importThunkX86); }
480 void writeTo(uint8_t *buf) const override;
483 class ImportThunkChunkX86 : public ImportThunkChunk {
485 explicit ImportThunkChunkX86(Defined *s) : ImportThunkChunk(s) {}
486 size_t getSize() const override { return sizeof(importThunkX86); }
487 void getBaserels(std::vector<Baserel> *res) override;
488 void writeTo(uint8_t *buf) const override;
491 class ImportThunkChunkARM : public ImportThunkChunk {
493 explicit ImportThunkChunkARM(Defined *s) : ImportThunkChunk(s) {}
494 size_t getSize() const override { return sizeof(importThunkARM); }
495 void getBaserels(std::vector<Baserel> *res) override;
496 void writeTo(uint8_t *buf) const override;
499 class ImportThunkChunkARM64 : public ImportThunkChunk {
501 explicit ImportThunkChunkARM64(Defined *s) : ImportThunkChunk(s) {}
502 size_t getSize() const override { return sizeof(importThunkARM64); }
503 void writeTo(uint8_t *buf) const override;
506 class RangeExtensionThunkARM : public NonSectionChunk {
508 explicit RangeExtensionThunkARM(Defined *t) : target(t) {}
509 size_t getSize() const override;
510 void writeTo(uint8_t *buf) const override;
515 class RangeExtensionThunkARM64 : public NonSectionChunk {
517 explicit RangeExtensionThunkARM64(Defined *t) : target(t) {}
518 size_t getSize() const override;
519 void writeTo(uint8_t *buf) const override;
525 // See comments for DefinedLocalImport class.
526 class LocalImportChunk : public NonSectionChunk {
528 explicit LocalImportChunk(Defined *s) : sym(s) {
529 setAlignment(config->wordsize);
531 size_t getSize() const override;
532 void getBaserels(std::vector<Baserel> *res) override;
533 void writeTo(uint8_t *buf) const override;
539 // Duplicate RVAs are not allowed in RVA tables, so unique symbols by chunk and
540 // offset into the chunk. Order does not matter as the RVA table will be sorted
542 struct ChunkAndOffset {
546 struct DenseMapInfo {
547 static ChunkAndOffset getEmptyKey() {
548 return {llvm::DenseMapInfo<Chunk *>::getEmptyKey(), 0};
550 static ChunkAndOffset getTombstoneKey() {
551 return {llvm::DenseMapInfo<Chunk *>::getTombstoneKey(), 0};
553 static unsigned getHashValue(const ChunkAndOffset &co) {
554 return llvm::DenseMapInfo<std::pair<Chunk *, uint32_t>>::getHashValue(
555 {co.inputChunk, co.offset});
557 static bool isEqual(const ChunkAndOffset &lhs, const ChunkAndOffset &rhs) {
558 return lhs.inputChunk == rhs.inputChunk && lhs.offset == rhs.offset;
563 using SymbolRVASet = llvm::DenseSet<ChunkAndOffset>;
565 // Table which contains symbol RVAs. Used for /safeseh and /guard:cf.
566 class RVATableChunk : public NonSectionChunk {
568 explicit RVATableChunk(SymbolRVASet s) : syms(std::move(s)) {}
569 size_t getSize() const override { return syms.size() * 4; }
570 void writeTo(uint8_t *buf) const override;
577 // This class represents a block in .reloc section.
578 // See the PE/COFF spec 5.6 for details.
579 class BaserelChunk : public NonSectionChunk {
581 BaserelChunk(uint32_t page, Baserel *begin, Baserel *end);
582 size_t getSize() const override { return data.size(); }
583 void writeTo(uint8_t *buf) const override;
586 std::vector<uint8_t> data;
591 Baserel(uint32_t v, uint8_t ty) : rva(v), type(ty) {}
592 explicit Baserel(uint32_t v) : Baserel(v, getDefaultType()) {}
593 uint8_t getDefaultType();
599 // This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a
600 // specific place in a section, without any data. This is used for the MinGW
601 // specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept
602 // of an empty chunk isn't MinGW specific.
603 class EmptyChunk : public NonSectionChunk {
606 size_t getSize() const override { return 0; }
607 void writeTo(uint8_t *buf) const override {}
610 // MinGW specific, for the "automatic import of variables from DLLs" feature.
611 // This provides the table of runtime pseudo relocations, for variable
612 // references that turned out to need to be imported from a DLL even though
613 // the reference didn't use the dllimport attribute. The MinGW runtime will
// process this table after loading, before handing control over to user
616 class PseudoRelocTableChunk : public NonSectionChunk {
618 PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &relocs)
619 : relocs(std::move(relocs)) {
622 size_t getSize() const override;
623 void writeTo(uint8_t *buf) const override;
626 std::vector<RuntimePseudoReloc> relocs;
629 // MinGW specific; information about one individual location in the image
630 // that needs to be fixed up at runtime after loading. This represents
631 // one individual element in the PseudoRelocTableChunk table.
632 class RuntimePseudoReloc {
634 RuntimePseudoReloc(Defined *sym, SectionChunk *target, uint32_t targetOffset,
636 : sym(sym), target(target), targetOffset(targetOffset), flags(flags) {}
639 SectionChunk *target;
640 uint32_t targetOffset;
641 // The Flags field contains the size of the relocation, in bits. No other
642 // flags are currently defined.
646 // MinGW specific. A Chunk that contains one pointer-sized absolute value.
647 class AbsolutePointerChunk : public NonSectionChunk {
649 AbsolutePointerChunk(uint64_t value) : value(value) {
650 setAlignment(getSize());
652 size_t getSize() const override;
653 void writeTo(uint8_t *buf) const override;
659 // Return true if this file has the hotpatch flag set to true in the S_COMPILE3
660 // record in codeview debug info. Also returns true for some thunks synthesized
662 inline bool Chunk::isHotPatchable() const {
663 if (auto *sc = dyn_cast<SectionChunk>(this))
664 return sc->file->hotPatchable;
665 else if (isa<ImportThunkChunk>(this))
670 void applyMOV32T(uint8_t *off, uint32_t v);
671 void applyBranch24T(uint8_t *off, int32_t v);
673 void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift);
674 void applyArm64Imm(uint8_t *off, uint64_t imm, uint32_t rangeLimit);
675 void applyArm64Branch26(uint8_t *off, int64_t v);
682 struct DenseMapInfo<lld::coff::ChunkAndOffset>
683 : lld::coff::ChunkAndOffset::DenseMapInfo {};