1 //===- Object.h -------------------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #ifndef LLVM_TOOLS_OBJCOPY_OBJECT_H
11 #define LLVM_TOOLS_OBJCOPY_OBJECT_H
13 #include "llvm/ADT/ArrayRef.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/BinaryFormat/ELF.h"
17 #include "llvm/MC/StringTableBuilder.h"
18 #include "llvm/Object/ELFObjectFile.h"
19 #include "llvm/Support/FileOutputBuffer.h"
20 #include "llvm/Support/JamCRC.h"
34 class OwnedDataSection;
35 class StringTableSection;
36 class SymbolTableSection;
37 class RelocationSection;
38 class DynamicRelocationSection;
39 class GnuDebugLinkSection;
41 class SectionIndexSection;
// View over a mutable array of owned SectionBase pointers. It offers
// begin()/end() iteration plus index-based lookup helpers that take the error
// message(s) to use on failure (how failures are reported is defined out of
// line and not visible here).
46 class SectionTableRef {
47 MutableArrayRef<std::unique_ptr<SectionBase>> Sections;
// Iterator built on pointee_iterator over the unique_ptr elements; presumably
// dereferences to the pointed-to SectionBase -- confirm against the ADT header.
50 using iterator = pointee_iterator<std::unique_ptr<SectionBase> *>;
52 explicit SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs)
54 SectionTableRef(const SectionTableRef &) = default;
// Range over the underlying array: [data(), data() + size()).
56 iterator begin() { return iterator(Sections.data()); }
57 iterator end() { return iterator(Sections.data() + Sections.size()); }
// Looks up a section by Index; ErrMsg is the caller-supplied error text.
59 SectionBase *getSection(uint32_t Index, Twine ErrMsg);
// Typed lookup: separate messages for an index failure and a type failure.
62 T *getSectionOfType(uint32_t Index, Twine IndexErrMsg, Twine TypeErrMsg);
// The four ELF flavours distinguished by this tool. Going by the enumerator
// names: 32- or 64-bit class combined with little- or big-endian encoding.
65 enum ElfType { ELFT_ELF32LE, ELFT_ELF64LE, ELFT_ELF32BE, ELFT_ELF64BE };
// Abstract visitor over the section hierarchy: one pure-virtual visit()
// overload per section class listed below. Sections dispatch to it through
// their accept(SectionVisitor &) methods.
67 class SectionVisitor {
69 virtual ~SectionVisitor();
71 virtual void visit(const Section &Sec) = 0;
72 virtual void visit(const OwnedDataSection &Sec) = 0;
73 virtual void visit(const StringTableSection &Sec) = 0;
74 virtual void visit(const SymbolTableSection &Sec) = 0;
75 virtual void visit(const RelocationSection &Sec) = 0;
76 virtual void visit(const DynamicRelocationSection &Sec) = 0;
77 virtual void visit(const GnuDebugLinkSection &Sec) = 0;
78 virtual void visit(const GroupSection &Sec) = 0;
79 virtual void visit(const SectionIndexSection &Sec) = 0;
// SectionVisitor whose constructor binds its Out member to the given Buffer.
// Four visit() overloads are declared as concrete overrides here; the other
// five are re-declared pure virtual, so derived writers must implement them.
82 class SectionWriter : public SectionVisitor {
// NOTE(review): the ';' after the empty destructor body is redundant.
87 virtual ~SectionWriter(){};
// Overrides implemented by this class (definitions are out of line).
89 void visit(const Section &Sec) override;
90 void visit(const OwnedDataSection &Sec) override;
91 void visit(const StringTableSection &Sec) override;
92 void visit(const DynamicRelocationSection &Sec) override;
// Overrides left to subclasses.
93 virtual void visit(const SymbolTableSection &Sec) override = 0;
94 virtual void visit(const RelocationSection &Sec) override = 0;
95 virtual void visit(const GnuDebugLinkSection &Sec) override = 0;
96 virtual void visit(const GroupSection &Sec) override = 0;
97 virtual void visit(const SectionIndexSection &Sec) override = 0;
99 explicit SectionWriter(Buffer &Buf) : Out(Buf) {}
// SectionWriter parameterised on the ELF type ELFT. It declares overrides for
// the five visit() overloads that SectionWriter leaves pure virtual.
102 template <class ELFT> class ELFSectionWriter : public SectionWriter {
// Shorthands for the ELFT-dependent record types.
104 using Elf_Word = typename ELFT::Word;
105 using Elf_Rel = typename ELFT::Rel;
106 using Elf_Rela = typename ELFT::Rela;
109 virtual ~ELFSectionWriter() {}
110 void visit(const SymbolTableSection &Sec) override;
111 void visit(const RelocationSection &Sec) override;
112 void visit(const GnuDebugLinkSection &Sec) override;
113 void visit(const GroupSection &Sec) override;
114 void visit(const SectionIndexSection &Sec) override;
// Forwards the output buffer to the SectionWriter base.
116 explicit ELFSectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
// Expands to friend declarations for SectionWriter and every
// ELFSectionWriter<ELFT> instantiation. Section classes place it in their
// body so the writers can reach their non-public members.
119 #define MAKE_SEC_WRITER_FRIEND \
120 friend class SectionWriter; \
121 template <class ELFT> friend class ELFSectionWriter;
// Non-template SectionWriter that declares overrides for the five visit()
// overloads SectionWriter leaves pure virtual. Their behaviour is defined out
// of line and is not visible in this header.
123 class BinarySectionWriter : public SectionWriter {
125 virtual ~BinarySectionWriter() {}
127 void visit(const SymbolTableSection &Sec) override;
128 void visit(const RelocationSection &Sec) override;
129 void visit(const GnuDebugLinkSection &Sec) override;
130 void visit(const GroupSection &Sec) override;
131 void visit(const SectionIndexSection &Sec) override;
// Forwards the output buffer to the SectionWriter base.
133 explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
136 // The class Buffer abstracts out the common interface of FileOutputBuffer and
137 // WritableMemoryBuffer so that the hierarchy of Writers depends on this
138 // abstract interface and doesn't depend on a particular implementation.
139 // TODO: refactor the buffer classes in LLVM to enable us to use them here
// Interface implemented by concrete buffers: allocate Size bytes, expose the
// start of the writable storage, and commit the result (reported as an Error).
146 virtual void allocate(size_t Size) = 0;
147 virtual uint8_t *getBufferStart() = 0;
148 virtual Error commit() = 0;
// Initializes the Name member from the argument; getName() returns it.
150 explicit Buffer(StringRef Name) : Name(Name) {}
151 StringRef getName() const { return Name; }
// Buffer implementation that owns a FileOutputBuffer. The constructor only
// records the file name via the Buffer base; Buf is not set up here.
154 class FileBuffer : public Buffer {
155 std::unique_ptr<FileOutputBuffer> Buf;
158 void allocate(size_t Size) override;
159 uint8_t *getBufferStart() override;
160 Error commit() override;
162 explicit FileBuffer(StringRef FileName) : Buffer(FileName) {}
// Buffer implementation that owns a WritableMemoryBuffer. In addition to the
// Buffer interface it can hand the underlying memory buffer to the caller.
165 class MemBuffer : public Buffer {
166 std::unique_ptr<WritableMemoryBuffer> Buf;
169 void allocate(size_t Size) override;
170 uint8_t *getBufferStart() override;
171 Error commit() override;
173 explicit MemBuffer(StringRef Name) : Buffer(Name) {}
// Returns ownership of a WritableMemoryBuffer; presumably this is Buf and the
// object is left without a buffer afterwards -- confirm in the definition.
175 std::unique_ptr<WritableMemoryBuffer> releaseMemoryBuffer();
// Two-step interface every writer implements: finalize(), then write().
// (The ordering is suggested by the names only; confirm at the call sites.)
185 virtual void finalize() = 0;
186 virtual void write() = 0;
// Binds the writer to the Object to emit and the Buffer to emit it into.
188 Writer(Object &O, Buffer &B) : Obj(O), Buf(B) {}
// Writer parameterised on the ELF type ELFT. It declares helpers for writing
// program headers, section headers and section data, and owns the
// ELFSectionWriter used for the sections.
191 template <class ELFT> class ELFWriter : public Writer {
// Shorthands for the ELFT-dependent header record types.
193 using Elf_Shdr = typename ELFT::Shdr;
194 using Elf_Phdr = typename ELFT::Phdr;
195 using Elf_Ehdr = typename ELFT::Ehdr;
198 void writePhdr(const Segment &Seg);
199 void writeShdr(const SectionBase &Sec);
203 void writeSectionData();
205 void assignOffsets();
207 std::unique_ptr<ELFSectionWriter<ELFT>> SecWriter;
209 size_t totalSize() const;
212 virtual ~ELFWriter() {}
// Defaults to true; the constructor below overwrites it with WSH.
213 bool WriteSectionHeaders = true;
215 void finalize() override;
216 void write() override;
// WSH sets WriteSectionHeaders; Obj and Buf are forwarded to Writer.
217 ELFWriter(Object &Obj, Buffer &Buf, bool WSH)
218 : Writer(Obj, Buf), WriteSectionHeaders(WSH) {}
// Writer that owns a BinarySectionWriter. It implements the Writer interface
// (finalize/write) out of line.
221 class BinaryWriter : public Writer {
223 std::unique_ptr<BinarySectionWriter> SecWriter;
229 void finalize() override;
230 void write() override;
231 BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
// Data members and virtual hooks shared by all section classes (the class
// header itself is outside this view).
// Segment this section belongs to, or nullptr when it has none.
237 Segment *ParentSegment = nullptr;
// NOTE(review): unlike the neighbouring members, HeaderOffset has no default
// initializer here -- confirm it is always assigned before being read.
238 uint64_t HeaderOffset;
// The maximum uint64_t value is the default, i.e. "no original offset".
239 uint64_t OriginalOffset = std::numeric_limits<uint64_t>::max();
241 bool HasSymbol = false;
245 uint32_t EntrySize = 0;
248 uint64_t Link = ELF::SHN_UNDEF;
249 uint64_t NameIndex = 0;
252 uint64_t Type = ELF::SHT_NULL;
254 virtual ~SectionBase() = default;
// Hooks with default implementations defined out of line; subclasses override
// the ones they need. accept() is the only pure-virtual member shown.
256 virtual void initialize(SectionTableRef SecTable);
257 virtual void finalize();
258 virtual void removeSectionReferences(const SectionBase *Sec);
259 virtual void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
260 virtual void accept(SectionVisitor &Visitor) const = 0;
261 virtual void markSymbols();
// Strict ordering for SectionBase pointers, used as the comparator of the
// std::set of sections declared after it.
266 struct SectionCompare {
267 bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const {
268 // Some sections might have the same address if one of them is empty. To
269 // fix this we can use the lexicographic ordering on ->Addr and the
270 // address of the actually stored section.
// NOTE(review): the comment above speaks of ->Addr, but the comparison below
// keys on OriginalOffset. The tie-break statement for equal offsets is not
// visible in this view.
271 if (Lhs->OriginalOffset == Rhs->OriginalOffset)
273 return Lhs->OriginalOffset < Rhs->OriginalOffset;
// Sections attached to this segment, kept ordered by SectionCompare.
277 std::set<const SectionBase *, SectionCompare> Sections;
// Bytes of the segment as handed to the constructor (a non-owning ArrayRef).
278 ArrayRef<uint8_t> Contents;
// NOTE(review): no default initializer here -- confirm it is always assigned
// before being read.
291 uint64_t OriginalOffset;
292 Segment *ParentSegment = nullptr;
294 explicit Segment(ArrayRef<uint8_t> Data) : Contents(Data) {}
// Returns the first element of Sections (per SectionCompare order) when the
// set is non-empty; the empty-set return is not visible in this view.
297 const SectionBase *firstSection() const {
298 if (!Sections.empty())
299 return *Sections.begin();
// Membership updates on the ordered set.
303 void removeSection(const SectionBase *Sec) { Sections.erase(Sec); }
304 void addSection(const SectionBase *Sec) { Sections.insert(Sec); }
// Section backed by a non-owning byte range. It also carries an optional
// pointer to a linked section (nullptr by default).
307 class Section : public SectionBase {
308 MAKE_SEC_WRITER_FRIEND
310 ArrayRef<uint8_t> Contents;
311 SectionBase *LinkSection = nullptr;
// Stores the ArrayRef as given; the bytes are not copied.
314 explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
316 void accept(SectionVisitor &Visitor) const override;
317 void removeSectionReferences(const SectionBase *Sec) override;
318 void initialize(SectionTableRef SecTable) override;
319 void finalize() override;
// Section that stores its own copy of the section bytes in a std::vector.
322 class OwnedDataSection : public SectionBase {
323 MAKE_SEC_WRITER_FRIEND
325 std::vector<uint8_t> Data;
// The constructor parameter Data shadows the member of the same name; the
// member is initialized by copying the parameter's byte range.
328 OwnedDataSection(StringRef SecName, ArrayRef<uint8_t> Data)
329 : Data(std::begin(Data), std::end(Data)) {
331 Type = ELF::SHT_PROGBITS;
// Same value as the SectionBase default, set explicitly here.
333 OriginalOffset = std::numeric_limits<uint64_t>::max();
336 void accept(SectionVisitor &Sec) const override;
339 // There are two types of string tables that can exist, dynamic and not dynamic.
340 // In the dynamic case the string table is allocated. Changing a dynamic string
341 // table would mean altering virtual addresses and thus the memory image. So
342 // dynamic string tables should not have an interface to modify them or
343 // reconstruct them. This type lets us reconstruct a string table. To avoid
344 // this class being used for dynamic string tables (which has happened) the
345 // classof method checks that the particular instance is not allocated. This
346 // then agrees with the makeSection method used to construct most sections.
347 class StringTableSection : public SectionBase {
348 MAKE_SEC_WRITER_FRIEND
350 StringTableBuilder StrTabBuilder;
// Builds an ELF-kind string table and marks the section as SHT_STRTAB.
353 StringTableSection() : StrTabBuilder(StringTableBuilder::ELF) {
354 Type = ELF::SHT_STRTAB;
// addString registers a name; findIndex returns the index recorded for a
// name. Both are defined out of line, presumably via StrTabBuilder.
357 void addString(StringRef Name);
358 uint32_t findIndex(StringRef Name) const;
359 void finalize() override;
360 void accept(SectionVisitor &Visitor) const override;
// Type test for LLVM-style casts. Allocated sections (SHF_ALLOC) are handled
// by the branch below, whose body is not visible here; per the class comment
// they are rejected. Otherwise the section type must be SHT_STRTAB.
362 static bool classof(const SectionBase *S) {
363 if (S->Flags & ELF::SHF_ALLOC)
365 return S->Type == ELF::SHT_STRTAB;
369 // Symbols have a st_shndx field that normally stores an index but occasionally
370 // stores a different special value. This enum keeps track of what the st_shndx
371 // field means. Most of the values are just copies of the special SHN_* values.
372 // SYMBOL_SIMPLE_INDEX means that the st_shndx is just an index of a section.
373 enum SymbolShndxType {
// The only enumerator below that is not taken from an ELF::SHN_* constant.
374 SYMBOL_SIMPLE_INDEX = 0,
375 SYMBOL_ABS = ELF::SHN_ABS,
376 SYMBOL_COMMON = ELF::SHN_COMMON,
377 SYMBOL_HEXAGON_SCOMMON = ELF::SHN_HEXAGON_SCOMMON,
378 SYMBOL_HEXAGON_SCOMMON_2 = ELF::SHN_HEXAGON_SCOMMON_2,
379 SYMBOL_HEXAGON_SCOMMON_4 = ELF::SHN_HEXAGON_SCOMMON_4,
380 SYMBOL_HEXAGON_SCOMMON_8 = ELF::SHN_HEXAGON_SCOMMON_8,
381 SYMBOL_XINDEX = ELF::SHN_XINDEX,
// Section the symbol is defined in, or nullptr when there is none.
386 SectionBase *DefinedIn = nullptr;
// NOTE(review): no default initializer -- confirm every Symbol gets a
// ShndxType assigned before getShndx() is used.
387 SymbolShndxType ShndxType;
395 bool Referenced = false;
// Returns the 16-bit st_shndx value for this symbol (defined out of line).
397 uint16_t getShndx() const;
// Section of type SHT_SYMTAB_SHNDX named ".symtab_shndx". It holds a list of
// 32-bit indexes and a pointer to the symbol table it is associated with.
400 class SectionIndexSection : public SectionBase {
401 MAKE_SEC_WRITER_FRIEND
404 std::vector<uint32_t> Indexes;
405 SymbolTableSection *Symbols = nullptr;
408 virtual ~SectionIndexSection() {}
// Appends Index to Indexes (any further statements in the body are not
// visible in this view).
409 void addIndex(uint32_t Index) {
410 Indexes.push_back(Index);
413 void setSymTab(SymbolTableSection *SymTab) { Symbols = SymTab; }
414 void initialize(SectionTableRef SecTable) override;
415 void finalize() override;
416 void accept(SectionVisitor &Visitor) const override;
// Sets the fixed name and section type for this kind of section.
418 SectionIndexSection() {
419 Name = ".symtab_shndx";
422 Type = ELF::SHT_SYMTAB_SHNDX;
// Symbol table section: owns its Symbol objects and points at the string
// table holding their names and, optionally, at a section-index table.
426 class SymbolTableSection : public SectionBase {
427 MAKE_SEC_WRITER_FRIEND
429 void setStrTab(StringTableSection *StrTab) { SymbolNames = StrTab; }
430 void assignIndices();
433 std::vector<std::unique_ptr<Symbol>> Symbols;
434 StringTableSection *SymbolNames = nullptr;
435 SectionIndexSection *SectionIndexTable = nullptr;
437 using SymPtr = std::unique_ptr<Symbol>;
// Adds a symbol described by the given name, binding, type, defining
// section, value, visibility, section index and size (defined out of line).
440 void addSymbol(StringRef Name, uint8_t Bind, uint8_t Type,
441 SectionBase *DefinedIn, uint64_t Value, uint8_t Visibility,
442 uint16_t Shndx, uint64_t Sz);
443 void prepareForLayout();
444 // An 'empty' symbol table still contains a null symbol.
// NOTE(review): a table with zero entries therefore reports non-empty.
445 bool empty() const { return Symbols.size() == 1; }
446 void setShndxTable(SectionIndexSection *ShndxTable) {
447 SectionIndexTable = ShndxTable;
449 const SectionIndexSection *getShndxTable() const { return SectionIndexTable; }
450 const SectionBase *getStrTab() const { return SymbolNames; }
// Const and non-const lookup of a symbol by its index.
451 const Symbol *getSymbolByIndex(uint32_t Index) const;
452 Symbol *getSymbolByIndex(uint32_t Index);
// Takes a callable that receives a mutable Symbol reference.
453 void updateSymbols(function_ref<void(Symbol &)> Callable);
455 void removeSectionReferences(const SectionBase *Sec) override;
456 void initialize(SectionTableRef SecTable) override;
457 void finalize() override;
458 void accept(SectionVisitor &Visitor) const override;
459 void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
// Type test for LLVM-style casts: any section whose type is SHT_SYMTAB.
461 static bool classof(const SectionBase *S) {
462 return S->Type == ELF::SHT_SYMTAB;
// Symbol the relocation refers to; nullptr when none is set.
467 Symbol *RelocSymbol = nullptr;
473 // All relocation sections denote relocations to apply to another section.
474 // However, some relocation sections use a dynamic symbol table and others use
475 // a regular symbol table. Because the types of the two symbol tables differ in
476 // our system (because they should behave differently) we can't uniformly
477 // represent all relocations with the same base class if we expose an interface
478 // that mentions the symbol table type. So we split the two base types into two
479 // different classes, one which handles the section the relocation is applied to
480 // and another which handles the symbol table type. The symbol table type is
481 // taken as a type parameter to the class (see RelocSectionWithSymtabBase).
482 class RelocationSectionBase : public SectionBase {
// Section the relocations apply to; nullptr until setSection() is called.
484 SectionBase *SecToApplyRel = nullptr;
487 const SectionBase *getSection() const { return SecToApplyRel; }
488 void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
// Type test for LLVM-style casts: accepts both SHT_REL and SHT_RELA.
490 static bool classof(const SectionBase *S) {
491 return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA;
495 // Takes the symbol table type to use as a parameter so that we can deduplicate
496 // that code between the two symbol table types.
497 template <class SymTabType>
498 class RelocSectionWithSymtabBase : public RelocationSectionBase {
// Symbol table used by the relocations; nullptr until setSymTab() is called.
499 SymTabType *Symbols = nullptr;
500 void setSymTab(SymTabType *SymTab) { Symbols = SymTab; }
503 RelocSectionWithSymtabBase() = default;
506 void removeSectionReferences(const SectionBase *Sec) override;
507 void initialize(SectionTableRef SecTable) override;
508 void finalize() override;
// Relocation section that uses a regular SymbolTableSection and stores its
// relocations as a vector of Relocation values.
511 class RelocationSection
512 : public RelocSectionWithSymtabBase<SymbolTableSection> {
513 MAKE_SEC_WRITER_FRIEND
515 std::vector<Relocation> Relocations;
// Appends a copy of Rel to Relocations.
518 void addRelocation(Relocation Rel) { Relocations.push_back(Rel); }
519 void accept(SectionVisitor &Visitor) const override;
520 void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
521 void markSymbols() override;
// Type test for LLVM-style casts. Sections with SHF_ALLOC take the branch
// below, whose body is not visible here (presumably they are rejected --
// confirm); otherwise the type must be SHT_REL or SHT_RELA.
523 static bool classof(const SectionBase *S) {
524 if (S->Flags & ELF::SHF_ALLOC)
526 return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA;
530 // TODO: The way stripping and groups interact is complicated
531 // and still needs to be worked on.
// Section of type SHT_GROUP (see classof). It records the symbol table and
// symbol associated with the group, a flag word, and the member sections.
533 class GroupSection : public SectionBase {
534 MAKE_SEC_WRITER_FRIEND
535 const SymbolTableSection *SymTab = nullptr;
536 Symbol *Sym = nullptr;
// NOTE(review): no default initializer -- confirm setFlagWord() is always
// called before FlagWord is read.
537 ELF::Elf32_Word FlagWord;
538 SmallVector<SectionBase *, 3> GroupMembers;
541 // TODO: Contents is present in several classes of the hierarchy.
542 // This needs to be refactored to avoid duplication.
543 ArrayRef<uint8_t> Contents;
// Stores the ArrayRef as given; the bytes are not copied.
545 explicit GroupSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
// Plain setters for the members declared above.
547 void setSymTab(const SymbolTableSection *SymTabSec) { SymTab = SymTabSec; }
548 void setSymbol(Symbol *S) { Sym = S; }
549 void setFlagWord(ELF::Elf32_Word W) { FlagWord = W; }
550 void addMember(SectionBase *Sec) { GroupMembers.push_back(Sec); }
// Deliberately empty override. NOTE(review): the ';' after the body is
// redundant.
552 void initialize(SectionTableRef SecTable) override{};
553 void accept(SectionVisitor &) const override;
554 void finalize() override;
555 void removeSymbols(function_ref<bool(const Symbol &)> ToRemove) override;
556 void markSymbols() override;
// Type test for LLVM-style casts: any section whose type is SHT_GROUP.
558 static bool classof(const SectionBase *S) {
559 return S->Type == ELF::SHT_GROUP;
// Section subclass identified by the SHT_DYNSYM type. It adds no members in
// the lines visible here and forwards its data to Section.
563 class DynamicSymbolTableSection : public Section {
565 explicit DynamicSymbolTableSection(ArrayRef<uint8_t> Data) : Section(Data) {}
// Type test for LLVM-style casts.
567 static bool classof(const SectionBase *S) {
568 return S->Type == ELF::SHT_DYNSYM;
// Section subclass identified by the SHT_DYNAMIC type. It adds no members in
// the lines visible here and forwards its data to Section.
572 class DynamicSection : public Section {
574 explicit DynamicSection(ArrayRef<uint8_t> Data) : Section(Data) {}
// Type test for LLVM-style casts.
576 static bool classof(const SectionBase *S) {
577 return S->Type == ELF::SHT_DYNAMIC;
// Relocation section that uses a DynamicSymbolTableSection. It keeps the
// section bytes as a non-owning ArrayRef rather than a list of relocations.
581 class DynamicRelocationSection
582 : public RelocSectionWithSymtabBase<DynamicSymbolTableSection> {
583 MAKE_SEC_WRITER_FRIEND
586 ArrayRef<uint8_t> Contents;
589 explicit DynamicRelocationSection(ArrayRef<uint8_t> Data) : Contents(Data) {}
591 void accept(SectionVisitor &) const override;
// Type test for LLVM-style casts. Sections without SHF_ALLOC take the branch
// below, whose body is not visible here (presumably they are rejected --
// confirm); otherwise the type must be SHT_REL or SHT_RELA.
593 static bool classof(const SectionBase *S) {
594 if (!(S->Flags & ELF::SHF_ALLOC))
596 return S->Type == ELF::SHT_REL || S->Type == ELF::SHT_RELA;
// Section constructed from the name of an external file (see the constructor
// comment below). Its layout and contents are defined out of line.
600 class GnuDebugLinkSection : public SectionBase {
601 MAKE_SEC_WRITER_FRIEND
// Initialization helper taking the file name and a Data string; presumably
// Data is that file's contents -- confirm in the definition.
607 void init(StringRef File, StringRef Data);
610 // If we add this section from an external source we can use this ctor.
611 explicit GnuDebugLinkSection(StringRef File);
612 void accept(SectionVisitor &Visitor) const override;
// Factory hook: implementations build and return a newly owned Object.
618 virtual std::unique_ptr<Object> create() const = 0;
621 using object::Binary;
622 using object::ELFFile;
623 using object::ELFObjectFile;
624 using object::OwningBinary;
// Helper parameterised on the ELF type ELFT. It reads from an ELFFile and
// populates the Object passed to the constructor; the individual steps are
// declared below and defined out of line.
626 template <class ELFT> class ELFBuilder {
// Shorthands for the ELFT-dependent types.
628 using Elf_Addr = typename ELFT::Addr;
629 using Elf_Shdr = typename ELFT::Shdr;
630 using Elf_Ehdr = typename ELFT::Ehdr;
631 using Elf_Word = typename ELFT::Word;
// Held by reference: the ELF file must outlive this builder.
633 const ELFFile<ELFT> &ElfFile;
636 void setParentSegment(Segment &Child);
637 void readProgramHeaders();
638 void initGroupSection(GroupSection *GroupSec);
639 void initSymbolTable(SymbolTableSection *SymTab);
640 void readSectionHeaders();
// Creates the section object matching the given section header.
641 SectionBase &makeSection(const Elf_Shdr &Shdr);
// Binds to the ELFFile of ElfObj (dereferencing getELFFile()) and to Obj.
644 ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj)
645 : ElfFile(*ElfObj.getELFFile()), Obj(Obj) {}
// Reader implementation over a Binary. It stores the pointer it is given in
// Bin and does not take ownership in the lines visible here.
650 class ELFReader : public Reader {
// Reports which ElfType applies (defined out of line).
654 ElfType getElfType() const;
655 std::unique_ptr<Object> create() const override;
// NOTE(review): the ';' after the constructor body is redundant.
656 explicit ELFReader(Binary *B) : Bin(B){};
// Owning pointer aliases and the containers that own every section and
// segment.
661 using SecPtr = std::unique_ptr<SectionBase>;
662 using SegPtr = std::unique_ptr<Segment>;
664 std::vector<SecPtr> Sections;
665 std::vector<SegPtr> Segments;
// Range types over a vector<unique_ptr<T>> that iterate through
// pointee_iterator, for mutable and const access respectively.
669 using Range = iterator_range<
670 pointee_iterator<typename std::vector<std::unique_ptr<T>>::iterator>>;
673 using ConstRange = iterator_range<pointee_iterator<
674 typename std::vector<std::unique_ptr<T>>::const_iterator>>;
676 // It is often the case that the ELF header and the program header table are
677 // not present in any segment. This could be a problem during file layout,
678 // because other segments may get assigned an offset where either of the
679 // two should reside, which will effectively corrupt the resulting binary.
680 // Other than that we use these segments to track program header offsets
681 // when they may not follow the ELF header.
682 Segment ElfHdrSegment;
683 Segment ProgramHdrSegment;
// Pointers to well-known sections; each stays nullptr until assigned.
693 StringTableSection *SectionNames = nullptr;
694 SymbolTableSection *SymbolTable = nullptr;
695 SectionIndexSection *SectionIndexTable = nullptr;
// Mutable access goes through SectionTableRef; const access uses a pointee
// range over the same vector.
698 SectionTableRef sections() { return SectionTableRef(Sections); }
699 ConstRange<SectionBase> sections() const {
700 return make_pointee_range(Sections);
702 Range<Segment> segments() { return make_pointee_range(Segments); }
703 ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
// Removal by predicate; a section or symbol for which ToRemove returns true
// is presumably dropped -- confirm in the definitions.
705 void removeSections(std::function<bool(const SectionBase &)> ToRemove);
706 void removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
// Constructs a T from the forwarded arguments, keeps a raw pointer to it, and
// transfers ownership into Sections (the return statement is not visible in
// this view).
707 template <class T, class... Ts> T &addSection(Ts &&... Args) {
708 auto Sec = llvm::make_unique<T>(std::forward<Ts>(Args)...);
709 auto Ptr = Sec.get();
710 Sections.emplace_back(std::move(Sec));
// Appends a new Segment built from Data and returns a reference to it.
713 Segment &addSegment(ArrayRef<uint8_t> Data) {
714 Segments.emplace_back(llvm::make_unique<Segment>(Data));
715 return *Segments.back();
718 } // end namespace objcopy
719 } // end namespace llvm
721 #endif // LLVM_TOOLS_OBJCOPY_OBJECT_H