//===- GdbIndex.cpp -------------------------------------------------------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// File contains classes for implementation of --gdb-index command line option.
//
// If that option is used, linker should emit a .gdb_index section that allows
// debugger to locate and read .dwo files, containing necessary debug
// information.
// More information about implementation can be found in DWARF specification,
// latest version is available at http://dwarfstd.org.
//
// .gdb_index section format:
//  (Information is based on/taken from
//  https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html (*))
//
// A mapped index consists of several areas, laid out in order:
// 1) The file header.
// 2) "The CU (compilation unit) list. This is a sequence of pairs of 64-bit
//    little-endian values, sorted by the CU offset. The first element in each
//    pair is the offset of a CU in the .debug_info section. The second element
//    in each pair is the length of that CU. References to a CU elsewhere in the
//    map are done using a CU index, which is just the 0-based index into this
//    table. Note that if there are type CUs, then conceptually CUs and type CUs
//    form a single list for the purposes of CU indices."(*)
// 3) The types CU list. Deprecated as .debug_types does not appear in the DWARF
//    v5 specification.
// 4) The address area. The address area is a sequence of address
//    entries, where each entry contains low address, high address and CU
//    index.
// 5) "The symbol table. This is an open-addressed hash table. The size of the
//    hash table is always a power of 2. Each slot in the hash table consists of
//    a pair of offset_type values. The first value is the offset of the
//    symbol's name in the constant pool.
//    The second value is the offset of the CU vector in the constant pool."(*)
// 6) "The constant pool. This is simply a bunch of bytes. It is organized so
//    that alignment is correct: CU vectors are stored first, followed by
//    strings." (*)
//
// For constructing the .gdb_index section following steps should be performed:
// 1) For file header nothing special should be done. It contains the offsets to
//    the areas below.
// 2) Scan the compilation unit headers of the .debug_info sections to build a
//    list of compilation units.
// 3) CU Types are no longer needed as DWARF skeleton type units never made it
//    into the standard. lld does nothing to support parsing of .debug_types
//    and generates empty types CU area in .gdb_index section.
// 4) Address area entries are extracted from DW_TAG_compile_unit DIEs of
//    .debug_info sections.
// 5) For building the symbol table linker extracts the public names from the
//    .debug_gnu_pubnames and .debug_gnu_pubtypes sections. Then it builds the
//    hashtable according to the .gdb_index format specification.
// 6) Constant pool is populated at the same time as symbol table.
//===----------------------------------------------------------------------===// #include "GdbIndex.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" #include "llvm/Object/ELFObjectFile.h" using namespace llvm; using namespace llvm::object; using namespace lld::elf; template GdbIndexBuilder::GdbIndexBuilder(InputSection *DebugInfoSec) : DebugInfoSec(DebugInfoSec) { if (Expected> Obj = object::ObjectFile::createObjectFile(DebugInfoSec->getFile()->MB)) Dwarf.reset(new DWARFContextInMemory(*Obj.get(), this)); else error(toString(DebugInfoSec->getFile()) + ": error creating DWARF context"); } template std::vector> GdbIndexBuilder::readCUList() { std::vector> Ret; for (std::unique_ptr &CU : Dwarf->compile_units()) Ret.push_back( {DebugInfoSec->OutSecOff + CU->getOffset(), CU->getLength() + 4}); return Ret; } template std::vector> GdbIndexBuilder::readPubNamesAndTypes() { const bool IsLE = ELFT::TargetEndianness == llvm::support::little; StringRef Data[] = {Dwarf->getGnuPubNamesSection(), Dwarf->getGnuPubTypesSection()}; std::vector> Ret; for (StringRef D : Data) { DWARFDebugPubTable PubTable(D, IsLE, true); for (const DWARFDebugPubTable::Set &S : PubTable.getData()) for (const DWARFDebugPubTable::Entry &E : S.Entries) Ret.push_back({E.Name, E.Descriptor.toBits()}); } return Ret; } std::pair GdbHashTab::add(uint32_t Hash, size_t Offset) { if (Size * 4 / 3 >= Table.size()) expand(); GdbSymbol **Slot = findSlot(Hash, Offset); bool New = false; if (*Slot == nullptr) { ++Size; *Slot = new (Alloc) GdbSymbol(Hash, Offset); New = true; } return {New, *Slot}; } void GdbHashTab::expand() { if (Table.empty()) { Table.resize(InitialSize); return; } std::vector NewTable(Table.size() * 2); NewTable.swap(Table); for (GdbSymbol *Sym : NewTable) { if (!Sym) continue; GdbSymbol **Slot = findSlot(Sym->NameHash, Sym->NameOffset); *Slot = Sym; } } // Methods finds a slot for symbol with given hash. 
The step size used to find // the next candidate slot when handling a hash collision is specified in // .gdb_index section format. The hash value for a table entry is computed by // applying an iterative hash function to the symbol's name. GdbSymbol **GdbHashTab::findSlot(uint32_t Hash, size_t Offset) { uint32_t Index = Hash & (Table.size() - 1); uint32_t Step = ((Hash * 17) & (Table.size() - 1)) | 1; for (;;) { GdbSymbol *S = Table[Index]; if (!S || ((S->NameOffset == Offset) && (S->NameHash == Hash))) return &Table[Index]; Index = (Index + Step) & (Table.size() - 1); } } template static InputSectionBase * findSection(ArrayRef *> Arr, uint64_t Offset) { for (InputSectionBase *S : Arr) if (S && S != &InputSection::Discarded) if (Offset >= S->Offset && Offset < S->Offset + S->getSize()) return S; return nullptr; } template std::vector> GdbIndexBuilder::readAddressArea(size_t CurrentCU) { std::vector> Ret; for (const auto &CU : Dwarf->compile_units()) { DWARFAddressRangesVector Ranges; CU->collectAddressRanges(Ranges); ArrayRef *> Sections = DebugInfoSec->getFile()->getSections(); for (std::pair &R : Ranges) if (InputSectionBase *S = findSection(Sections, R.first)) Ret.push_back( {S, R.first - S->Offset, R.second - S->Offset, CurrentCU}); ++CurrentCU; } return Ret; } // We return file offset as load address for allocatable sections. That is // currently used for collecting address ranges in readAddressArea(). We are // able then to find section index that range belongs to. template uint64_t GdbIndexBuilder::getSectionLoadAddress( const object::SectionRef &Sec) const { if (static_cast(Sec).getFlags() & ELF::SHF_ALLOC) return static_cast(Sec).getOffset(); return 0; } template std::unique_ptr GdbIndexBuilder::clone() const { return {}; } namespace lld { namespace elf { template class GdbIndexBuilder; template class GdbIndexBuilder; template class GdbIndexBuilder; template class GdbIndexBuilder; } }