//===- DwarfTransformer.cpp -----------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include #include #include "llvm/DebugInfo/DIContext.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Support/Error.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/raw_ostream.h" #include "llvm/DebugInfo/GSYM/DwarfTransformer.h" #include "llvm/DebugInfo/GSYM/FunctionInfo.h" #include "llvm/DebugInfo/GSYM/GsymCreator.h" #include "llvm/DebugInfo/GSYM/GsymReader.h" #include "llvm/DebugInfo/GSYM/InlineInfo.h" using namespace llvm; using namespace gsym; struct llvm::gsym::CUInfo { const DWARFDebugLine::LineTable *LineTable; const char *CompDir; std::vector FileCache; uint64_t Language = 0; uint8_t AddrSize = 0; CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { LineTable = DICtx.getLineTableForUnit(CU); CompDir = CU->getCompilationDir(); FileCache.clear(); if (LineTable) FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); DWARFDie Die = CU->getUnitDIE(); Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); AddrSize = CU->getAddressByteSize(); } /// Return true if Addr is the highest address for a given compile unit. The /// highest address is encoded as -1, of all ones in the address. These high /// addresses are used by some linkers to indicate that a function has been /// dead stripped or didn't end up in the linked executable. bool isHighestAddress(uint64_t Addr) const { if (AddrSize == 4) return Addr == UINT32_MAX; else if (AddrSize == 8) return Addr == UINT64_MAX; return false; } /// Convert a DWARF compile unit file index into a GSYM global file index. /// /// Each compile unit in DWARF has its own file table in the line table /// prologue. GSYM has a single large file table that applies to all files /// from all of the info in a GSYM file. This function converts between the /// two and caches and DWARF CU file index that has already been converted so /// the first client that asks for a compile unit file index will end up /// doing the conversion, and subsequent clients will get the cached GSYM /// index. uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) { if (!LineTable) return 0; assert(DwarfFileIdx < FileCache.size()); uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; if (GsymFileIdx != UINT32_MAX) return GsymFileIdx; std::string File; if (LineTable->getFileNameByIndex( DwarfFileIdx, CompDir, DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) GsymFileIdx = Gsym.insertFile(File); else GsymFileIdx = 0; return GsymFileIdx; } }; static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { if (DWARFDie SpecDie = Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) return SpecParent; } if (DWARFDie AbstDie = Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) return AbstParent; } // We never want to follow parent for inlined subroutine - that would // give us information about where the function is inlined, not what // function is inlined if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) return DWARFDie(); DWARFDie ParentDie = Die.getParent(); if (!ParentDie) return DWARFDie(); switch (ParentDie.getTag()) { case dwarf::DW_TAG_namespace: case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: case dwarf::DW_TAG_subprogram: return ParentDie; // Found parent decl context DIE case dwarf::DW_TAG_lexical_block: return GetParentDeclContextDIE(ParentDie); default: break; } return DWARFDie(); } /// Get the GsymCreator string table offset for the qualified name for the /// DIE passed in. This function will avoid making copies of any strings in /// the GsymCreator when possible. We don't need to copy a string when the /// string comes from our .debug_str section or is an inlined string in the /// .debug_info. If we create a qualified name string in this function by /// combining multiple strings in the DWARF string table or info, we will make /// a copy of the string when we add it to the string table. static Optional getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) { // If the dwarf has mangled name, use mangled name if (auto LinkageName = dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_AT_linkage_name}), nullptr)) return Gsym.insertString(LinkageName, /* Copy */ false); StringRef ShortName(Die.getName(DINameKind::ShortName)); if (ShortName.empty()) return llvm::None; // For C++ and ObjC, prepend names of all parent declaration contexts if (!(Language == dwarf::DW_LANG_C_plus_plus || Language == dwarf::DW_LANG_C_plus_plus_03 || Language == dwarf::DW_LANG_C_plus_plus_11 || Language == dwarf::DW_LANG_C_plus_plus_14 || Language == dwarf::DW_LANG_ObjC_plus_plus || // This should not be needed for C, but we see C++ code marked as C // in some binaries. This should hurt, so let's do it for C as well Language == dwarf::DW_LANG_C)) return Gsym.insertString(ShortName, /* Copy */ false); // Some GCC optimizations create functions with names ending with .isra. // or .part. and those names are just DW_AT_name, not DW_AT_linkage_name // If it looks like it could be the case, don't add any prefix if (ShortName.startswith("_Z") && (ShortName.contains(".isra.") || ShortName.contains(".part."))) return Gsym.insertString(ShortName, /* Copy */ false); DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); if (ParentDeclCtxDie) { std::string Name = ShortName.str(); while (ParentDeclCtxDie) { StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); if (!ParentName.empty()) { // "lambda" names are wrapped in < >. Replace with { } // to be consistent with demangled names and not to confuse with // templates if (ParentName.front() == '<' && ParentName.back() == '>') Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + "::" + Name; else Name = ParentName.str() + "::" + Name; } ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); } // Copy the name since we created a new name in a std::string. return Gsym.insertString(Name, /* Copy */ true); } // Don't copy the name since it exists in the DWARF object file. return Gsym.insertString(ShortName, /* Copy */ false); } static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { bool CheckChildren = true; switch (Die.getTag()) { case dwarf::DW_TAG_subprogram: // Don't look into functions within functions. CheckChildren = Depth == 0; break; case dwarf::DW_TAG_inlined_subroutine: return true; default: break; } if (!CheckChildren) return false; for (DWARFDie ChildDie : Die.children()) { if (hasInlineInfo(ChildDie, Depth + 1)) return true; } return false; } static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die, uint32_t Depth, FunctionInfo &FI, InlineInfo &parent) { if (!hasInlineInfo(Die, Depth)) return; dwarf::Tag Tag = Die.getTag(); if (Tag == dwarf::DW_TAG_inlined_subroutine) { // create new InlineInfo and append to parent.children InlineInfo II; DWARFAddressRange FuncRange = DWARFAddressRange(FI.startAddress(), FI.endAddress()); Expected RangesOrError = Die.getAddressRanges(); if (RangesOrError) { for (const DWARFAddressRange &Range : RangesOrError.get()) { // Check that the inlined function is within the range of the function // info, it might not be in case of split functions if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC) II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC)); } } if (II.Ranges.empty()) return; if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) II.Name = *NameIndex; II.CallFile = CUI.DWARFToGSYMFileIndex( Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0)); II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); // parse all children and append to parent for (DWARFDie ChildDie : Die.children()) parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II); parent.Children.emplace_back(std::move(II)); return; } if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { // skip this Die and just recurse down for (DWARFDie ChildDie : Die.children()) parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent); } } static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI, DWARFDie Die, GsymCreator &Gsym, FunctionInfo &FI) { std::vector RowVector; const uint64_t StartAddress = FI.startAddress(); const uint64_t EndAddress = FI.endAddress(); const uint64_t RangeSize = EndAddress - StartAddress; const object::SectionedAddress SecAddress{ StartAddress, object::SectionedAddress::UndefSection}; if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) { // If we have a DW_TAG_subprogram but no line entries, fall back to using // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. if (auto FileIdx = dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) { if (auto Line = dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx), *Line); FI.OptLineTable = LineTable(); FI.OptLineTable->push(LE); // LE.Addr = EndAddress; // FI.OptLineTable->push(LE); } } return; } FI.OptLineTable = LineTable(); DWARFDebugLine::Row PrevRow; for (uint32_t RowIndex : RowVector) { // Take file number and line/column from the row. const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File); uint64_t RowAddress = Row.Address.Address; // Watch out for a RowAddress that is in the middle of a line table entry // in the DWARF. If we pass an address in between two line table entries // we will get a RowIndex for the previous valid line table row which won't // be contained in our function. This is usually a bug in the DWARF due to // linker problems or LTO or other DWARF re-linking so it is worth emitting // an error, but not worth stopping the creation of the GSYM. if (!FI.Range.contains(RowAddress)) { if (RowAddress < FI.Range.Start) { Log << "error: DIE has a start address whose LowPC is between the " "line table Row[" << RowIndex << "] with address " << HEX64(RowAddress) << " and the next one.\n"; Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); RowAddress = FI.Range.Start; } else { continue; } } LineEntry LE(RowAddress, FileIdx, Row.Line); if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { // We have seen full duplicate line tables for functions in some // DWARF files. Watch for those here by checking the the last // row was the function's end address (HighPC) and that the // current line table entry's address is the same as the first // line entry we already have in our "function_info.Lines". If // so break out after printing a warning. auto FirstLE = FI.OptLineTable->first(); if (FirstLE && *FirstLE == LE) { Log << "warning: duplicate line table detected for DIE:\n"; Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); } else { // Print out (ignore if os == nulls as this is expensive) Log << "error: line table has addresses that do not " << "monotonically increase:\n"; for (uint32_t RowIndex2 : RowVector) { CUI.LineTable->Rows[RowIndex2].dump(Log); } Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); } break; } // Skip multiple line entries for the same file and line. auto LastLE = FI.OptLineTable->last(); if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) continue; // Only push a row if it isn't an end sequence. End sequence markers are // included for the last address in a function or the last contiguous // address in a sequence. if (Row.EndSequence) { // End sequence means that the next line entry could have a lower address // that the previous entries. So we clear the previous row so we don't // trigger the line table error about address that do not monotonically // increase. PrevRow = DWARFDebugLine::Row(); } else { FI.OptLineTable->push(LE); PrevRow = Row; } } // If not line table rows were added, clear the line table so we don't encode // on in the GSYM file. if (FI.OptLineTable->empty()) FI.OptLineTable = llvm::None; } void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { switch (Die.getTag()) { case dwarf::DW_TAG_subprogram: { Expected RangesOrError = Die.getAddressRanges(); if (!RangesOrError) { consumeError(RangesOrError.takeError()); break; } const DWARFAddressRangesVector &Ranges = RangesOrError.get(); if (Ranges.empty()) break; auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); if (!NameIndex) { OS << "error: function at " << HEX64(Die.getOffset()) << " has no name\n "; Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); break; } // Create a function_info for each range for (const DWARFAddressRange &Range : Ranges) { // The low PC must be less than the high PC. Many linkers don't remove // DWARF for functions that don't get linked into the final executable. // If both the high and low pc have relocations, linkers will often set // the address values for both to the same value to indicate the function // has been remove. Other linkers have been known to set the one or both // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 // byte addresses to indicate the function isn't valid. The check below // tries to watch for these cases and abort if it runs into them. if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) break; // Many linkers can't remove DWARF and might set the LowPC to zero. Since // high PC can be an offset from the low PC in more recent DWARF versions // we need to watch for a zero'ed low pc which we do using // ValidTextRanges below. if (!Gsym.IsValidTextAddress(Range.LowPC)) { // We expect zero and -1 to be invalid addresses in DWARF depending // on the linker of the DWARF. This indicates a function was stripped // and the debug info wasn't able to be stripped from the DWARF. If // the LowPC isn't zero or -1, then we should emit an error. if (Range.LowPC != 0) { // Unexpected invalid address, emit an error Log << "warning: DIE has an address range whose start address is " "not in any executable sections (" << *Gsym.GetValidTextRanges() << ") and will not be processed:\n"; Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); } break; } FunctionInfo FI; FI.setStartAddress(Range.LowPC); FI.setEndAddress(Range.HighPC); FI.Name = *NameIndex; if (CUI.LineTable) { convertFunctionLineTable(OS, CUI, Die, Gsym, FI); } if (hasInlineInfo(Die, 0)) { FI.Inline = InlineInfo(); FI.Inline->Name = *NameIndex; FI.Inline->Ranges.insert(FI.Range); parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline); } Gsym.addFunctionInfo(std::move(FI)); } } break; default: break; } for (DWARFDie ChildDie : Die.children()) handleDie(OS, CUI, ChildDie); } Error DwarfTransformer::convert(uint32_t NumThreads) { size_t NumBefore = Gsym.getNumFunctionInfos(); if (NumThreads == 1) { // Parse all DWARF data from this thread, use the same string/file table // for everything for (const auto &CU : DICtx.compile_units()) { DWARFDie Die = CU->getUnitDIE(false); CUInfo CUI(DICtx, dyn_cast(CU.get())); handleDie(Log, CUI, Die); } } else { // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up // front before we start accessing any DIEs since there might be // cross compile unit references in the DWARF. If we don't do this we can // end up crashing. // We need to call getAbbreviations sequentially first so that getUnitDIE() // only works with its local data. for (const auto &CU : DICtx.compile_units()) CU->getAbbreviations(); // Now parse all DIEs in case we have cross compile unit references in a // thread pool. ThreadPool pool(hardware_concurrency(NumThreads)); for (const auto &CU : DICtx.compile_units()) pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); pool.wait(); // Now convert all DWARF to GSYM in a thread pool. std::mutex LogMutex; for (const auto &CU : DICtx.compile_units()) { DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); if (Die) { CUInfo CUI(DICtx, dyn_cast(CU.get())); pool.async([this, CUI, &LogMutex, Die]() mutable { std::string ThreadLogStorage; raw_string_ostream ThreadOS(ThreadLogStorage); handleDie(ThreadOS, CUI, Die); ThreadOS.flush(); if (!ThreadLogStorage.empty()) { // Print ThreadLogStorage lines into an actual stream under a lock std::lock_guard guard(LogMutex); Log << ThreadLogStorage; } }); } } pool.wait(); } size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; return Error::success(); } llvm::Error DwarfTransformer::verify(StringRef GsymPath) { Log << "Verifying GSYM file \"" << GsymPath << "\":\n"; auto Gsym = GsymReader::openFile(GsymPath); if (!Gsym) return Gsym.takeError(); auto NumAddrs = Gsym->getNumAddresses(); DILineInfoSpecifier DLIS( DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, DILineInfoSpecifier::FunctionNameKind::LinkageName); std::string gsymFilename; for (uint32_t I = 0; I < NumAddrs; ++I) { auto FuncAddr = Gsym->getAddress(I); if (!FuncAddr) return createStringError(std::errc::invalid_argument, "failed to extract address[%i]", I); auto FI = Gsym->getFunctionInfo(*FuncAddr); if (!FI) return createStringError(std::errc::invalid_argument, "failed to extract function info for address 0x%" PRIu64, *FuncAddr); for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { const object::SectionedAddress SectAddr{ Addr, object::SectionedAddress::UndefSection}; auto LR = Gsym->lookup(Addr); if (!LR) return LR.takeError(); auto DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); if (NumDwarfInlineInfos == 0) { DwarfInlineInfos.addFrame( DICtx.getLineInfoForAddress(SectAddr, DLIS)); } // Check for 1 entry that has no file and line info if (NumDwarfInlineInfos == 1 && DwarfInlineInfos.getFrame(0).FileName == "") { DwarfInlineInfos = DIInliningInfo(); NumDwarfInlineInfos = 0; } if (NumDwarfInlineInfos > 0 && NumDwarfInlineInfos != LR->Locations.size()) { Log << "error: address " << HEX64(Addr) << " has " << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " << LR->Locations.size() << "\n"; Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { const auto dii = DwarfInlineInfos.getFrame(Idx); Log << " [" << Idx << "]: " << dii.FunctionName << " @ " << dii.FileName << ':' << dii.Line << '\n'; } Log << " " << LR->Locations.size() << " GSYM frames:\n"; for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; ++Idx) { const auto &gii = LR->Locations[Idx]; Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir << '/' << gii.Base << ':' << gii.Line << '\n'; } DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); Gsym->dump(Log, *FI); continue; } for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; ++Idx) { const auto &gii = LR->Locations[Idx]; if (Idx < NumDwarfInlineInfos) { const auto dii = DwarfInlineInfos.getFrame(Idx); gsymFilename = LR->getSourceFile(Idx); // Verify function name if (dii.FunctionName.find(gii.Name.str()) != 0) Log << "error: address " << HEX64(Addr) << " DWARF function \"" << dii.FunctionName.c_str() << "\" doesn't match GSYM function \"" << gii.Name << "\"\n"; // Verify source file path if (dii.FileName != gsymFilename) Log << "error: address " << HEX64(Addr) << " DWARF path \"" << dii.FileName.c_str() << "\" doesn't match GSYM path \"" << gsymFilename.c_str() << "\"\n"; // Verify source file line if (dii.Line != gii.Line) Log << "error: address " << HEX64(Addr) << " DWARF line " << dii.Line << " != GSYM line " << gii.Line << "\n"; } } } } return Error::success(); }