1 //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/PDB/Native/InputFile.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/BinaryFormat/Magic.h"
13 #include "llvm/DebugInfo/CodeView/CodeView.h"
14 #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
15 #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
16 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
17 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
18 #include "llvm/DebugInfo/PDB/Native/FormatUtil.h"
19 #include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
20 #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
21 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
22 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
23 #include "llvm/DebugInfo/PDB/Native/RawError.h"
24 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
25 #include "llvm/DebugInfo/PDB/PDB.h"
26 #include "llvm/Object/COFF.h"
27 #include "llvm/Support/FileSystem.h"
28 #include "llvm/Support/FormatVariadic.h"
31 using namespace llvm::codeview;
32 using namespace llvm::object;
33 using namespace llvm::pdb;
// The constructor and destructor are defaulted here, out of line, rather than
// in the class definition.
35 InputFile::InputFile() = default;
36 InputFile::~InputFile() = default;
38 Expected<ModuleDebugStreamRef>
39 llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName,
41 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
43 return DbiOrErr.takeError();
44 DbiStream &Dbi = *DbiOrErr;
45 const auto &Modules = Dbi.modules();
46 if (Index >= Modules.getModuleCount())
47 return make_error<RawError>(raw_error_code::index_out_of_bounds,
48 "Invalid module index");
50 auto Modi = Modules.getModuleDescriptor(Index);
52 ModuleName = Modi.getModuleName();
54 uint16_t ModiStream = Modi.getModuleStreamIndex();
55 if (ModiStream == kInvalidStreamIndex)
56 return make_error<RawError>(raw_error_code::no_stream,
57 "Module stream not present");
59 auto ModStreamData = File.createIndexedStream(ModiStream);
61 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
62 if (auto EC = ModS.reload())
63 return make_error<RawError>(raw_error_code::corrupt_file,
64 "Invalid module stream");
66 return std::move(ModS);
69 Expected<ModuleDebugStreamRef> llvm::pdb::getModuleDebugStream(PDBFile &File,
71 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
73 return DbiOrErr.takeError();
74 DbiStream &Dbi = *DbiOrErr;
75 const auto &Modules = Dbi.modules();
76 auto Modi = Modules.getModuleDescriptor(Index);
78 uint16_t ModiStream = Modi.getModuleStreamIndex();
79 if (ModiStream == kInvalidStreamIndex)
80 return make_error<RawError>(raw_error_code::no_stream,
81 "Module stream not present");
83 auto ModStreamData = File.createIndexedStream(ModiStream);
85 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
86 if (Error Err = ModS.reload())
87 return make_error<RawError>(raw_error_code::corrupt_file,
88 "Invalid module stream");
90 return std::move(ModS);
93 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
95 BinaryStreamReader &Reader) {
96 if (Expected<StringRef> NameOrErr = Section.getName()) {
97 if (*NameOrErr != Name)
100 consumeError(NameOrErr.takeError());
104 Expected<StringRef> ContentsOrErr = Section.getContents();
105 if (!ContentsOrErr) {
106 consumeError(ContentsOrErr.takeError());
110 Reader = BinaryStreamReader(*ContentsOrErr, support::little);
112 if (Reader.bytesRemaining() < sizeof(uint32_t))
114 cantFail(Reader.readInteger(Magic));
115 if (Magic != COFF::DEBUG_SECTION_MAGIC)
120 static inline bool isDebugSSection(object::SectionRef Section,
121 DebugSubsectionArray &Subsections) {
122 BinaryStreamReader Reader;
123 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
126 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
130 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
131 BinaryStreamReader Reader;
132 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
133 !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
135 cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
139 static std::string formatChecksumKind(FileChecksumKind Kind) {
141 RETURN_CASE(FileChecksumKind, None, "None");
142 RETURN_CASE(FileChecksumKind, MD5, "MD5");
143 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
144 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
146 return formatUnknownEnum(Kind);
149 template <typename... Args>
150 static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
152 Printer.format(std::forward<Args>(args)...);
154 Printer.formatLine(std::forward<Args>(args)...);
/// Builds the symbol group for \p File. PDB inputs are set up through
/// initializeForPdb(GroupIndex); object-file inputs are set up by walking the
/// file's sections.
157 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
162 initializeForPdb(GroupIndex);
// Object-file path: visit each section and look at the ".debug$S" ones.
166 for (const auto &S : File->obj().sections()) {
167 DebugSubsectionArray SS;
168 if (!isDebugSSection(S, SS))
// Acts while either the checksums or the string table is still missing.
171 if (!SC.hasChecksums() || !SC.hasStrings())
// Both tables are available at this point.
177 if (SC.hasChecksums() && SC.hasStrings())
// Refresh the file-name -> checksum lookup from whatever SC now holds.
180 rebuildChecksumMap();
184 StringRef SymbolGroup::name() const { return Name; }
/// Updates this group from the debug subsections \p SS of an object file
/// section. Called by SymbolGroupIterator::scanToNextDebugS() for each
/// ".debug$S" section it finds.
186 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
190 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
/// Points this group at module \p Modi of the PDB: loads the string table if
/// not yet set, opens the module's debug stream, and rebuilds the checksum
/// lookup from its subsections. Requires a PDB-backed File.
192 void SymbolGroup::initializeForPdb(uint32_t Modi) {
193 assert(File && File->isPdb());
195 // PDB always uses the same string table, but each module has its own
196 // checksums. So we only set the strings if they're not already set.
197 if (!SC.hasStrings()) {
198 auto StringTable = File->pdb().getStringTable();
200 SC.setStrings(StringTable->getStringTable());
// A string-table error is discarded here rather than reported.
202 consumeError(StringTable.takeError());
// Name is passed as the out-parameter, so it receives the module's name.
206 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
// Failure to open the module stream is likewise swallowed.
208 consumeError(MDS.takeError());
// Keep the stream alive in DebugStream; Subsections is taken from it.
212 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
213 Subsections = DebugStream->getSubsectionsArray();
214 SC.initialize(Subsections);
215 rebuildChecksumMap();
218 void SymbolGroup::rebuildChecksumMap() {
219 if (!SC.hasChecksums())
222 for (const auto &Entry : SC.checksums()) {
223 auto S = SC.strings().getString(Entry.FileNameOffset);
226 ChecksumsByFile[*S] = Entry;
/// Accessor for the PDB module debug stream of this group. Only valid for a
/// PDB-backed group whose DebugStream has been set (see initializeForPdb()).
230 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
231 assert(File && File->isPdb() && DebugStream);
235 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
236 return SC.strings().getString(Offset);
239 Expected<StringRef> SymbolGroup::getNameFromChecksums(uint32_t Offset) const {
241 if (!SC.hasChecksums()) {
242 return std::move(Name);
245 auto Iter = SC.checksums().getArray().at(Offset);
246 if (Iter == SC.checksums().getArray().end()) {
247 return std::move(Name);
250 uint32_t FO = Iter->FileNameOffset;
251 auto ExpectedFile = getNameFromStringTable(FO);
253 return std::move(Name);
256 return *ExpectedFile;
259 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
261 auto FC = ChecksumsByFile.find(File);
262 if (FC == ChecksumsByFile.end()) {
263 formatInternal(Printer, Append, "- (no checksum) {0}", File);
267 formatInternal(Printer, Append, "- ({0}: {1}) {2}",
268 formatChecksumKind(FC->getValue().Kind),
269 toHex(FC->getValue().Checksum), File);
272 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
275 if (!SC.hasChecksums()) {
276 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
280 auto Iter = SC.checksums().getArray().at(Offset);
281 if (Iter == SC.checksums().getArray().end()) {
282 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
286 uint32_t FO = Iter->FileNameOffset;
287 auto ExpectedFile = getNameFromStringTable(FO);
289 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
290 consumeError(ExpectedFile.takeError());
293 if (Iter->Kind == FileChecksumKind::None) {
294 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
296 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
297 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
301 Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
303 if (!llvm::sys::fs::exists(Path))
304 return make_error<StringError>(formatv("File {0} not found", Path),
305 inconvertibleErrorCode());
308 if (auto EC = identify_magic(Path, Magic))
309 return make_error<StringError>(
310 formatv("Unable to identify file type for file {0}", Path), EC);
312 if (Magic == file_magic::coff_object) {
313 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
315 return BinaryOrErr.takeError();
317 IF.CoffObject = std::move(*BinaryOrErr);
318 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
319 return std::move(IF);
322 if (Magic == file_magic::pdb) {
323 std::unique_ptr<IPDBSession> Session;
324 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
325 return std::move(Err);
327 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
328 IF.PdbOrObj = &IF.PdbSession->getPDBFile();
330 return std::move(IF);
333 if (!AllowUnknownFile)
334 return make_error<StringError>(
335 formatv("File {0} is not a supported file type", Path),
336 inconvertibleErrorCode());
338 auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
339 /*RequiresNullTerminator=*/false);
341 return make_error<StringError>(
342 formatv("File {0} could not be opened", Path), Result.getError());
344 IF.UnknownFile = std::move(*Result);
345 IF.PdbOrObj = IF.UnknownFile.get();
346 return std::move(IF);
/// Returns the PDB file held in PdbOrObj. Only meaningful when isPdb() is
/// true.
349 PDBFile &InputFile::pdb() {
351 return *cast<PDBFile *>(PdbOrObj);
/// Const overload: returns the PDB file held in PdbOrObj. Only meaningful
/// when isPdb() is true.
354 const PDBFile &InputFile::pdb() const {
356 return *cast<PDBFile *>(PdbOrObj);
/// Returns the COFF object file held in PdbOrObj. Only meaningful when
/// isObj() is true.
359 object::COFFObjectFile &InputFile::obj() {
361 return *cast<object::COFFObjectFile *>(PdbOrObj);
/// Const overload: returns the COFF object file held in PdbOrObj. Only
/// meaningful when isObj() is true.
364 const object::COFFObjectFile &InputFile::obj() const {
366 return *cast<object::COFFObjectFile *>(PdbOrObj);
/// Returns the raw buffer held in PdbOrObj for a file of unrecognised type.
/// Only meaningful when isUnknown() is true.
369 MemoryBuffer &InputFile::unknown() {
371 return *cast<MemoryBuffer *>(PdbOrObj);
/// Const overload: returns the raw buffer held in PdbOrObj for a file of
/// unrecognised type. Only meaningful when isUnknown() is true.
374 const MemoryBuffer &InputFile::unknown() const {
376 return *cast<MemoryBuffer *>(PdbOrObj);
/// Returns the path or identifier of the underlying input, taken from the
/// PDB file, the object file, or the raw buffer, depending on which kind of
/// input this is.
379 StringRef InputFile::getFilePath() const {
// PDB input.
381 return pdb().getFilePath();
// COFF object input.
383 return obj().getFileName();
// Unrecognised input: the buffer's identifier stands in for the path.
385 return unknown().getBufferIdentifier();
388 bool InputFile::hasTypes() const {
390 return pdb().hasPDBTpiStream();
392 for (const auto &Section : obj().sections()) {
394 if (isDebugTSection(Section, Types))
/// Reports whether the input has a separate ID stream; for a PDB this is
/// whether it has an IPI stream.
400 bool InputFile::hasIds() const {
403 return pdb().hasPDBIpiStream();
406 bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); }
408 bool InputFile::isObj() const {
409 return isa<object::COFFObjectFile *>(PdbOrObj);
412 bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); }
/// Returns the type collection for \p Kind (kTypes or kIds), building and
/// caching it in Types / Ids on first use.
414 codeview::LazyRandomTypeCollection &
415 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
// Cache checks: has the requested collection already been built?
416 if (Types && Kind == kTypes)
418 if (Ids && Kind == kIds)
// An ID collection requires a PDB that actually has an IPI stream.
422 assert(isPdb() && pdb().hasPDBIpiStream());
425 // If the collection was already initialized, we should have just returned it
// by the cache checks at the top of this function.
// PDB input: build from the IPI stream for kIds, the TPI stream otherwise.
428 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
429 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
430 : pdb().getPDBTpiStream());
432 auto &Array = Stream.typeArray();
433 uint32_t Count = Stream.getNumTypeRecords();
434 auto Offsets = Stream.getTypeIndexOffsets();
436 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
// Non-PDB input only ever provides the kTypes collection.
441 assert(Kind == kTypes);
// Look for a CodeView type section to build the collection from.
444 for (const auto &Section : obj().sections()) {
446 if (!isDebugTSection(Section, Records))
// NOTE(review): 100 is presumably a record-count hint; confirm against
// LazyRandomTypeCollection's constructors.
449 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
// Collection constructed without any records (same 100 argument).
453 Types = std::make_unique<LazyRandomTypeCollection>(100);
457 codeview::LazyRandomTypeCollection &InputFile::types() {
458 return getOrCreateTypeCollection(kTypes);
461 codeview::LazyRandomTypeCollection &InputFile::ids() {
462 // Object files have only one type stream that contains both types and ids.
463 // Similarly, some PDBs don't contain an IPI stream, and for those both types
464 // and IDs are in the same stream.
465 if (isObj() || !pdb().hasPDBIpiStream())
468 return getOrCreateTypeCollection(kIds);
471 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
472 return make_range<SymbolGroupIterator>(symbol_groups_begin(),
473 symbol_groups_end());
476 SymbolGroupIterator InputFile::symbol_groups_begin() {
477 return SymbolGroupIterator(*this);
480 SymbolGroupIterator InputFile::symbol_groups_end() {
481 return SymbolGroupIterator();
/// Default constructor: the contained SymbolGroup is built from a null file.
/// InputFile::symbol_groups_end() returns an iterator built this way.
484 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
/// Constructs an iterator over the symbol groups of \p File.
486 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
// For object files, iteration is driven by a section iterator that starts at
// the first section.
488 SectionIter = File.obj().section_begin();
/// Equality comparison between two symbol group iterators.
493 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
// The two iterators are compared by the file they refer to...
499 if (Value.File != R.Value.File)
// ...and then by their position.
501 return Index == R.Index;
/// Const dereference: gives read-only access to the current symbol group.
504 const SymbolGroup &SymbolGroupIterator::operator*() const {
/// Dereference: gives mutable access to the current symbol group.
508 SymbolGroup &SymbolGroupIterator::operator*() {
/// Pre-increment: advances to the next symbol group. Must not be called on an
/// end iterator or on one without a file.
513 SymbolGroupIterator &SymbolGroupIterator::operator++() {
514 assert(Value.File && !isEnd());
// PDB input: re-initialize the contained group for the module at Index.
519 if (Value.File->isPdb()) {
520 Value.updatePdbModi(Index);
/// Object-file only: moves the section iterator forward looking for a
/// ".debug$S" section and hands its subsections to the contained group.
528 void SymbolGroupIterator::scanToNextDebugS() {
530 auto End = Value.File->obj().section_end();
// Iter aliases the stored section iterator, so advancing it advances this
// iterator's position.
531 auto &Iter = *SectionIter;
// The pre-increment means the section Iter pointed at on entry is never
// examined; scanning starts with the one after it.
534 while (++Iter != End) {
535 DebugSubsectionArray SS;
536 SectionRef SR = *Iter;
537 if (!isDebugSSection(SR, SS))
540 Value.updateDebugS(SS);
/// Reports whether this iterator is at the end of its sequence.
545 bool SymbolGroupIterator::isEnd() const {
// PDB input: the end is reached once Index equals the DBI module count.
548 if (Value.File->isPdb()) {
549 DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
550 uint32_t Count = Dbi.modules().getModuleCount();
551 assert(Index <= Count);
552 return Index == Count;
// Object-file input: the end is reached when the section iterator hits
// section_end().
556 return *SectionIter == Value.File->obj().section_end();
559 static bool isMyCode(const SymbolGroup &Group) {
560 if (Group.getFile().isObj())
563 StringRef Name = Group.name();
564 if (Name.startswith("Import:"))
566 if (Name.ends_with_insensitive(".dll"))
568 if (Name.equals_insensitive("* linker *"))
570 if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools"))
572 if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt"))
577 bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group,
578 const FilterOptions &Filters) {
579 if (Filters.JustMyCode && !isMyCode(Group))
582 // If the arg was not specified on the command line, always dump all modules.
583 if (!Filters.DumpModi)
586 // Otherwise, only dump if this is the same module specified.
587 return (Filters.DumpModi == Idx);