//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "InputFile.h"

#include "FormatUtil.h"
#include "LinePrinter.h"

#include "llvm/BinaryFormat/Magic.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"

using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::object;
using namespace llvm::pdb;

35 InputFile::InputFile() {}
36 InputFile::~InputFile() {}
38 static Expected<ModuleDebugStreamRef>
39 getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
40 ExitOnError Err("Unexpected error: ");
42 auto &Dbi = Err(File.getPDBDbiStream());
43 const auto &Modules = Dbi.modules();
44 auto Modi = Modules.getModuleDescriptor(Index);
46 ModuleName = Modi.getModuleName();
48 uint16_t ModiStream = Modi.getModuleStreamIndex();
49 if (ModiStream == kInvalidStreamIndex)
50 return make_error<RawError>(raw_error_code::no_stream,
51 "Module stream not present");
53 auto ModStreamData = File.createIndexedStream(ModiStream);
55 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
56 if (auto EC = ModS.reload())
57 return make_error<RawError>(raw_error_code::corrupt_file,
58 "Invalid module stream");
60 return std::move(ModS);
63 static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
65 BinaryStreamReader &Reader) {
66 StringRef SectionName, Contents;
67 if (Section.getName(SectionName))
70 if (SectionName != Name)
73 if (Section.getContents(Contents))
76 Reader = BinaryStreamReader(Contents, support::little);
78 if (Reader.bytesRemaining() < sizeof(uint32_t))
80 cantFail(Reader.readInteger(Magic));
81 if (Magic != COFF::DEBUG_SECTION_MAGIC)
86 static inline bool isDebugSSection(object::SectionRef Section,
87 DebugSubsectionArray &Subsections) {
88 BinaryStreamReader Reader;
89 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
92 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
96 static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
97 BinaryStreamReader Reader;
98 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader))
100 cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
104 static std::string formatChecksumKind(FileChecksumKind Kind) {
106 RETURN_CASE(FileChecksumKind, None, "None");
107 RETURN_CASE(FileChecksumKind, MD5, "MD5");
108 RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
109 RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
111 return formatUnknownEnum(Kind);
114 static const DebugStringTableSubsectionRef &extractStringTable(PDBFile &File) {
115 return cantFail(File.getStringTable()).getStringTable();
118 template <typename... Args>
119 static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
121 Printer.format(std::forward<Args>(args)...);
123 Printer.formatLine(std::forward<Args>(args)...);
126 SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
131 initializeForPdb(GroupIndex);
135 for (const auto &S : File->obj().sections()) {
136 DebugSubsectionArray SS;
137 if (!isDebugSSection(S, SS))
140 if (!SC.hasChecksums() || !SC.hasStrings())
146 if (SC.hasChecksums() && SC.hasStrings())
149 rebuildChecksumMap();
153 StringRef SymbolGroup::name() const { return Name; }
155 void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
159 void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
161 void SymbolGroup::initializeForPdb(uint32_t Modi) {
162 assert(File && File->isPdb());
164 // PDB always uses the same string table, but each module has its own
165 // checksums. So we only set the strings if they're not already set.
166 if (!SC.hasStrings())
167 SC.setStrings(extractStringTable(File->pdb()));
170 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
172 consumeError(MDS.takeError());
176 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
177 Subsections = DebugStream->getSubsectionsArray();
178 SC.initialize(Subsections);
179 rebuildChecksumMap();
182 void SymbolGroup::rebuildChecksumMap() {
183 if (!SC.hasChecksums())
186 for (const auto &Entry : SC.checksums()) {
187 auto S = SC.strings().getString(Entry.FileNameOffset);
190 ChecksumsByFile[*S] = Entry;
194 const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
195 assert(File && File->isPdb() && DebugStream);
199 Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
200 return SC.strings().getString(Offset);
203 void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
205 auto FC = ChecksumsByFile.find(File);
206 if (FC == ChecksumsByFile.end()) {
207 formatInternal(Printer, Append, "- (no checksum) {0}", File);
211 formatInternal(Printer, Append, "- ({0}: {1}) {2}",
212 formatChecksumKind(FC->getValue().Kind),
213 toHex(FC->getValue().Checksum), File);
216 void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
219 if (!SC.hasChecksums()) {
220 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
224 auto Iter = SC.checksums().getArray().at(Offset);
225 if (Iter == SC.checksums().getArray().end()) {
226 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
230 uint32_t FO = Iter->FileNameOffset;
231 auto ExpectedFile = getNameFromStringTable(FO);
233 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
234 consumeError(ExpectedFile.takeError());
237 if (Iter->Kind == FileChecksumKind::None) {
238 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
240 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
241 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
245 Expected<InputFile> InputFile::open(StringRef Path) {
247 if (!llvm::sys::fs::exists(Path))
248 return make_error<StringError>(formatv("File {0} not found", Path),
249 inconvertibleErrorCode());
252 if (auto EC = identify_magic(Path, Magic))
253 return make_error<StringError>(
254 formatv("Unable to identify file type for file {0}", Path), EC);
256 if (Magic == file_magic::coff_object) {
257 Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
259 return BinaryOrErr.takeError();
261 IF.CoffObject = std::move(*BinaryOrErr);
262 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
263 return std::move(IF);
266 if (Magic == file_magic::unknown) {
267 std::unique_ptr<IPDBSession> Session;
268 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
269 return std::move(Err);
271 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
272 IF.PdbOrObj = &IF.PdbSession->getPDBFile();
274 return std::move(IF);
277 return make_error<StringError>(
278 formatv("File {0} is not a supported file type", Path),
279 inconvertibleErrorCode());
282 PDBFile &InputFile::pdb() {
284 return *PdbOrObj.get<PDBFile *>();
287 const PDBFile &InputFile::pdb() const {
289 return *PdbOrObj.get<PDBFile *>();
292 object::COFFObjectFile &InputFile::obj() {
294 return *PdbOrObj.get<object::COFFObjectFile *>();
297 const object::COFFObjectFile &InputFile::obj() const {
299 return *PdbOrObj.get<object::COFFObjectFile *>();
302 bool InputFile::hasTypes() const {
304 return pdb().hasPDBTpiStream();
306 for (const auto &Section : obj().sections()) {
308 if (isDebugTSection(Section, Types))
314 bool InputFile::hasIds() const {
317 return pdb().hasPDBIpiStream();
320 bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
322 bool InputFile::isObj() const {
323 return PdbOrObj.is<object::COFFObjectFile *>();
326 codeview::LazyRandomTypeCollection &
327 InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
328 if (Types && Kind == kTypes)
330 if (Ids && Kind == kIds)
334 assert(isPdb() && pdb().hasPDBIpiStream());
337 // If the collection was already initialized, we should have just returned it
340 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
341 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
342 : pdb().getPDBTpiStream());
344 auto &Array = Stream.typeArray();
345 uint32_t Count = Stream.getNumTypeRecords();
346 auto Offsets = Stream.getTypeIndexOffsets();
348 llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
353 assert(Kind == kTypes);
356 for (const auto &Section : obj().sections()) {
358 if (!isDebugTSection(Section, Records))
361 Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100);
365 Types = llvm::make_unique<LazyRandomTypeCollection>(100);
369 codeview::LazyRandomTypeCollection &InputFile::types() {
370 return getOrCreateTypeCollection(kTypes);
373 codeview::LazyRandomTypeCollection &InputFile::ids() {
374 // Object files have only one type stream that contains both types and ids.
375 // Similarly, some PDBs don't contain an IPI stream, and for those both types
376 // and IDs are in the same stream.
377 if (isObj() || !pdb().hasPDBIpiStream())
380 return getOrCreateTypeCollection(kIds);
383 iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
384 return make_range<SymbolGroupIterator>(symbol_groups_begin(),
385 symbol_groups_end());
388 SymbolGroupIterator InputFile::symbol_groups_begin() {
389 return SymbolGroupIterator(*this);
392 SymbolGroupIterator InputFile::symbol_groups_end() {
393 return SymbolGroupIterator();
396 SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
398 SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
400 SectionIter = File.obj().section_begin();
405 bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
411 if (Value.File != R.Value.File)
413 return Index == R.Index;
416 const SymbolGroup &SymbolGroupIterator::operator*() const {
420 SymbolGroup &SymbolGroupIterator::operator*() {
425 SymbolGroupIterator &SymbolGroupIterator::operator++() {
426 assert(Value.File && !isEnd());
431 if (Value.File->isPdb()) {
432 Value.updatePdbModi(Index);
440 void SymbolGroupIterator::scanToNextDebugS() {
441 assert(SectionIter.hasValue());
442 auto End = Value.File->obj().section_end();
443 auto &Iter = *SectionIter;
446 while (++Iter != End) {
447 DebugSubsectionArray SS;
448 SectionRef SR = *Iter;
449 if (!isDebugSSection(SR, SS))
452 Value.updateDebugS(SS);
457 bool SymbolGroupIterator::isEnd() const {
460 if (Value.File->isPdb()) {
461 auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
462 uint32_t Count = Dbi.modules().getModuleCount();
463 assert(Index <= Count);
464 return Index == Count;
467 assert(SectionIter.hasValue());
468 return *SectionIter == Value.File->obj().section_end();