1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/DebugInfo/MSF/MSFCommon.h"
14 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
15 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
16 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
17 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
18 #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/RawError.h"
21 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23 #include "llvm/Support/BinaryStream.h"
24 #include "llvm/Support/BinaryStreamArray.h"
25 #include "llvm/Support/BinaryStreamReader.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include "llvm/Support/Path.h"
34 using namespace llvm::codeview;
35 using namespace llvm::msf;
36 using namespace llvm::pdb;
39 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40 } // end anonymous namespace
42 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43 BumpPtrAllocator &Allocator)
44 : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
/// Destructor, defaulted out of line in this translation unit.
// NOTE(review): presumably defined here so that the stream types owned through
// unique_ptr members are complete at the point of destruction -- confirm
// against the header.
PDBFile::~PDBFile() = default;
48 StringRef PDBFile::getFilePath() const { return FilePath; }
50 StringRef PDBFile::getFileDirectory() const {
51 return sys::path::parent_path(FilePath);
54 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
56 uint32_t PDBFile::getFreeBlockMapBlock() const {
57 return ContainerLayout.SB->FreeBlockMapBlock;
60 uint32_t PDBFile::getBlockCount() const {
61 return ContainerLayout.SB->NumBlocks;
64 uint32_t PDBFile::getNumDirectoryBytes() const {
65 return ContainerLayout.SB->NumDirectoryBytes;
68 uint32_t PDBFile::getBlockMapIndex() const {
69 return ContainerLayout.SB->BlockMapAddr;
72 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
74 uint32_t PDBFile::getNumDirectoryBlocks() const {
75 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
76 ContainerLayout.SB->BlockSize);
79 uint64_t PDBFile::getBlockMapOffset() const {
80 return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81 ContainerLayout.SB->BlockSize;
84 uint32_t PDBFile::getNumStreams() const {
85 return ContainerLayout.StreamSizes.size();
88 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
89 return ContainerLayout.StreamSizes[StreamIndex];
92 ArrayRef<support::ulittle32_t>
93 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
94 return ContainerLayout.StreamMap[StreamIndex];
97 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
99 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
100 uint32_t NumBytes) const {
101 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
103 ArrayRef<uint8_t> Result;
104 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
105 return std::move(EC);
109 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
110 ArrayRef<uint8_t> Data) const {
111 return make_error<RawError>(raw_error_code::not_writable,
112 "PDBFile is immutable");
115 Error PDBFile::parseFileHeaders() {
116 BinaryStreamReader Reader(*Buffer);
119 const msf::SuperBlock *SB = nullptr;
120 if (auto EC = Reader.readObject(SB)) {
121 consumeError(std::move(EC));
122 return make_error<RawError>(raw_error_code::corrupt_file,
123 "Does not contain superblock");
126 if (auto EC = msf::validateSuperBlock(*SB))
129 if (Buffer->getLength() % SB->BlockSize != 0)
130 return make_error<RawError>(raw_error_code::corrupt_file,
131 "File size is not a multiple of block size");
132 ContainerLayout.SB = SB;
134 // Initialize Free Page Map.
135 ContainerLayout.FreePageMap.resize(SB->NumBlocks);
136 // The Fpm exists either at block 1 or block 2 of the MSF. However, this
137 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
138 // thusly an equal number of total blocks in the file. For a block size
139 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
140 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
141 // the Fpm is split across the file at `getBlockSize()` intervals. As a
142 // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
143 // for any non-negative integer k is an Fpm block. In theory, we only really
144 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
145 // current versions of the MSF format already expect the Fpm to be arranged
146 // at getBlockSize() intervals, so we have to be compatible.
147 // See the function fpmPn() for more information:
148 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
150 MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
151 BinaryStreamReader FpmReader(*FpmStream);
152 ArrayRef<uint8_t> FpmBytes;
153 if (auto EC = FpmReader.readBytes(FpmBytes,
154 msf::getFullFpmByteSize(ContainerLayout)))
156 uint32_t BlocksRemaining = getBlockCount();
158 for (auto Byte : FpmBytes) {
159 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
160 for (uint32_t I = 0; I < BlocksThisByte; ++I) {
162 ContainerLayout.FreePageMap[BI] = true;
168 Reader.setOffset(getBlockMapOffset());
169 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
170 getNumDirectoryBlocks()))
173 return Error::success();
176 Error PDBFile::parseStreamData() {
177 assert(ContainerLayout.SB);
179 return Error::success();
181 uint32_t NumStreams = 0;
183 // Normally you can't use a MappedBlockStream without having fully parsed the
184 // PDB file, because it accesses the directory and various other things, which
185 // is exactly what we are attempting to parse. By specifying a custom
186 // subclass of IPDBStreamData which only accesses the fields that have already
187 // been parsed, we can avoid this and reuse MappedBlockStream.
188 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
190 BinaryStreamReader Reader(*DS);
191 if (auto EC = Reader.readInteger(NumStreams))
194 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
196 for (uint32_t I = 0; I < NumStreams; ++I) {
197 uint32_t StreamSize = getStreamByteSize(I);
198 // FIXME: What does StreamSize ~0U mean?
199 uint64_t NumExpectedStreamBlocks =
200 StreamSize == UINT32_MAX
202 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
204 // For convenience, we store the block array contiguously. This is because
205 // if someone calls setStreamMap(), it is more convenient to be able to call
206 // it with an ArrayRef instead of setting up a StreamRef. Since the
207 // DirectoryStream is cached in the class and thus lives for the life of the
208 // class, we can be guaranteed that readArray() will return a stable
209 // reference, even if it has to allocate from its internal pool.
210 ArrayRef<support::ulittle32_t> Blocks;
211 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
213 for (uint32_t Block : Blocks) {
214 uint64_t BlockEndOffset =
215 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
216 if (BlockEndOffset > getFileSize())
217 return make_error<RawError>(raw_error_code::corrupt_file,
218 "Stream block map is corrupt.");
220 ContainerLayout.StreamMap.push_back(Blocks);
223 // We should have read exactly SB->NumDirectoryBytes bytes.
224 assert(Reader.bytesRemaining() == 0);
225 DirectoryStream = std::move(DS);
226 return Error::success();
229 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
230 return ContainerLayout.DirectoryBlocks;
233 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
235 auto DbiS = getPDBDbiStream();
237 return DbiS.takeError();
239 auto GlobalS = safelyCreateIndexedStream(
240 ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
242 return GlobalS.takeError();
243 auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
244 if (auto EC = TempGlobals->reload())
245 return std::move(EC);
246 Globals = std::move(TempGlobals);
251 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
253 auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
255 return InfoS.takeError();
256 auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
257 if (auto EC = TempInfo->reload())
258 return std::move(EC);
259 Info = std::move(TempInfo);
264 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
266 auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
268 return DbiS.takeError();
269 auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS));
270 if (auto EC = TempDbi->reload())
271 return std::move(EC);
272 Dbi = std::move(TempDbi);
277 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
279 auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
281 return TpiS.takeError();
282 auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
283 if (auto EC = TempTpi->reload())
284 return std::move(EC);
285 Tpi = std::move(TempTpi);
290 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
292 auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
294 return IpiS.takeError();
295 auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
296 if (auto EC = TempIpi->reload())
297 return std::move(EC);
298 Ipi = std::move(TempIpi);
303 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
305 auto DbiS = getPDBDbiStream();
307 return DbiS.takeError();
309 auto PublicS = safelyCreateIndexedStream(
310 ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
312 return PublicS.takeError();
314 llvm::make_unique<PublicsStream>(*this, std::move(*PublicS));
315 if (auto EC = TempPublics->reload())
316 return std::move(EC);
317 Publics = std::move(TempPublics);
322 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
324 auto DbiS = getPDBDbiStream();
326 return DbiS.takeError();
328 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
330 safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
332 return SymbolS.takeError();
334 auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
335 if (auto EC = TempSymbols->reload())
336 return std::move(EC);
337 Symbols = std::move(TempSymbols);
342 Expected<PDBStringTable &> PDBFile::getStringTable() {
344 auto IS = getPDBInfoStream();
346 return IS.takeError();
348 uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names");
351 safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
353 return NS.takeError();
355 auto N = llvm::make_unique<PDBStringTable>();
356 BinaryStreamReader Reader(**NS);
357 if (auto EC = N->reload(Reader))
358 return std::move(EC);
359 assert(Reader.bytesRemaining() == 0);
360 StringTableStream = std::move(*NS);
361 Strings = std::move(N);
366 bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
368 bool PDBFile::hasPDBGlobalsStream() {
369 auto DbiS = getPDBDbiStream();
372 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
375 bool PDBFile::hasPDBInfoStream() { return StreamPDB < getNumStreams(); }
377 bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); }
379 bool PDBFile::hasPDBPublicsStream() {
380 auto DbiS = getPDBDbiStream();
383 return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
386 bool PDBFile::hasPDBSymbolStream() {
387 auto DbiS = getPDBDbiStream();
390 return DbiS->getSymRecordStreamIndex() < getNumStreams();
393 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
395 bool PDBFile::hasPDBStringTable() {
396 auto IS = getPDBInfoStream();
399 return IS->getNamedStreamIndex("/names") < getNumStreams();
/// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
/// stream with that index actually exists.  If it does not, the return value
/// will have a RawError with code raw_error_code::no_stream.  Else, the return
/// value will contain the stream returned by createIndexedStream().
406 Expected<std::unique_ptr<MappedBlockStream>>
407 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
408 BinaryStreamRef MsfData,
409 uint32_t StreamIndex) const {
410 if (StreamIndex >= getNumStreams())
411 return make_error<RawError>(raw_error_code::no_stream);
412 return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,