1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
12 #include "llvm/ADT/DenseSet.h"
13 #include "llvm/DebugInfo/CodeView/RecordName.h"
14 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
15 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
16 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
17 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
18 #include "llvm/DebugInfo/MSF/MSFCommon.h"
19 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
20 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
21 #include "llvm/DebugInfo/PDB/Native/Hash.h"
22 #include "llvm/Support/BinaryItemStream.h"
23 #include "llvm/Support/BinaryStreamWriter.h"
24 #include "llvm/Support/xxhash.h"
29 using namespace llvm::msf;
30 using namespace llvm::pdb;
31 using namespace llvm::codeview;
33 struct llvm::pdb::GSIHashStreamBuilder {
34 struct UdtDenseMapInfo {
35 static inline CVSymbol getEmptyKey() {
36 static CVSymbol Empty;
39 static inline CVSymbol getTombstoneKey() {
40 static CVSymbol Tombstone(static_cast<SymbolKind>(-1),
44 static unsigned getHashValue(const CVSymbol &Val) {
45 return xxHash64(Val.RecordData);
47 static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
48 return LHS.RecordData == RHS.RecordData;
52 std::vector<CVSymbol> Records;
54 llvm::DenseSet<CVSymbol, UdtDenseMapInfo> UdtHashes;
55 std::vector<PSHashRecord> HashRecords;
56 std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
57 std::vector<support::ulittle32_t> HashBuckets;
59 uint32_t calculateSerializedLength() const;
60 uint32_t calculateRecordByteSize() const;
61 Error commit(BinaryStreamWriter &Writer);
62 void finalizeBuckets(uint32_t RecordZeroOffset);
64 template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
66 addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
67 CodeViewContainer::Pdb));
69 void addSymbol(const CVSymbol &Symbol) {
70 if (Symbol.kind() == S_UDT) {
71 auto Iter = UdtHashes.insert(Symbol);
76 Records.push_back(Symbol);
80 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
81 uint32_t Size = sizeof(GSIHashHeader);
82 Size += HashRecords.size() * sizeof(PSHashRecord);
83 Size += HashBitmap.size() * sizeof(uint32_t);
84 Size += HashBuckets.size() * sizeof(uint32_t);
88 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
90 for (const auto &Sym : Records)
95 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
97 Header.VerSignature = GSIHashHeader::HdrSignature;
98 Header.VerHdr = GSIHashHeader::HdrVersion;
99 Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
100 Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
102 if (auto EC = Writer.writeObject(Header))
105 if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
107 if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
109 if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
111 return Error::success();
114 static bool isAsciiString(StringRef S) {
115 return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
118 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
119 static bool gsiRecordLess(StringRef S1, StringRef S2) {
120 size_t LS = S1.size();
121 size_t RS = S2.size();
122 // Shorter strings always compare less than longer strings.
126 // If either string contains non ascii characters, memcmp them.
127 if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
128 return memcmp(S1.data(), S2.data(), LS) < 0;
130 // Both strings are ascii, perform a case-insenstive comparison.
131 return S1.compare_lower(S2.data()) < 0;
134 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
135 std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
137 uint32_t SymOffset = RecordZeroOffset;
138 for (const CVSymbol &Sym : Records) {
140 // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
141 HR.Off = SymOffset + 1;
142 HR.CRef = 1; // Always use a refcount of 1.
144 // Hash the name to figure out which bucket this goes into.
145 StringRef Name = getSymbolName(Sym);
146 size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
147 TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
148 SymOffset += Sym.length();
151 // Compute the three tables: the hash records in bucket and chain order, the
152 // bucket presence bitmap, and the bucket chain start offsets.
153 HashRecords.reserve(Records.size());
154 for (ulittle32_t &Word : HashBitmap)
156 for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
157 auto &Bucket = TmpBuckets[BucketIdx];
160 HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
162 // Calculate what the offset of the first hash record in the chain would
163 // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
164 // each record would be 12 bytes. See HROffsetCalc in gsi.h.
165 const int SizeOfHROffsetCalc = 12;
166 ulittle32_t ChainStartOff =
167 ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
168 HashBuckets.push_back(ChainStartOff);
170 // Sort each bucket by memcmp of the symbol's name. It's important that
171 // we use the same sorting algorithm as is used by the reference
172 // implementation to ensure that the search for a record within a bucket
173 // can properly early-out when it detects the record won't be found. The
174 // algorithm used here corredsponds to the function
175 // caseInsensitiveComparePchPchCchCch in the reference implementation.
176 llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
177 const std::pair<StringRef, PSHashRecord> &Right) {
178 return gsiRecordLess(Left.first, Right.first);
181 for (const auto &Entry : Bucket)
182 HashRecords.push_back(Entry.second);
186 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
187 : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
188 GSH(llvm::make_unique<GSIHashStreamBuilder>()) {}
190 GSIStreamBuilder::~GSIStreamBuilder() {}
192 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
194 Size += sizeof(PublicsStreamHeader);
195 Size += PSH->calculateSerializedLength();
196 Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
197 // FIXME: Add thunk map and section offsets for incremental linking.
202 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
203 return GSH->calculateSerializedLength();
206 Error GSIStreamBuilder::finalizeMsfLayout() {
207 // First we write public symbol records, then we write global symbol records.
208 uint32_t PSHZero = 0;
209 uint32_t GSHZero = PSH->calculateRecordByteSize();
211 PSH->finalizeBuckets(PSHZero);
212 GSH->finalizeBuckets(GSHZero);
214 Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
216 return Idx.takeError();
217 GSH->StreamIndex = *Idx;
218 Idx = Msf.addStream(calculatePublicsHashStreamSize());
220 return Idx.takeError();
221 PSH->StreamIndex = *Idx;
223 uint32_t RecordBytes =
224 GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
226 Idx = Msf.addStream(RecordBytes);
228 return Idx.takeError();
229 RecordStreamIdx = *Idx;
230 return Error::success();
233 static bool comparePubSymByAddrAndName(
234 const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
235 const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
236 if (LS.second->Segment != RS.second->Segment)
237 return LS.second->Segment < RS.second->Segment;
238 if (LS.second->Offset != RS.second->Offset)
239 return LS.second->Offset < RS.second->Offset;
241 return LS.second->Name < RS.second->Name;
244 /// Compute the address map. The address map is an array of symbol offsets
245 /// sorted so that it can be binary searched by address.
246 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
247 // Make a vector of pointers to the symbols so we can sort it by address.
248 // Also gather the symbol offsets while we're at it.
250 std::vector<PublicSym32> DeserializedPublics;
251 std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
252 std::vector<uint32_t> SymOffsets;
253 DeserializedPublics.reserve(Records.size());
254 PublicsByAddr.reserve(Records.size());
255 SymOffsets.reserve(Records.size());
257 uint32_t SymOffset = 0;
258 for (const CVSymbol &Sym : Records) {
259 assert(Sym.kind() == SymbolKind::S_PUB32);
260 DeserializedPublics.push_back(
261 cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
262 PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
263 SymOffsets.push_back(SymOffset);
264 SymOffset += Sym.length();
266 std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
267 comparePubSymByAddrAndName);
269 // Fill in the symbol offsets in the appropriate order.
270 std::vector<ulittle32_t> AddrMap;
271 AddrMap.reserve(Records.size());
272 for (auto &Sym : PublicsByAddr) {
273 ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
274 assert(Idx >= 0 && size_t(Idx) < Records.size());
275 AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
280 uint32_t GSIStreamBuilder::getPublicsStreamIndex() const {
281 return PSH->StreamIndex;
284 uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const {
285 return GSH->StreamIndex;
288 void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) {
289 PSH->addSymbol(Pub, Msf);
292 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
293 GSH->addSymbol(Sym, Msf);
296 void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
297 GSH->addSymbol(Sym, Msf);
300 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
301 GSH->addSymbol(Sym, Msf);
304 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) {
308 static Error writeRecords(BinaryStreamWriter &Writer,
309 ArrayRef<CVSymbol> Records) {
310 BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);
311 ItemStream.setItems(Records);
312 BinaryStreamRef RecordsRef(ItemStream);
313 return Writer.writeStreamRef(RecordsRef);
316 Error GSIStreamBuilder::commitSymbolRecordStream(
317 WritableBinaryStreamRef Stream) {
318 BinaryStreamWriter Writer(Stream);
320 // Write public symbol records first, followed by global symbol records. This
321 // must match the order that we assume in finalizeMsfLayout when computing
322 // PSHZero and GSHZero.
323 if (auto EC = writeRecords(Writer, PSH->Records))
325 if (auto EC = writeRecords(Writer, GSH->Records))
328 return Error::success();
331 Error GSIStreamBuilder::commitPublicsHashStream(
332 WritableBinaryStreamRef Stream) {
333 BinaryStreamWriter Writer(Stream);
334 PublicsStreamHeader Header;
336 // FIXME: Fill these in. They are for incremental linking.
337 Header.SymHash = PSH->calculateSerializedLength();
338 Header.AddrMap = PSH->Records.size() * 4;
339 Header.NumThunks = 0;
340 Header.SizeOfThunk = 0;
341 Header.ISectThunkTable = 0;
342 memset(Header.Padding, 0, sizeof(Header.Padding));
343 Header.OffThunkTable = 0;
344 Header.NumSections = 0;
345 if (auto EC = Writer.writeObject(Header))
348 if (auto EC = PSH->commit(Writer))
351 std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
352 if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
355 return Error::success();
358 Error GSIStreamBuilder::commitGlobalsHashStream(
359 WritableBinaryStreamRef Stream) {
360 BinaryStreamWriter Writer(Stream);
361 return GSH->commit(Writer);
364 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
365 WritableBinaryStreamRef Buffer) {
366 auto GS = WritableMappedBlockStream::createIndexedStream(
367 Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
368 auto PS = WritableMappedBlockStream::createIndexedStream(
369 Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
370 auto PRS = WritableMappedBlockStream::createIndexedStream(
371 Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
373 if (auto EC = commitSymbolRecordStream(*PRS))
375 if (auto EC = commitGlobalsHashStream(*GS))
377 if (auto EC = commitPublicsHashStream(*PS))
379 return Error::success();