1 //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/DebugInfo/PDB/Native/Hash.h"
13 #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14 #include "llvm/Support/BinaryStreamWriter.h"
15 #include "llvm/Support/Endian.h"
20 using namespace llvm::msf;
21 using namespace llvm::support;
22 using namespace llvm::support::endian;
23 using namespace llvm::pdb;
25 StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
28 uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
29 // The reference implementation doesn't include code for /src/headerblock
30 // handling, but it can only read natvis entries lld's PDB files if
31 // this hash function truncates the hash to 16 bit.
32 // PDB/include/misc.h in the reference implementation has a hashSz() function
33 // that returns an unsigned short, that seems what's being used for
35 return static_cast<uint16_t>(Table->getIdForString(S));
38 StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
39 return Table->getStringForId(Offset);
42 uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
43 return Table->insert(S);
46 uint32_t PDBStringTableBuilder::insert(StringRef S) {
47 return Strings.insert(S);
50 uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
51 return Strings.getIdForString(S);
54 StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
55 return Strings.getStringForId(Id);
/// Returns the number of hash buckets to emit for a string table that holds
/// \p NumStrings strings.
///
/// Matching the reference algorithm exactly is not strictly necessary for
/// correctness, but it helps when comparing LLD's PDBs with Microsoft's PDBs
/// because it eliminates superfluous differences.
///
/// The reference implementation (nmt.h, NMT::grow()) starts from
///   unsigned StringCount = 0;
///   unsigned BucketCount = 1;
/// and, as strings are added, grows the table with
///   if (BucketCount * 3 / 4 < StringCount)
///     BucketCount = BucketCount * 3 / 2 + 1;
///
/// Every growth step yields a (StringCount, BucketCount) pair. The result is
/// the BucketCount of the first pair whose StringCount is >= \p NumStrings,
/// i.e. exactly what a lower_bound lookup in a precomputed list of those pairs
/// returns. The pairs are regenerated on the fly (about 50 iterations at
/// most) instead of being stored in a lazily-initialised static container.
///
/// \p NumStrings must not exceed 1163693870, the StringCount of the last
/// supported pair (bucket count 2327387740). This is asserted; builds without
/// assertions clamp to that last pair rather than reading past the end of a
/// table.
static uint32_t computeBucketCount(uint32_t NumStrings) {
  constexpr uint64_t LastGrowthStringCount = 1163693870;
  assert(NumStrings <= LastGrowthStringCount &&
         "too many strings for the supported bucket counts");

  // 64-bit arithmetic: BucketCount * 3 no longer fits in 32 bits when the
  // final pair is derived from bucket count 1551591826.
  uint64_t GrowthStringCount = 0; // StringCount at which BucketCount applies.
  uint64_t BucketCount = 1;
  while (GrowthStringCount < NumStrings &&
         GrowthStringCount < LastGrowthStringCount) {
    // First StringCount for which "BucketCount * 3 / 4 < StringCount" holds.
    GrowthStringCount = BucketCount * 3 / 4 + 1;
    BucketCount = BucketCount * 3 / 2 + 1;
  }
  return static_cast<uint32_t>(BucketCount);
}
132 uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
133 uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
134 Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
139 uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
141 Size += sizeof(PDBStringTableHeader);
142 Size += Strings.calculateSerializedSize();
143 Size += calculateHashTableSize();
144 Size += sizeof(uint32_t); // The /names stream ends with the string count.
148 void PDBStringTableBuilder::setStrings(
149 const codeview::DebugStringTableSubsection &Strings) {
150 this->Strings = Strings;
153 Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
155 PDBStringTableHeader H;
156 H.Signature = PDBStringTableSignature;
158 H.ByteSize = Strings.calculateSerializedSize();
159 if (auto EC = Writer.writeObject(H))
161 assert(Writer.bytesRemaining() == 0);
162 return Error::success();
165 Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
166 if (auto EC = Strings.commit(Writer))
169 assert(Writer.bytesRemaining() == 0);
170 return Error::success();
173 Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
174 // Write a hash table.
175 uint32_t BucketCount = computeBucketCount(Strings.size());
176 if (auto EC = Writer.writeInteger(BucketCount))
178 std::vector<ulittle32_t> Buckets(BucketCount);
180 for (auto &Pair : Strings) {
181 StringRef S = Pair.getKey();
182 uint32_t Offset = Pair.getValue();
183 uint32_t Hash = hashStringV1(S);
185 for (uint32_t I = 0; I != BucketCount; ++I) {
186 uint32_t Slot = (Hash + I) % BucketCount;
187 if (Buckets[Slot] != 0)
189 Buckets[Slot] = Offset;
194 if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
197 assert(Writer.bytesRemaining() == 0);
198 return Error::success();
201 Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
202 if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
204 assert(Writer.bytesRemaining() == 0);
205 return Error::success();
208 Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
209 BinaryStreamWriter SectionWriter;
211 std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
212 if (auto EC = writeHeader(SectionWriter))
215 std::tie(SectionWriter, Writer) =
216 Writer.split(Strings.calculateSerializedSize());
217 if (auto EC = writeStrings(SectionWriter))
220 std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
221 if (auto EC = writeHashTable(SectionWriter))
224 std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
225 if (auto EC = writeEpilogue(SectionWriter))
228 return Error::success();