1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
10 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>
26 class BinaryStreamReader;
27 class BinaryStreamWriter;
31 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
32 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
34 template <typename ValueT> class HashTable;
36 template <typename ValueT>
37 class HashTableIterator
38 : public iterator_facade_base<HashTableIterator<ValueT>,
39 std::forward_iterator_tag,
40 const std::pair<uint32_t, ValueT>> {
41 friend HashTable<ValueT>;
43 HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
45 : Map(&Map), Index(Index), IsEnd(IsEnd) {}
48 HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
49 int I = Map.Present.find_first();
54 Index = static_cast<uint32_t>(I);
59 HashTableIterator(const HashTableIterator &R) = default;
60 HashTableIterator &operator=(const HashTableIterator &R) {
64 bool operator==(const HashTableIterator &R) const {
70 return (Map == R.Map) && (Index == R.Index);
72 const std::pair<uint32_t, ValueT> &operator*() const {
73 assert(Map->Present.test(Index));
74 return Map->Buckets[Index];
77 // Implement postfix op++ in terms of prefix op++ by using the superclass
79 using iterator_facade_base<HashTableIterator<ValueT>,
80 std::forward_iterator_tag,
81 const std::pair<uint32_t, ValueT>>::operator++;
82 HashTableIterator &operator++() {
83 while (Index < Map->Buckets.size()) {
85 if (Map->Present.test(Index))
94 bool isEnd() const { return IsEnd; }
95 uint32_t index() const { return Index; }
97 const HashTable<ValueT> *Map;
102 template <typename ValueT>
105 support::ulittle32_t Size;
106 support::ulittle32_t Capacity;
109 using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
112 using const_iterator = HashTableIterator<ValueT>;
113 friend const_iterator;
115 HashTable() { Buckets.resize(8); }
116 explicit HashTable(uint32_t Capacity) {
117 Buckets.resize(Capacity);
120 Error load(BinaryStreamReader &Stream) {
122 if (auto EC = Stream.readObject(H))
124 if (H->Capacity == 0)
125 return make_error<RawError>(raw_error_code::corrupt_file,
126 "Invalid Hash Table Capacity");
127 if (H->Size > maxLoad(H->Capacity))
128 return make_error<RawError>(raw_error_code::corrupt_file,
129 "Invalid Hash Table Size");
131 Buckets.resize(H->Capacity);
133 if (auto EC = readSparseBitVector(Stream, Present))
135 if (Present.count() != H->Size)
136 return make_error<RawError>(raw_error_code::corrupt_file,
137 "Present bit vector does not match size!");
139 if (auto EC = readSparseBitVector(Stream, Deleted))
141 if (Present.intersects(Deleted))
142 return make_error<RawError>(raw_error_code::corrupt_file,
143 "Present bit vector intersects deleted!");
145 for (uint32_t P : Present) {
146 if (auto EC = Stream.readInteger(Buckets[P].first))
149 if (auto EC = Stream.readObject(Value))
151 Buckets[P].second = *Value;
154 return Error::success();
157 uint32_t calculateSerializedLength() const {
158 uint32_t Size = sizeof(Header);
160 constexpr int BitsPerWord = 8 * sizeof(uint32_t);
162 int NumBitsP = Present.find_last() + 1;
163 int NumBitsD = Deleted.find_last() + 1;
165 uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
166 uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
168 // Present bit set number of words (4 bytes), followed by that many actual
169 // words (4 bytes each).
170 Size += sizeof(uint32_t);
171 Size += NumWordsP * sizeof(uint32_t);
173 // Deleted bit set number of words (4 bytes), followed by that many actual
174 // words (4 bytes each).
175 Size += sizeof(uint32_t);
176 Size += NumWordsD * sizeof(uint32_t);
178 // One (Key, ValueT) pair for each entry Present.
179 Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
184 Error commit(BinaryStreamWriter &Writer) const {
187 H.Capacity = capacity();
188 if (auto EC = Writer.writeObject(H))
191 if (auto EC = writeSparseBitVector(Writer, Present))
194 if (auto EC = writeSparseBitVector(Writer, Deleted))
197 for (const auto &Entry : *this) {
198 if (auto EC = Writer.writeInteger(Entry.first))
200 if (auto EC = Writer.writeObject(Entry.second))
203 return Error::success();
212 bool empty() const { return size() == 0; }
213 uint32_t capacity() const { return Buckets.size(); }
214 uint32_t size() const { return Present.count(); }
216 const_iterator begin() const { return const_iterator(*this); }
217 const_iterator end() const { return const_iterator(*this, 0, true); }
219 /// Find the entry whose key has the specified hash value, using the specified
220 /// traits defining hash function and equality.
221 template <typename Key, typename TraitsT>
222 const_iterator find_as(const Key &K, TraitsT &Traits) const {
223 uint32_t H = Traits.hashLookupKey(K) % capacity();
225 Optional<uint32_t> FirstUnused;
228 if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
229 return const_iterator(*this, I, false);
233 // Insertion occurs via linear probing from the slot hint, and will be
234 // inserted at the first empty / deleted location. Therefore, if we are
235 // probing and find a location that is neither present nor deleted, then
236 // nothing must have EVER been inserted at this location, and thus it is
237 // not possible for a matching value to occur later.
241 I = (I + 1) % capacity();
244 // The only way FirstUnused would not be set is if every single entry in the
245 // table were Present. But this would violate the load factor constraints
246 // that we impose, so it should never happen.
248 return const_iterator(*this, *FirstUnused, true);
251 /// Set the entry using a key type that the specified Traits can convert
252 /// from a real key to an internal key.
253 template <typename Key, typename TraitsT>
254 bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
255 return set_as_internal(K, std::move(V), Traits, None);
258 template <typename Key, typename TraitsT>
259 ValueT get(const Key &K, TraitsT &Traits) const {
260 auto Iter = find_as(K, Traits);
261 assert(Iter != end());
262 return (*Iter).second;
266 bool isPresent(uint32_t K) const { return Present.test(K); }
267 bool isDeleted(uint32_t K) const { return Deleted.test(K); }
270 mutable SparseBitVector<> Present;
271 mutable SparseBitVector<> Deleted;
274 /// Set the entry using a key type that the specified Traits can convert
275 /// from a real key to an internal key.
276 template <typename Key, typename TraitsT>
277 bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
278 Optional<uint32_t> InternalKey) {
279 auto Entry = find_as(K, Traits);
280 if (Entry != end()) {
281 assert(isPresent(Entry.index()));
282 assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
283 // We're updating, no need to do anything special.
284 Buckets[Entry.index()].second = V;
288 auto &B = Buckets[Entry.index()];
289 assert(!isPresent(Entry.index()));
290 assert(Entry.isEnd());
291 B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
293 Present.set(Entry.index());
294 Deleted.reset(Entry.index());
298 assert((find_as(K, Traits)) != end());
302 static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
304 template <typename TraitsT>
305 void grow(TraitsT &Traits) {
307 uint32_t MaxLoad = maxLoad(capacity());
308 if (S < maxLoad(capacity()))
310 assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
312 uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
314 // Growing requires rebuilding the table and re-hashing every item. Make a
315 // copy with a larger capacity, insert everything into the copy, then swap
317 HashTable NewMap(NewCapacity);
318 for (auto I : Present) {
319 auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
320 NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
324 Buckets.swap(NewMap.Buckets);
325 std::swap(Present, NewMap.Present);
326 std::swap(Deleted, NewMap.Deleted);
327 assert(capacity() == NewCapacity);
332 } // end namespace pdb
334 } // end namespace llvm
336 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H