1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
10 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>
26 class BinaryStreamReader;
27 class BinaryStreamWriter;
31 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
32 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
34 template <typename ValueT> class HashTable;
36 template <typename ValueT>
37 class HashTableIterator
38 : public iterator_facade_base<HashTableIterator<ValueT>,
39 std::forward_iterator_tag,
40 const std::pair<uint32_t, ValueT>> {
41 friend HashTable<ValueT>;
43 HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
45 : Map(&Map), Index(Index), IsEnd(IsEnd) {}
48 HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) {
49 int I = Map.Present.find_first();
54 Index = static_cast<uint32_t>(I);
59 HashTableIterator(const HashTableIterator &R) = default;
60 HashTableIterator &operator=(const HashTableIterator &R) {
64 bool operator==(const HashTableIterator &R) const {
70 return (Map == R.Map) && (Index == R.Index);
72 const std::pair<uint32_t, ValueT> &operator*() const {
73 assert(Map->Present.test(Index));
74 return Map->Buckets[Index];
77 // Implement postfix op++ in terms of prefix op++ by using the superclass
79 using iterator_facade_base<HashTableIterator<ValueT>,
80 std::forward_iterator_tag,
81 const std::pair<uint32_t, ValueT>>::operator++;
82 HashTableIterator &operator++() {
83 while (Index < Map->Buckets.size()) {
85 if (Map->Present.test(Index))
94 bool isEnd() const { return IsEnd; }
95 uint32_t index() const { return Index; }
97 const HashTable<ValueT> *Map;
102 template <typename ValueT>
105 support::ulittle32_t Size;
106 support::ulittle32_t Capacity;
109 using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
112 using const_iterator = HashTableIterator<ValueT>;
113 friend const_iterator;
115 HashTable() { Buckets.resize(8); }
116 explicit HashTable(uint32_t Capacity) {
117 Buckets.resize(Capacity);
120 Error load(BinaryStreamReader &Stream) {
122 if (auto EC = Stream.readObject(H))
124 if (H->Capacity == 0)
125 return make_error<RawError>(raw_error_code::corrupt_file,
126 "Invalid Hash Table Capacity");
127 if (H->Size > maxLoad(H->Capacity))
128 return make_error<RawError>(raw_error_code::corrupt_file,
129 "Invalid Hash Table Size");
131 Buckets.resize(H->Capacity);
133 if (auto EC = readSparseBitVector(Stream, Present))
135 if (Present.count() != H->Size)
136 return make_error<RawError>(raw_error_code::corrupt_file,
137 "Present bit vector does not match size!");
139 if (auto EC = readSparseBitVector(Stream, Deleted))
141 if (Present.intersects(Deleted))
142 return make_error<RawError>(raw_error_code::corrupt_file,
143 "Present bit vector intersects deleted!");
145 for (uint32_t P : Present) {
146 if (auto EC = Stream.readInteger(Buckets[P].first))
149 if (auto EC = Stream.readObject(Value))
151 Buckets[P].second = *Value;
154 return Error::success();
157 uint32_t calculateSerializedLength() const {
158 uint32_t Size = sizeof(Header);
160 constexpr int BitsPerWord = 8 * sizeof(uint32_t);
162 int NumBitsP = Present.find_last() + 1;
163 int NumBitsD = Deleted.find_last() + 1;
165 uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
166 uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
168 // Present bit set number of words (4 bytes), followed by that many actual
169 // words (4 bytes each).
170 Size += sizeof(uint32_t);
171 Size += NumWordsP * sizeof(uint32_t);
173 // Deleted bit set number of words (4 bytes), followed by that many actual
174 // words (4 bytes each).
175 Size += sizeof(uint32_t);
176 Size += NumWordsD * sizeof(uint32_t);
178 // One (Key, ValueT) pair for each entry Present.
179 Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
184 Error commit(BinaryStreamWriter &Writer) const {
187 H.Capacity = capacity();
188 if (auto EC = Writer.writeObject(H))
191 if (auto EC = writeSparseBitVector(Writer, Present))
194 if (auto EC = writeSparseBitVector(Writer, Deleted))
197 for (const auto &Entry : *this) {
198 if (auto EC = Writer.writeInteger(Entry.first))
200 if (auto EC = Writer.writeObject(Entry.second))
203 return Error::success();
212 bool empty() const { return size() == 0; }
213 uint32_t capacity() const { return Buckets.size(); }
214 uint32_t size() const { return Present.count(); }
216 const_iterator begin() const { return const_iterator(*this); }
217 const_iterator end() const { return const_iterator(*this, 0, true); }
219 /// Find the entry whose key has the specified hash value, using the specified
220 /// traits defining hash function and equality.
221 template <typename Key, typename TraitsT>
222 const_iterator find_as(const Key &K, TraitsT &Traits) const {
223 uint32_t H = Traits.hashLookupKey(K) % capacity();
225 Optional<uint32_t> FirstUnused;
228 if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
229 return const_iterator(*this, I, false);
233 // Insertion occurs via linear probing from the slot hint, and will be
234 // inserted at the first empty / deleted location. Therefore, if we are
235 // probing and find a location that is neither present nor deleted, then
236 // nothing must have EVER been inserted at this location, and thus it is
237 // not possible for a matching value to occur later.
241 I = (I + 1) % capacity();
244 // The only way FirstUnused would not be set is if every single entry in the
245 // table were Present. But this would violate the load factor constraints
246 // that we impose, so it should never happen.
248 return const_iterator(*this, *FirstUnused, true);
251 /// Set the entry using a key type that the specified Traits can convert
252 /// from a real key to an internal key.
253 template <typename Key, typename TraitsT>
254 bool set_as(const Key &K, ValueT V, TraitsT &Traits) {
255 return set_as_internal(K, std::move(V), Traits, None);
258 template <typename Key, typename TraitsT>
259 ValueT get(const Key &K, TraitsT &Traits) const {
260 auto Iter = find_as(K, Traits);
261 assert(Iter != end());
262 return (*Iter).second;
266 bool isPresent(uint32_t K) const { return Present.test(K); }
267 bool isDeleted(uint32_t K) const { return Deleted.test(K); }
270 mutable SparseBitVector<> Present;
271 mutable SparseBitVector<> Deleted;
274 /// Set the entry using a key type that the specified Traits can convert
275 /// from a real key to an internal key.
276 template <typename Key, typename TraitsT>
277 bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits,
278 Optional<uint32_t> InternalKey) {
279 auto Entry = find_as(K, Traits);
280 if (Entry != end()) {
281 assert(isPresent(Entry.index()));
282 assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
283 // We're updating, no need to do anything special.
284 Buckets[Entry.index()].second = V;
288 auto &B = Buckets[Entry.index()];
289 assert(!isPresent(Entry.index()));
290 assert(Entry.isEnd());
291 B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
293 Present.set(Entry.index());
294 Deleted.reset(Entry.index());
298 assert((find_as(K, Traits)) != end());
302 static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
304 template <typename TraitsT>
305 void grow(TraitsT &Traits) {
307 uint32_t MaxLoad = maxLoad(capacity());
308 if (S < maxLoad(capacity()))
310 assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
312 uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
314 // Growing requires rebuilding the table and re-hashing every item. Make a
315 // copy with a larger capacity, insert everything into the copy, then swap
317 HashTable NewMap(NewCapacity);
318 for (auto I : Present) {
319 auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
320 NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits,
324 Buckets.swap(NewMap.Buckets);
325 std::swap(Present, NewMap.Present);
326 std::swap(Deleted, NewMap.Deleted);
327 assert(capacity() == NewCapacity);
332 } // end namespace pdb
334 } // end namespace llvm
336 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H