contrib/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h

   1 //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
  11 #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H
  12
  13 #include "llvm/ADT/SparseBitVector.h"
  14 #include "llvm/ADT/iterator.h"
  15 #include "llvm/DebugInfo/PDB/Native/RawError.h"
  16 #include "llvm/Support/BinaryStreamReader.h"
  17 #include "llvm/Support/BinaryStreamWriter.h"
  18 #include "llvm/Support/Endian.h"
  19 #include "llvm/Support/Error.h"
  20 #include <cstdint>
  21 #include <iterator>
  22 #include <utility>
  23 #include <vector>
  24
  25 namespace llvm {
  26
  27 class BinaryStreamReader;
  28 class BinaryStreamWriter;
  29
  30 namespace pdb {
  31
  32 Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V);
  33 Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec);
  34
  35 template <typename ValueT, typename TraitsT> class HashTable;
  36
  37 template <typename ValueT, typename TraitsT>
  38 class HashTableIterator
  39     : public iterator_facade_base<HashTableIterator<ValueT, TraitsT>,
  40                                   std::forward_iterator_tag,
  41                                   std::pair<uint32_t, ValueT>> {
  42   friend HashTable<ValueT, TraitsT>;
  43
  44   HashTableIterator(const HashTable<ValueT, TraitsT> &Map, uint32_t Index,
  45                     bool IsEnd)
  46       : Map(&Map), Index(Index), IsEnd(IsEnd) {}
  47
  48 public:
  49   HashTableIterator(const HashTable<ValueT, TraitsT> &Map) : Map(&Map) {
  50     int I = Map.Present.find_first();
  51     if (I == -1) {
  52       Index = 0;
  53       IsEnd = true;
  54     } else {
  55       Index = static_cast<uint32_t>(I);
  56       IsEnd = false;
  57     }
  58   }
  59
  60   HashTableIterator &operator=(const HashTableIterator &R) {
  61     Map = R.Map;
  62     return *this;
  63   }
  64   bool operator==(const HashTableIterator &R) const {
  65     if (IsEnd && R.IsEnd)
  66       return true;
  67     if (IsEnd != R.IsEnd)
  68       return false;
  69
  70     return (Map == R.Map) && (Index == R.Index);
  71   }
  72   const std::pair<uint32_t, ValueT> &operator*() const {
  73     assert(Map->Present.test(Index));
  74     return Map->Buckets[Index];
  75   }
  76   HashTableIterator &operator++() {
  77     while (Index < Map->Buckets.size()) {
  78       ++Index;
  79       if (Map->Present.test(Index))
  80         return *this;
  81     }
  82
  83     IsEnd = true;
  84     return *this;
  85   }
  86
  87 private:
  88   bool isEnd() const { return IsEnd; }
  89   uint32_t index() const { return Index; }
  90
  91   const HashTable<ValueT, TraitsT> *Map;
  92   uint32_t Index;
  93   bool IsEnd;
  94 };
  95
  96 template <typename T> struct PdbHashTraits {};
  97
  98 template <> struct PdbHashTraits<uint32_t> {
  99   uint32_t hashLookupKey(uint32_t N) const { return N; }
 100   uint32_t storageKeyToLookupKey(uint32_t N) const { return N; }
 101   uint32_t lookupKeyToStorageKey(uint32_t N) { return N; }
 102 };
 103
 104 template <typename ValueT, typename TraitsT = PdbHashTraits<ValueT>>
 105 class HashTable {
 106   using iterator = HashTableIterator<ValueT, TraitsT>;
 107   friend iterator;
 108
 109   struct Header {
 110     support::ulittle32_t Size;
 111     support::ulittle32_t Capacity;
 112   };
 113
 114   using BucketList = std::vector<std::pair<uint32_t, ValueT>>;
 115
 116 public:
 117   HashTable() { Buckets.resize(8); }
 118
 119   explicit HashTable(TraitsT Traits) : HashTable(8, std::move(Traits)) {}
 120   HashTable(uint32_t Capacity, TraitsT Traits) : Traits(Traits) {
 121     Buckets.resize(Capacity);
 122   }
 123
 124   Error load(BinaryStreamReader &Stream) {
 125     const Header *H;
 126     if (auto EC = Stream.readObject(H))
 127       return EC;
 128     if (H->Capacity == 0)
 129       return make_error<RawError>(raw_error_code::corrupt_file,
 130                                   "Invalid Hash Table Capacity");
 131     if (H->Size > maxLoad(H->Capacity))
 132       return make_error<RawError>(raw_error_code::corrupt_file,
 133                                   "Invalid Hash Table Size");
 134
 135     Buckets.resize(H->Capacity);
 136
 137     if (auto EC = readSparseBitVector(Stream, Present))
 138       return EC;
 139     if (Present.count() != H->Size)
 140       return make_error<RawError>(raw_error_code::corrupt_file,
 141                                   "Present bit vector does not match size!");
 142
 143     if (auto EC = readSparseBitVector(Stream, Deleted))
 144       return EC;
 145     if (Present.intersects(Deleted))
 146       return make_error<RawError>(raw_error_code::corrupt_file,
 147                                   "Present bit vector interesects deleted!");
 148
 149     for (uint32_t P : Present) {
 150       if (auto EC = Stream.readInteger(Buckets[P].first))
 151         return EC;
 152       const ValueT *Value;
 153       if (auto EC = Stream.readObject(Value))
 154         return EC;
 155       Buckets[P].second = *Value;
 156     }
 157
 158     return Error::success();
 159   }
 160
 161   uint32_t calculateSerializedLength() const {
 162     uint32_t Size = sizeof(Header);
 163
 164     constexpr int BitsPerWord = 8 * sizeof(uint32_t);
 165
 166     int NumBitsP = Present.find_last() + 1;
 167     int NumBitsD = Deleted.find_last() + 1;
 168
 169     uint32_t NumWordsP = alignTo(NumBitsP, BitsPerWord) / BitsPerWord;
 170     uint32_t NumWordsD = alignTo(NumBitsD, BitsPerWord) / BitsPerWord;
 171
 172     // Present bit set number of words (4 bytes), followed by that many actual
 173     // words (4 bytes each).
 174     Size += sizeof(uint32_t);
 175     Size += NumWordsP * sizeof(uint32_t);
 176
 177     // Deleted bit set number of words (4 bytes), followed by that many actual
 178     // words (4 bytes each).
 179     Size += sizeof(uint32_t);
 180     Size += NumWordsD * sizeof(uint32_t);
 181
 182     // One (Key, ValueT) pair for each entry Present.
 183     Size += (sizeof(uint32_t) + sizeof(ValueT)) * size();
 184
 185     return Size;
 186   }
 187
 188   Error commit(BinaryStreamWriter &Writer) const {
 189     Header H;
 190     H.Size = size();
 191     H.Capacity = capacity();
 192     if (auto EC = Writer.writeObject(H))
 193       return EC;
 194
 195     if (auto EC = writeSparseBitVector(Writer, Present))
 196       return EC;
 197
 198     if (auto EC = writeSparseBitVector(Writer, Deleted))
 199       return EC;
 200
 201     for (const auto &Entry : *this) {
 202       if (auto EC = Writer.writeInteger(Entry.first))
 203         return EC;
 204       if (auto EC = Writer.writeObject(Entry.second))
 205         return EC;
 206     }
 207     return Error::success();
 208   }
 209
 210   void clear() {
 211     Buckets.resize(8);
 212     Present.clear();
 213     Deleted.clear();
 214   }
 215
 216   bool empty() const { return size() == 0; }
 217   uint32_t capacity() const { return Buckets.size(); }
 218   uint32_t size() const { return Present.count(); }
 219
 220   iterator begin() const { return iterator(*this); }
 221   iterator end() const { return iterator(*this, 0, true); }
 222
 223   /// Find the entry whose key has the specified hash value, using the specified
 224   /// traits defining hash function and equality.
 225   template <typename Key> iterator find_as(const Key &K) const {
 226     uint32_t H = Traits.hashLookupKey(K) % capacity();
 227     uint32_t I = H;
 228     Optional<uint32_t> FirstUnused;
 229     do {
 230       if (isPresent(I)) {
 231         if (Traits.storageKeyToLookupKey(Buckets[I].first) == K)
 232           return iterator(*this, I, false);
 233       } else {
 234         if (!FirstUnused)
 235           FirstUnused = I;
 236         // Insertion occurs via linear probing from the slot hint, and will be
 237         // inserted at the first empty / deleted location.  Therefore, if we are
 238         // probing and find a location that is neither present nor deleted, then
 239         // nothing must have EVER been inserted at this location, and thus it is
 240         // not possible for a matching value to occur later.
 241         if (!isDeleted(I))
 242           break;
 243       }
 244       I = (I + 1) % capacity();
 245     } while (I != H);
 246
 247     // The only way FirstUnused would not be set is if every single entry in the
 248     // table were Present.  But this would violate the load factor constraints
 249     // that we impose, so it should never happen.
 250     assert(FirstUnused);
 251     return iterator(*this, *FirstUnused, true);
 252   }
 253
 254   /// Set the entry using a key type that the specified Traits can convert
 255   /// from a real key to an internal key.
 256   template <typename Key> bool set_as(const Key &K, ValueT V) {
 257     return set_as_internal(K, std::move(V), None);
 258   }
 259
 260   template <typename Key> ValueT get(const Key &K) const {
 261     auto Iter = find_as(K);
 262     assert(Iter != end());
 263     return (*Iter).second;
 264   }
 265
 266 protected:
 267   bool isPresent(uint32_t K) const { return Present.test(K); }
 268   bool isDeleted(uint32_t K) const { return Deleted.test(K); }
 269
 270   TraitsT Traits;
 271   BucketList Buckets;
 272   mutable SparseBitVector<> Present;
 273   mutable SparseBitVector<> Deleted;
 274
 275 private:
 276   /// Set the entry using a key type that the specified Traits can convert
 277   /// from a real key to an internal key.
 278   template <typename Key>
 279   bool set_as_internal(const Key &K, ValueT V, Optional<uint32_t> InternalKey) {
 280     auto Entry = find_as(K);
 281     if (Entry != end()) {
 282       assert(isPresent(Entry.index()));
 283       assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K);
 284       // We're updating, no need to do anything special.
 285       Buckets[Entry.index()].second = V;
 286       return false;
 287     }
 288
 289     auto &B = Buckets[Entry.index()];
 290     assert(!isPresent(Entry.index()));
 291     assert(Entry.isEnd());
 292     B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K);
 293     B.second = V;
 294     Present.set(Entry.index());
 295     Deleted.reset(Entry.index());
 296
 297     grow();
 298
 299     assert((find_as(K)) != end());
 300     return true;
 301   }
 302
 303   static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
 304
 305   void grow() {
 306     uint32_t S = size();
 307     uint32_t MaxLoad = maxLoad(capacity());
 308     if (S < maxLoad(capacity()))
 309       return;
 310     assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
 311
 312     uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX;
 313
 314     // Growing requires rebuilding the table and re-hashing every item.  Make a
 315     // copy with a larger capacity, insert everything into the copy, then swap
 316     // it in.
 317     HashTable NewMap(NewCapacity, Traits);
 318     for (auto I : Present) {
 319       auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first);
 320       NewMap.set_as_internal(LookupKey, Buckets[I].second, Buckets[I].first);
 321     }
 322
 323     Buckets.swap(NewMap.Buckets);
 324     std::swap(Present, NewMap.Present);
 325     std::swap(Deleted, NewMap.Deleted);
 326     assert(capacity() == NewCapacity);
 327     assert(size() == S);
 328   }
 329 };
 330
 331 } // end namespace pdb
 332
 333 } // end namespace llvm
 334
 335 #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H