contrib/llvm/tools/lldb/include/lldb/Core/MappedHash.h

   1 //===-- MappedHash.h --------------------------------------------*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #ifndef liblldb_MappedHash_h_
  11 #define liblldb_MappedHash_h_
  12
  13 // C Includes
  14 #include <assert.h>
  15 #include <stdint.h>
  16
  17 // C++ Includes
  18 #include <algorithm>
  19 #include <functional>
  20 #include <map>
  21 #include <vector>
  22
  23 // Other libraries and framework includes
  24 // Project includes
  25 #include "lldb/Core/DataExtractor.h"
  26 #include "lldb/Core/Stream.h"
  27
  28 class MappedHash {
  29 public:
  30   enum HashFunctionType {
  31     eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used
  32                           // by the ELF GNU_HASH sections
  33   };
  34
  35   static uint32_t HashStringUsingDJB(const char *s) {
  36     uint32_t h = 5381;
  37
  38     for (unsigned char c = *s; c; c = *++s)
  39       h = ((h << 5) + h) + c;
  40
  41     return h;
  42   }
  43
  44   static uint32_t HashString(uint32_t hash_function, const char *s) {
  45     if (!s)
  46       return 0;
  47
  48     switch (hash_function) {
  49     case MappedHash::eHashFunctionDJB:
  50       return HashStringUsingDJB(s);
  51
  52     default:
  53       break;
  54     }
  55     llvm_unreachable("Invalid hash function index");
  56   }
  57
  58   static const uint32_t HASH_MAGIC = 0x48415348u;
  59   static const uint32_t HASH_CIGAM = 0x48534148u;
  60
  61   template <typename T> struct Header {
  62     typedef T HeaderData;
  63
  64     uint32_t
  65         magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
  66     uint16_t version;         // Version number
  67     uint16_t hash_function;   // The hash function enumeration that was used
  68     uint32_t bucket_count;    // The number of buckets in this hash table
  69     uint32_t hashes_count;    // The total number of unique hash values and hash
  70                               // data offsets in this table
  71     uint32_t header_data_len; // The size in bytes of the "header_data" template
  72                               // member below
  73     HeaderData header_data;   //
  74
  75     Header()
  76         : magic(HASH_MAGIC), version(1), hash_function(eHashFunctionDJB),
  77           bucket_count(0), hashes_count(0), header_data_len(sizeof(T)),
  78           header_data() {}
  79
  80     virtual ~Header() = default;
  81
  82     size_t GetByteSize() const {
  83       return sizeof(magic) + sizeof(version) + sizeof(hash_function) +
  84              sizeof(bucket_count) + sizeof(hashes_count) +
  85              sizeof(header_data_len) + header_data_len;
  86     }
  87
  88     virtual size_t GetByteSize(const HeaderData &header_data) = 0;
  89
  90     void SetHeaderDataByteSize(uint32_t header_data_byte_size) {
  91       header_data_len = header_data_byte_size;
  92     }
  93
  94     void Dump(lldb_private::Stream &s) {
  95       s.Printf("header.magic              = 0x%8.8x\n", magic);
  96       s.Printf("header.version            = 0x%4.4x\n", version);
  97       s.Printf("header.hash_function      = 0x%4.4x\n", hash_function);
  98       s.Printf("header.bucket_count       = 0x%8.8x %u\n", bucket_count,
  99                bucket_count);
 100       s.Printf("header.hashes_count       = 0x%8.8x %u\n", hashes_count,
 101                hashes_count);
 102       s.Printf("header.header_data_len    = 0x%8.8x %u\n", header_data_len,
 103                header_data_len);
 104     }
 105
 106     virtual lldb::offset_t Read(lldb_private::DataExtractor &data,
 107                                 lldb::offset_t offset) {
 108       if (data.ValidOffsetForDataOfSize(
 109               offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) +
 110                           sizeof(bucket_count) + sizeof(hashes_count) +
 111                           sizeof(header_data_len))) {
 112         magic = data.GetU32(&offset);
 113         if (magic != HASH_MAGIC) {
 114           if (magic == HASH_CIGAM) {
 115             switch (data.GetByteOrder()) {
 116             case lldb::eByteOrderBig:
 117               data.SetByteOrder(lldb::eByteOrderLittle);
 118               break;
 119             case lldb::eByteOrderLittle:
 120               data.SetByteOrder(lldb::eByteOrderBig);
 121               break;
 122             default:
 123               return LLDB_INVALID_OFFSET;
 124             }
 125           } else {
 126             // Magic bytes didn't match
 127             version = 0;
 128             return LLDB_INVALID_OFFSET;
 129           }
 130         }
 131
 132         version = data.GetU16(&offset);
 133         if (version != 1) {
 134           // Unsupported version
 135           return LLDB_INVALID_OFFSET;
 136         }
 137         hash_function = data.GetU16(&offset);
 138         if (hash_function == 4)
 139           hash_function = 0; // Deal with pre-release version of this table...
 140         bucket_count = data.GetU32(&offset);
 141         hashes_count = data.GetU32(&offset);
 142         header_data_len = data.GetU32(&offset);
 143         return offset;
 144       }
 145       return LLDB_INVALID_OFFSET;
 146     }
 147     //
 148     //        // Returns a buffer that contains a serialized version of this
 149     //        table
 150     //        // that must be freed with free().
 151     //        virtual void *
 152     //        Write (int fd);
 153   };
 154
 155   template <typename __KeyType, class __HeaderDataType, class __ValueType>
 156   class ExportTable {
 157   public:
 158     typedef __HeaderDataType HeaderDataType;
 159     typedef Header<HeaderDataType> HeaderType;
 160     typedef __KeyType KeyType;
 161     typedef __ValueType ValueType;
 162
 163     struct Entry {
 164       uint32_t hash;
 165       KeyType key;
 166       ValueType value;
 167     };
 168
 169     typedef std::vector<ValueType> ValueArrayType;
 170
 171     typedef std::map<KeyType, ValueArrayType> HashData;
 172     // Map a name hash to one or more name infos
 173     typedef std::map<uint32_t, HashData> HashToHashData;
 174
 175     virtual KeyType GetKeyForStringType(const char *cstr) const = 0;
 176
 177     virtual size_t GetByteSize(const HashData &key_to_key_values) = 0;
 178
 179     virtual bool WriteHashData(const HashData &hash_data,
 180                                lldb_private::Stream &ostrm) = 0;
 181     //
 182     void AddEntry(const char *cstr, const ValueType &value) {
 183       Entry entry;
 184       entry.hash = MappedHash::HashString(eHashFunctionDJB, cstr);
 185       entry.key = GetKeyForStringType(cstr);
 186       entry.value = value;
 187       m_entries.push_back(entry);
 188     }
 189
 190     void Save(const HeaderDataType &header_data, lldb_private::Stream &ostrm) {
 191       if (m_entries.empty())
 192         return;
 193
 194       const uint32_t num_entries = m_entries.size();
 195       uint32_t i = 0;
 196
 197       HeaderType header;
 198
 199       header.magic = HASH_MAGIC;
 200       header.version = 1;
 201       header.hash_function = eHashFunctionDJB;
 202       header.bucket_count = 0;
 203       header.hashes_count = 0;
 204       header.prologue_length = header_data.GetByteSize();
 205
 206       // We need to figure out the number of unique hashes first before we can
 207       // calculate the number of buckets we want to use.
 208       typedef std::vector<uint32_t> hash_coll;
 209       hash_coll unique_hashes;
 210       unique_hashes.resize(num_entries);
 211       for (i = 0; i < num_entries; ++i)
 212         unique_hashes[i] = m_entries[i].hash;
 213       std::sort(unique_hashes.begin(), unique_hashes.end());
 214       hash_coll::iterator pos =
 215           std::unique(unique_hashes.begin(), unique_hashes.end());
 216       const size_t num_unique_hashes =
 217           std::distance(unique_hashes.begin(), pos);
 218
 219       if (num_unique_hashes > 1024)
 220         header.bucket_count = num_unique_hashes / 4;
 221       else if (num_unique_hashes > 16)
 222         header.bucket_count = num_unique_hashes / 2;
 223       else
 224         header.bucket_count = num_unique_hashes;
 225       if (header.bucket_count == 0)
 226         header.bucket_count = 1;
 227
 228       std::vector<HashToHashData> hash_buckets;
 229       std::vector<uint32_t> hash_indexes(header.bucket_count, 0);
 230       std::vector<uint32_t> hash_values;
 231       std::vector<uint32_t> hash_offsets;
 232       hash_buckets.resize(header.bucket_count);
 233       uint32_t bucket_entry_empties = 0;
 234       // StreamString hash_file_data(Stream::eBinary,
 235       // dwarf->GetObjectFile()->GetAddressByteSize(),
 236       // dwarf->GetObjectFile()->GetByteSize());
 237
 238       // Push all of the hashes into their buckets and create all bucket
 239       // entries all populated with data.
 240       for (i = 0; i < num_entries; ++i) {
 241         const uint32_t hash = m_entries[i].hash;
 242         const uint32_t bucket_idx = hash % header.bucket_count;
 243         const uint32_t strp_offset = m_entries[i].str_offset;
 244         const uint32_t die_offset = m_entries[i].die_offset;
 245         hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset);
 246       }
 247
 248       // Now for each bucket we write the bucket value which is the
 249       // number of hashes and the hash index encoded into a single
 250       // 32 bit unsigned integer.
 251       for (i = 0; i < header.bucket_count; ++i) {
 252         HashToHashData &bucket_entry = hash_buckets[i];
 253
 254         if (bucket_entry.empty()) {
 255           // Empty bucket
 256           ++bucket_entry_empties;
 257           hash_indexes[i] = UINT32_MAX;
 258         } else {
 259           const uint32_t hash_value_index = hash_values.size();
 260           uint32_t hash_count = 0;
 261           typename HashToHashData::const_iterator pos, end = bucket_entry.end();
 262           for (pos = bucket_entry.begin(); pos != end; ++pos) {
 263             hash_values.push_back(pos->first);
 264             hash_offsets.push_back(GetByteSize(pos->second));
 265             ++hash_count;
 266           }
 267
 268           hash_indexes[i] = hash_value_index;
 269         }
 270       }
 271       header.hashes_count = hash_values.size();
 272
 273       // Write the header out now that we have the hash_count
 274       header.Write(ostrm);
 275
 276       // Now for each bucket we write the start index of the hashes
 277       // for the current bucket, or UINT32_MAX if the bucket is empty
 278       for (i = 0; i < header.bucket_count; ++i) {
 279         ostrm.PutHex32(hash_indexes[i]);
 280       }
 281
 282       // Now we need to write out all of the hash values
 283       for (i = 0; i < header.hashes_count; ++i) {
 284         ostrm.PutHex32(hash_values[i]);
 285       }
 286
 287       // Now we need to write out all of the hash data offsets,
 288       // there is an offset for each hash in the hashes array
 289       // that was written out above
 290       for (i = 0; i < header.hashes_count; ++i) {
 291         ostrm.PutHex32(hash_offsets[i]);
 292       }
 293
 294       // Now we write the data for each hash and verify we got the offset
 295       // correct above...
 296       for (i = 0; i < header.bucket_count; ++i) {
 297         HashToHashData &bucket_entry = hash_buckets[i];
 298
 299         typename HashToHashData::const_iterator pos, end = bucket_entry.end();
 300         for (pos = bucket_entry.begin(); pos != end; ++pos) {
 301           if (!bucket_entry.empty()) {
 302             WriteHashData(pos->second);
 303           }
 304         }
 305       }
 306     }
 307
 308   protected:
 309     typedef std::vector<Entry> collection;
 310     collection m_entries;
 311   };
 312
 313   // A class for reading and using a saved hash table from a block of data
 314   // in memory
 315   template <typename __KeyType, class __HeaderType, class __HashData>
 316   class MemoryTable {
 317   public:
 318     typedef __HeaderType HeaderType;
 319     typedef __KeyType KeyType;
 320     typedef __HashData HashData;
 321
 322     enum Result {
 323       eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was
 324                             // filled in successfully
 325       eResultKeyMismatch =
 326           1u, // Bucket hash data collision, but key didn't match
 327       eResultEndOfHashData = 2u, // The chain of items for this hash data in
 328                                  // this bucket is terminated, search no more
 329       eResultError = 3u          // Error parsing the hash data, abort
 330     };
 331
 332     struct Pair {
 333       KeyType key;
 334       HashData value;
 335     };
 336
 337     MemoryTable(lldb_private::DataExtractor &data)
 338         : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr),
 339           m_hash_offsets(nullptr) {
 340       lldb::offset_t offset = m_header.Read(data, 0);
 341       if (offset != LLDB_INVALID_OFFSET && IsValid()) {
 342         m_hash_indexes = (const uint32_t *)data.GetData(
 343             &offset, m_header.bucket_count * sizeof(uint32_t));
 344         m_hash_values = (const uint32_t *)data.GetData(
 345             &offset, m_header.hashes_count * sizeof(uint32_t));
 346         m_hash_offsets = (const uint32_t *)data.GetData(
 347             &offset, m_header.hashes_count * sizeof(uint32_t));
 348       }
 349     }
 350
 351     virtual ~MemoryTable() = default;
 352
 353     bool IsValid() const {
 354       return m_header.version == 1 &&
 355              m_header.hash_function == eHashFunctionDJB &&
 356              m_header.bucket_count > 0 && m_header.hashes_count > 0;
 357     }
 358
 359     uint32_t GetHashIndex(uint32_t bucket_idx) const {
 360       if (m_hash_indexes && bucket_idx < m_header.bucket_count)
 361         return m_hash_indexes[bucket_idx];
 362       return UINT32_MAX;
 363     }
 364
 365     uint32_t GetHashValue(uint32_t hash_idx) const {
 366       if (m_hash_values && hash_idx < m_header.hashes_count)
 367         return m_hash_values[hash_idx];
 368       return UINT32_MAX;
 369     }
 370
 371     uint32_t GetHashDataOffset(uint32_t hash_idx) const {
 372       if (m_hash_offsets && hash_idx < m_header.hashes_count)
 373         return m_hash_offsets[hash_idx];
 374       return UINT32_MAX;
 375     }
 376
 377     bool Find(const char *name, Pair &pair) const {
 378       if (!name || !name[0])
 379         return false;
 380
 381       if (IsValid()) {
 382         const uint32_t bucket_count = m_header.bucket_count;
 383         const uint32_t hash_count = m_header.hashes_count;
 384         const uint32_t hash_value =
 385             MappedHash::HashString(m_header.hash_function, name);
 386         const uint32_t bucket_idx = hash_value % bucket_count;
 387         uint32_t hash_idx = GetHashIndex(bucket_idx);
 388         if (hash_idx < hash_count) {
 389           for (; hash_idx < hash_count; ++hash_idx) {
 390             const uint32_t curr_hash_value = GetHashValue(hash_idx);
 391             if (curr_hash_value == hash_value) {
 392               lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx);
 393               while (hash_data_offset != UINT32_MAX) {
 394                 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
 395                 Result hash_result =
 396                     GetHashDataForName(name, &hash_data_offset, pair);
 397                 // Check the result of getting our hash data
 398                 switch (hash_result) {
 399                 case eResultKeyMatch:
 400                   return true;
 401
 402                 case eResultKeyMismatch:
 403                   if (prev_hash_data_offset == hash_data_offset)
 404                     return false;
 405                   break;
 406
 407                 case eResultEndOfHashData:
 408                   // The last HashData for this key has been reached, stop
 409                   // searching
 410                   return false;
 411                 case eResultError:
 412                   // Error parsing the hash data, abort
 413                   return false;
 414                 }
 415               }
 416             }
 417             if ((curr_hash_value % bucket_count) != bucket_idx)
 418               break;
 419           }
 420         }
 421       }
 422       return false;
 423     }
 424
 425     // This method must be implemented in any subclasses.
 426     // The KeyType is user specified and must somehow result in a string
 427     // value. For example, the KeyType might be a string offset in a string
 428     // table and subclasses can store their string table as a member of the
 429     // subclass and return a valie "const char *" given a "key". The value
 430     // could also be a C string pointer, in which case just returning "key"
 431     // will suffice.
 432     virtual const char *GetStringForKeyType(KeyType key) const = 0;
 433
 434     virtual bool ReadHashData(uint32_t hash_data_offset,
 435                               HashData &hash_data) const = 0;
 436
 437     // This method must be implemented in any subclasses and it must try to
 438     // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
 439     // parameter. This offset should be updated as bytes are consumed and
 440     // a value "Result" enum should be returned. If the "name" matches the
 441     // full name for the "pair.key" (which must be filled in by this call),
 442     // then the HashData in the pair ("pair.value") should be extracted and
 443     // filled in and "eResultKeyMatch" should be returned. If "name" doesn't
 444     // match this string for the key, then "eResultKeyMismatch" should be
 445     // returned and all data for the current HashData must be consumed or
 446     // skipped and the "hash_data_offset_ptr" offset needs to be updated to
 447     // point to the next HashData. If the end of the HashData objects for
 448     // a given hash value have been reached, then "eResultEndOfHashData"
 449     // should be returned. If anything else goes wrong during parsing,
 450     // return "eResultError" and the corresponding "Find()" function will
 451     // be canceled and return false.
 452     virtual Result GetHashDataForName(const char *name,
 453                                       lldb::offset_t *hash_data_offset_ptr,
 454                                       Pair &pair) const = 0;
 455
 456     const HeaderType &GetHeader() { return m_header; }
 457
 458     void ForEach(
 459         std::function<bool(const HashData &hash_data)> const &callback) const {
 460       const size_t num_hash_offsets = m_header.hashes_count;
 461       for (size_t i = 0; i < num_hash_offsets; ++i) {
 462         uint32_t hash_data_offset = GetHashDataOffset(i);
 463         if (hash_data_offset != UINT32_MAX) {
 464           HashData hash_data;
 465           if (ReadHashData(hash_data_offset, hash_data)) {
 466             // If the callback returns false, then we are done and should stop
 467             if (callback(hash_data) == false)
 468               return;
 469           }
 470         }
 471       }
 472     }
 473
 474   protected:
 475     // Implementation agnostic information
 476     HeaderType m_header;
 477     const uint32_t *m_hash_indexes;
 478     const uint32_t *m_hash_values;
 479     const uint32_t *m_hash_offsets;
 480   };
 481 };
 482
 483 #endif // liblldb_MappedHash_h_