contrib/llvm/tools/lldb/include/lldb/Core/MappedHash.h

   1 //
   2 //  MappedHash.h
   3 //
   4
   5 #ifndef liblldb_MappedHash_h_
   6 #define liblldb_MappedHash_h_
   7
   8 #include <assert.h>
   9 #include <stdint.h>
  10
  11 #include <map>
  12 #include <vector>
  13
  14 #include "lldb/Core/DataExtractor.h"
  15 #include "lldb/Core/Stream.h"
  16
  17 class MappedHash
  18 {
  19 public:
  20
  21     enum HashFunctionType
  22     {
  23         eHashFunctionDJB        = 0u    // Daniel J Bernstein hash function that is also used by the ELF GNU_HASH sections
  24     };
  25
  26
  27     static uint32_t
  28     HashStringUsingDJB (const char *s)
  29     {
  30         uint32_t h = 5381;
  31
  32         for (unsigned char c = *s; c; c = *++s)
  33             h = ((h << 5) + h) + c;
  34
  35         return h;
  36     }
  37
  38     static uint32_t
  39     HashString (uint32_t hash_function, const char *s)
  40     {
  41         switch (hash_function)
  42         {
  43             case MappedHash::eHashFunctionDJB:
  44                 return HashStringUsingDJB (s);
  45
  46             default:
  47                 break;
  48         }
  49         assert (!"Invalid hash function index");
  50         return 0;
  51     }
  52
  53
  54     static const uint32_t HASH_MAGIC = 0x48415348u;
  55     static const uint32_t HASH_CIGAM = 0x48534148u;
  56
  57     template <typename T>
  58     struct Header
  59         {
  60         typedef T HeaderData;
  61
  62         uint32_t    magic;             // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
  63         uint16_t    version;           // Version number
  64                 uint16_t    hash_function;     // The hash function enumeration that was used
  65                 uint32_t    bucket_count;      // The number of buckets in this hash table
  66                 uint32_t    hashes_count;      // The total number of unique hash values and hash data offsets in this table
  67         uint32_t    header_data_len;   // The size in bytes of the "header_data" template member below
  68         HeaderData  header_data;       //
  69
  70                 Header () :
  71             magic (HASH_MAGIC),
  72             version (1),
  73             hash_function (eHashFunctionDJB),
  74             bucket_count (0),
  75             hashes_count (0),
  76             header_data_len (sizeof(T)),
  77             header_data ()
  78                 {
  79                 }
  80
  81         virtual
  82         ~Header()
  83         {
  84         }
  85
  86         size_t
  87         GetByteSize() const
  88         {
  89             return  sizeof(magic) +
  90                     sizeof(version) +
  91                     sizeof(hash_function) +
  92                     sizeof(bucket_count) +
  93                     sizeof(hashes_count) +
  94                     sizeof(header_data_len) +
  95                     header_data_len;
  96         }
  97
  98         virtual size_t
  99         GetByteSize (const HeaderData &header_data) = 0;
 100
 101         void
 102         SetHeaderDataByteSize (uint32_t header_data_byte_size)
 103         {
 104             header_data_len = header_data_byte_size;
 105         }
 106
 107         void
 108         Dump (lldb_private::Stream &s)
 109         {
 110             s.Printf ("header.magic              = 0x%8.8x\n", magic);
 111             s.Printf ("header.version            = 0x%4.4x\n", version);
 112             s.Printf ("header.hash_function      = 0x%4.4x\n", hash_function);
 113             s.Printf ("header.bucket_count       = 0x%8.8x %u\n", bucket_count, bucket_count);
 114             s.Printf ("header.hashes_count       = 0x%8.8x %u\n", hashes_count, hashes_count);
 115             s.Printf ("header.header_data_len    = 0x%8.8x %u\n", header_data_len, header_data_len);
 116         }
 117
 118         virtual lldb::offset_t
 119         Read (lldb_private::DataExtractor &data, lldb::offset_t offset)
 120         {
 121             if (data.ValidOffsetForDataOfSize (offset,
 122                                                sizeof (magic) +
 123                                                sizeof (version) +
 124                                                sizeof (hash_function) +
 125                                                sizeof (bucket_count) +
 126                                                sizeof (hashes_count) +
 127                                                sizeof (header_data_len)))
 128             {
 129                 magic = data.GetU32 (&offset);
 130                 if (magic != HASH_MAGIC)
 131                 {
 132                     if (magic == HASH_CIGAM)
 133                     {
 134                         switch (data.GetByteOrder())
 135                         {
 136                             case lldb::eByteOrderBig:
 137                                 data.SetByteOrder(lldb::eByteOrderLittle);
 138                                 break;
 139                             case lldb::eByteOrderLittle:
 140                                 data.SetByteOrder(lldb::eByteOrderBig);
 141                                 break;
 142                             default:
 143                                 return LLDB_INVALID_OFFSET;
 144                         }
 145                     }
 146                     else
 147                     {
 148                         // Magic bytes didn't match
 149                         version = 0;
 150                         return LLDB_INVALID_OFFSET;
 151                     }
 152                 }
 153
 154                 version = data.GetU16 (&offset);
 155                 if (version != 1)
 156                 {
 157                     // Unsupported version
 158                     return LLDB_INVALID_OFFSET;
 159                 }
 160                 hash_function       = data.GetU16 (&offset);
 161                 if (hash_function == 4)
 162                     hash_function = 0; // Deal with pre-release version of this table...
 163                 bucket_count        = data.GetU32 (&offset);
 164                 hashes_count        = data.GetU32 (&offset);
 165                 header_data_len     = data.GetU32 (&offset);
 166                 return offset;
 167             }
 168             return LLDB_INVALID_OFFSET;
 169         }
 170 //
 171 //        // Returns a buffer that contains a serialized version of this table
 172 //        // that must be freed with free().
 173 //        virtual void *
 174 //        Write (int fd);
 175         };
 176
 177     template <typename __KeyType, class __HeaderDataType, class __ValueType>
 178     class ExportTable
 179     {
 180     public:
 181         typedef __HeaderDataType HeaderDataType;
 182         typedef Header<HeaderDataType> HeaderType;
 183         typedef __KeyType KeyType;
 184         typedef __ValueType ValueType;
 185
 186         struct Entry
 187         {
 188             uint32_t    hash;
 189             KeyType     key;
 190             ValueType   value;
 191         };
 192
 193         typedef std::vector<ValueType> ValueArrayType;
 194
 195         typedef std::map<KeyType, ValueArrayType> HashData;
 196         // Map a name hash to one or more name infos
 197         typedef std::map<uint32_t, HashData> HashToHashData;
 198
 199         virtual KeyType
 200         GetKeyForStringType (const char *cstr) const = 0;
 201
 202         virtual size_t
 203         GetByteSize (const HashData &key_to_key_values) = 0;
 204
 205         virtual bool
 206         WriteHashData (const HashData &hash_data,
 207                        lldb_private::Stream &ostrm) = 0;
 208 //
 209         void
 210         AddEntry (const char *cstr, const ValueType &value)
 211         {
 212             Entry entry;
 213             entry.hash = MappedHash::HashString (eHashFunctionDJB, cstr);
 214             entry.key = GetKeyForStringType (cstr);
 215             entry.value = value;
 216             m_entries.push_back (entry);
 217         }
 218
 219         void
 220         Save (const HeaderDataType &header_data,
 221               lldb_private::Stream &ostrm)
 222         {
 223             if (m_entries.empty())
 224                 return;
 225
 226             const uint32_t num_entries = m_entries.size();
 227             uint32_t i = 0;
 228
 229             HeaderType header;
 230
 231             header.magic = HASH_MAGIC;
 232             header.version = 1;
 233             header.hash_function = eHashFunctionDJB;
 234             header.bucket_count = 0;
 235             header.hashes_count = 0;
 236             header.prologue_length = header_data.GetByteSize();
 237
 238             // We need to figure out the number of unique hashes first before we can
 239             // calculate the number of buckets we want to use.
 240             typedef std::vector<uint32_t> hash_coll;
 241             hash_coll unique_hashes;
 242             unique_hashes.resize (num_entries);
 243             for (i=0; i<num_entries; ++i)
 244                 unique_hashes[i] = m_entries[i].hash;
 245             std::sort (unique_hashes.begin(), unique_hashes.end());
 246             hash_coll::iterator pos = std::unique (unique_hashes.begin(), unique_hashes.end());
 247             const size_t num_unique_hashes = std::distance (unique_hashes.begin(), pos);
 248
 249             if (num_unique_hashes > 1024)
 250                 header.bucket_count = num_unique_hashes/4;
 251             else if (num_unique_hashes > 16)
 252                 header.bucket_count = num_unique_hashes/2;
 253             else
 254                 header.bucket_count = num_unique_hashes;
 255             if (header.bucket_count == 0)
 256                 header.bucket_count = 1;
 257
 258
 259             std::vector<HashToHashData> hash_buckets;
 260             std::vector<uint32_t> hash_indexes (header.bucket_count, 0);
 261             std::vector<uint32_t> hash_values;
 262             std::vector<uint32_t> hash_offsets;
 263             hash_buckets.resize (header.bucket_count);
 264             uint32_t bucket_entry_empties = 0;
 265             //StreamString hash_file_data(Stream::eBinary, dwarf->GetObjectFile()->GetAddressByteSize(), dwarf->GetObjectFile()->GetByteSize());
 266
 267             // Push all of the hashes into their buckets and create all bucket
 268             // entries all populated with data.
 269             for (i=0; i<num_entries; ++i)
 270             {
 271                 const uint32_t hash = m_entries[i].hash;
 272                 const uint32_t bucket_idx = hash % header.bucket_count;
 273                 const uint32_t strp_offset = m_entries[i].str_offset;
 274                 const uint32_t die_offset = m_entries[i].die_offset;
 275                 hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset);
 276             }
 277
 278             // Now for each bucket we write the bucket value which is the
 279             // number of hashes and the hash index encoded into a single
 280             // 32 bit unsigned integer.
 281             for (i=0; i<header.bucket_count; ++i)
 282             {
 283                 HashToHashData &bucket_entry = hash_buckets[i];
 284
 285                 if (bucket_entry.empty())
 286                 {
 287                     // Empty bucket
 288                     ++bucket_entry_empties;
 289                     hash_indexes[i] = UINT32_MAX;
 290                 }
 291                 else
 292                 {
 293                     const uint32_t hash_value_index = hash_values.size();
 294                     uint32_t hash_count = 0;
 295                     typename HashToHashData::const_iterator pos, end = bucket_entry.end();
 296                     for (pos = bucket_entry.begin(); pos != end; ++pos)
 297                     {
 298                         hash_values.push_back (pos->first);
 299                         hash_offsets.push_back (GetByteSize (pos->second));
 300                         ++hash_count;
 301                     }
 302
 303                     hash_indexes[i] = hash_value_index;
 304                 }
 305             }
 306             header.hashes_count = hash_values.size();
 307
 308             // Write the header out now that we have the hash_count
 309             header.Write (ostrm);
 310
 311             // Now for each bucket we write the start index of the hashes
 312             // for the current bucket, or UINT32_MAX if the bucket is empty
 313             for (i=0; i<header.bucket_count; ++i)
 314             {
 315                 ostrm.PutHex32(hash_indexes[i]);
 316             }
 317
 318             // Now we need to write out all of the hash values
 319             for (i=0; i<header.hashes_count; ++i)
 320             {
 321                 ostrm.PutHex32(hash_values[i]);
 322             }
 323
 324             // Now we need to write out all of the hash data offsets,
 325             // there is an offset for each hash in the hashes array
 326             // that was written out above
 327             for (i=0; i<header.hashes_count; ++i)
 328             {
 329                 ostrm.PutHex32(hash_offsets[i]);
 330             }
 331
 332             // Now we write the data for each hash and verify we got the offset
 333             // correct above...
 334             for (i=0; i<header.bucket_count; ++i)
 335             {
 336                 HashToHashData &bucket_entry = hash_buckets[i];
 337
 338                 typename HashToHashData::const_iterator pos, end = bucket_entry.end();
 339                 for (pos = bucket_entry.begin(); pos != end; ++pos)
 340                 {
 341                     if (!bucket_entry.empty())
 342                     {
 343                         WriteHashData (pos->second);
 344                     }
 345                 }
 346             }
 347         }
 348     protected:
 349         typedef std::vector<Entry> collection;
 350         collection m_entries;
 351     };
 352     // A class for reading and using a saved hash table from a block of data
 353     // in memory
 354     template <typename __KeyType, class __HeaderType, class __HashData>
 355     class MemoryTable
 356     {
 357     public:
 358         typedef __HeaderType HeaderType;
 359         typedef __KeyType KeyType;
 360         typedef __HashData HashData;
 361
 362         enum Result
 363         {
 364             eResultKeyMatch         = 0u, // The entry was found, key matched and "pair" was filled in successfully
 365             eResultKeyMismatch      = 1u, // Bucket hash data collision, but key didn't match
 366             eResultEndOfHashData    = 2u, // The chain of items for this hash data in this bucket is terminated, search no more
 367             eResultError            = 3u  // Error parsing the hash data, abort
 368         };
 369
 370         struct Pair
 371         {
 372             KeyType key;
 373             HashData value;
 374         };
 375
 376         MemoryTable (lldb_private::DataExtractor &data) :
 377             m_header (),
 378             m_hash_indexes (NULL),
 379             m_hash_values (NULL),
 380             m_hash_offsets (NULL)
 381         {
 382             lldb::offset_t offset = m_header.Read (data, 0);
 383             if (offset != LLDB_INVALID_OFFSET && IsValid ())
 384             {
 385                 m_hash_indexes = (uint32_t *)data.GetData (&offset, m_header.bucket_count * sizeof(uint32_t));
 386                 m_hash_values  = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t));
 387                 m_hash_offsets = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t));
 388             }
 389         }
 390
 391         virtual
 392         ~MemoryTable ()
 393         {
 394         }
 395
 396         bool
 397         IsValid () const
 398         {
 399             return m_header.version == 1 &&
 400                    m_header.hash_function == eHashFunctionDJB &&
 401                    m_header.bucket_count > 0 &&
 402                    m_header.hashes_count > 0;
 403         }
 404
 405         uint32_t
 406         GetHashIndex (uint32_t bucket_idx) const
 407         {
 408             if (m_hash_indexes && bucket_idx < m_header.bucket_count)
 409                 return m_hash_indexes[bucket_idx];
 410             return UINT32_MAX;
 411         }
 412
 413         uint32_t
 414         GetHashValue (uint32_t hash_idx) const
 415         {
 416             if (m_hash_values && hash_idx < m_header.hashes_count)
 417                 return m_hash_values[hash_idx];
 418             return UINT32_MAX;
 419         }
 420
 421         uint32_t
 422         GetHashDataOffset (uint32_t hash_idx) const
 423         {
 424             if (m_hash_offsets && hash_idx < m_header.hashes_count)
 425                 return m_hash_offsets[hash_idx];
 426             return UINT32_MAX;
 427         }
 428
 429         bool
 430         Find (const char *name, Pair &pair) const
 431         {
 432             if (IsValid ())
 433             {
 434                 const uint32_t bucket_count = m_header.bucket_count;
 435                 const uint32_t hash_count = m_header.hashes_count;
 436                 const uint32_t hash_value = MappedHash::HashString (m_header.hash_function, name);
 437                 const uint32_t bucket_idx = hash_value % bucket_count;
 438                 uint32_t hash_idx = GetHashIndex (bucket_idx);
 439                 if (hash_idx < hash_count)
 440                 {
 441                     for (; hash_idx < hash_count; ++hash_idx)
 442                     {
 443                         const uint32_t curr_hash_value = GetHashValue (hash_idx);
 444                         if (curr_hash_value == hash_value)
 445                         {
 446                             lldb::offset_t hash_data_offset = GetHashDataOffset (hash_idx);
 447                             while (hash_data_offset != UINT32_MAX)
 448                             {
 449                                 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
 450                                 Result hash_result = GetHashDataForName (name, &hash_data_offset, pair);
 451                                 // Check the result of getting our hash data
 452                                 switch (hash_result)
 453                                 {
 454                                 case eResultKeyMatch:
 455                                     return true;
 456
 457                                 case eResultKeyMismatch:
 458                                     if (prev_hash_data_offset == hash_data_offset)
 459                                         return false;
 460                                     break;
 461
 462                                 case eResultEndOfHashData:
 463                                     // The last HashData for this key has been reached, stop searching
 464                                     return false;
 465                                 case eResultError:
 466                                     // Error parsing the hash data, abort
 467                                     return false;
 468                                 }
 469                             }
 470                         }
 471                         if ((curr_hash_value % bucket_count) != bucket_idx)
 472                             break;
 473                     }
 474                 }
 475             }
 476             return false;
 477         }
 478
 479         // This method must be implemented in any subclasses.
 480         // The KeyType is user specified and must somehow result in a string
 481         // value. For example, the KeyType might be a string offset in a string
 482         // table and subclasses can store their string table as a member of the
 483         // subclass and return a valie "const char *" given a "key". The value
 484         // could also be a C string pointer, in which case just returning "key"
 485         // will suffice.
 486
 487         virtual const char *
 488         GetStringForKeyType (KeyType key) const = 0;
 489
 490         virtual bool
 491         ReadHashData (uint32_t hash_data_offset,
 492                       HashData &hash_data) const = 0;
 493
 494         // This method must be implemented in any subclasses and it must try to
 495         // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
 496         // parameter. This offset should be updated as bytes are consumed and
 497         // a value "Result" enum should be returned. If the "name" matches the
 498         // full name for the "pair.key" (which must be filled in by this call),
 499         // then the HashData in the pair ("pair.value") should be extracted and
 500         // filled in and "eResultKeyMatch" should be returned. If "name" doesn't
 501         // match this string for the key, then "eResultKeyMismatch" should be
 502         // returned and all data for the current HashData must be consumed or
 503         // skipped and the "hash_data_offset_ptr" offset needs to be updated to
 504         // point to the next HashData. If the end of the HashData objects for
 505         // a given hash value have been reached, then "eResultEndOfHashData"
 506         // should be returned. If anything else goes wrong during parsing,
 507         // return "eResultError" and the corresponding "Find()" function will
 508         // be canceled and return false.
 509
 510         virtual Result
 511         GetHashDataForName (const char *name,
 512                             lldb::offset_t* hash_data_offset_ptr,
 513                             Pair &pair) const = 0;
 514
 515         const HeaderType &
 516         GetHeader()
 517         {
 518             return m_header;
 519         }
 520
 521
 522         void
 523         ForEach (std::function <bool(const HashData &hash_data)> const &callback) const
 524         {
 525             const size_t num_hash_offsets = m_header.hashes_count;
 526             for (size_t i=0; i<num_hash_offsets; ++i)
 527             {
 528                 uint32_t hash_data_offset = GetHashDataOffset (i);
 529                 if (hash_data_offset != UINT32_MAX)
 530                 {
 531                     HashData hash_data;
 532                     if (ReadHashData (hash_data_offset, hash_data))
 533                     {
 534                         // If the callback returns false, then we are done and should stop
 535                         if (callback(hash_data) == false)
 536                             return;
 537                     }
 538                 }
 539             }
 540         }
 541
 542     protected:
 543         // Implementation agnostic information
 544         HeaderType m_header;
 545         uint32_t *m_hash_indexes;
 546         uint32_t *m_hash_values;
 547         uint32_t *m_hash_offsets;
 548     };
 549
 550 };
 551
 552 #endif // #ifndef liblldb_MappedHash_h_