5 #ifndef liblldb_MappedHash_h_
6 #define liblldb_MappedHash_h_
14 #include "lldb/Core/DataExtractor.h"
15 #include "lldb/Core/Stream.h"
23 eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used by the ELF GNU_HASH sections
28 HashStringUsingDJB (const char *s)
32 for (unsigned char c = *s; c; c = *++s)
33 h = ((h << 5) + h) + c;
39 HashString (uint32_t hash_function, const char *s)
41 switch (hash_function)
43 case MappedHash::eHashFunctionDJB:
44 return HashStringUsingDJB (s);
49 assert (!"Invalid hash function index");
54 static const uint32_t HASH_MAGIC = 0x48415348u;
55 static const uint32_t HASH_CIGAM = 0x48534148u;
62 uint32_t magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
63 uint16_t version; // Version number
64 uint16_t hash_function; // The hash function enumeration that was used
65 uint32_t bucket_count; // The number of buckets in this hash table
66 uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table
67 uint32_t header_data_len; // The size in bytes of the "header_data" template member below
68 HeaderData header_data; //
73 hash_function (eHashFunctionDJB),
76 header_data_len (sizeof(T)),
89 return sizeof(magic) +
91 sizeof(hash_function) +
92 sizeof(bucket_count) +
93 sizeof(hashes_count) +
94 sizeof(header_data_len) +
99 GetByteSize (const HeaderData &header_data) = 0;
102 SetHeaderDataByteSize (uint32_t header_data_byte_size)
104 header_data_len = header_data_byte_size;
108 Dump (lldb_private::Stream &s)
110 s.Printf ("header.magic = 0x%8.8x\n", magic);
111 s.Printf ("header.version = 0x%4.4x\n", version);
112 s.Printf ("header.hash_function = 0x%4.4x\n", hash_function);
113 s.Printf ("header.bucket_count = 0x%8.8x %u\n", bucket_count, bucket_count);
114 s.Printf ("header.hashes_count = 0x%8.8x %u\n", hashes_count, hashes_count);
115 s.Printf ("header.header_data_len = 0x%8.8x %u\n", header_data_len, header_data_len);
118 virtual lldb::offset_t
119 Read (lldb_private::DataExtractor &data, lldb::offset_t offset)
121 if (data.ValidOffsetForDataOfSize (offset,
124 sizeof (hash_function) +
125 sizeof (bucket_count) +
126 sizeof (hashes_count) +
127 sizeof (header_data_len)))
129 magic = data.GetU32 (&offset);
130 if (magic != HASH_MAGIC)
132 if (magic == HASH_CIGAM)
134 switch (data.GetByteOrder())
136 case lldb::eByteOrderBig:
137 data.SetByteOrder(lldb::eByteOrderLittle);
139 case lldb::eByteOrderLittle:
140 data.SetByteOrder(lldb::eByteOrderBig);
143 return LLDB_INVALID_OFFSET;
148 // Magic bytes didn't match
150 return LLDB_INVALID_OFFSET;
154 version = data.GetU16 (&offset);
157 // Unsupported version
158 return LLDB_INVALID_OFFSET;
160 hash_function = data.GetU16 (&offset);
161 if (hash_function == 4)
162 hash_function = 0; // Deal with pre-release version of this table...
163 bucket_count = data.GetU32 (&offset);
164 hashes_count = data.GetU32 (&offset);
165 header_data_len = data.GetU32 (&offset);
168 return LLDB_INVALID_OFFSET;
171 // // Returns a buffer that contains a serialized version of this table
172 // // that must be freed with free().
177 template <typename __KeyType, class __HeaderDataType, class __ValueType>
181 typedef __HeaderDataType HeaderDataType;
182 typedef Header<HeaderDataType> HeaderType;
183 typedef __KeyType KeyType;
184 typedef __ValueType ValueType;
193 typedef std::vector<ValueType> ValueArrayType;
195 typedef std::map<KeyType, ValueArrayType> HashData;
196 // Map a name hash to one or more name infos
197 typedef std::map<uint32_t, HashData> HashToHashData;
200 GetKeyForStringType (const char *cstr) const = 0;
203 GetByteSize (const HashData &key_to_key_values) = 0;
206 WriteHashData (const HashData &hash_data,
207 lldb_private::Stream &ostrm) = 0;
210 AddEntry (const char *cstr, const ValueType &value)
213 entry.hash = MappedHash::HashString (eHashFunctionDJB, cstr);
214 entry.key = GetKeyForStringType (cstr);
216 m_entries.push_back (entry);
220 Save (const HeaderDataType &header_data,
221 lldb_private::Stream &ostrm)
223 if (m_entries.empty())
226 const uint32_t num_entries = m_entries.size();
231 header.magic = HASH_MAGIC;
233 header.hash_function = eHashFunctionDJB;
234 header.bucket_count = 0;
235 header.hashes_count = 0;
236 header.prologue_length = header_data.GetByteSize();
238 // We need to figure out the number of unique hashes first before we can
239 // calculate the number of buckets we want to use.
240 typedef std::vector<uint32_t> hash_coll;
241 hash_coll unique_hashes;
242 unique_hashes.resize (num_entries);
243 for (i=0; i<num_entries; ++i)
244 unique_hashes[i] = m_entries[i].hash;
245 std::sort (unique_hashes.begin(), unique_hashes.end());
246 hash_coll::iterator pos = std::unique (unique_hashes.begin(), unique_hashes.end());
247 const size_t num_unique_hashes = std::distance (unique_hashes.begin(), pos);
249 if (num_unique_hashes > 1024)
250 header.bucket_count = num_unique_hashes/4;
251 else if (num_unique_hashes > 16)
252 header.bucket_count = num_unique_hashes/2;
254 header.bucket_count = num_unique_hashes;
255 if (header.bucket_count == 0)
256 header.bucket_count = 1;
259 std::vector<HashToHashData> hash_buckets;
260 std::vector<uint32_t> hash_indexes (header.bucket_count, 0);
261 std::vector<uint32_t> hash_values;
262 std::vector<uint32_t> hash_offsets;
263 hash_buckets.resize (header.bucket_count);
264 uint32_t bucket_entry_empties = 0;
265 //StreamString hash_file_data(Stream::eBinary, dwarf->GetObjectFile()->GetAddressByteSize(), dwarf->GetObjectFile()->GetByteSize());
267 // Push all of the hashes into their buckets and create all bucket
268 // entries all populated with data.
269 for (i=0; i<num_entries; ++i)
271 const uint32_t hash = m_entries[i].hash;
272 const uint32_t bucket_idx = hash % header.bucket_count;
273 const uint32_t strp_offset = m_entries[i].str_offset;
274 const uint32_t die_offset = m_entries[i].die_offset;
275 hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset);
278 // Now for each bucket we write the bucket value which is the
279 // number of hashes and the hash index encoded into a single
280 // 32 bit unsigned integer.
281 for (i=0; i<header.bucket_count; ++i)
283 HashToHashData &bucket_entry = hash_buckets[i];
285 if (bucket_entry.empty())
288 ++bucket_entry_empties;
289 hash_indexes[i] = UINT32_MAX;
293 const uint32_t hash_value_index = hash_values.size();
294 uint32_t hash_count = 0;
295 typename HashToHashData::const_iterator pos, end = bucket_entry.end();
296 for (pos = bucket_entry.begin(); pos != end; ++pos)
298 hash_values.push_back (pos->first);
299 hash_offsets.push_back (GetByteSize (pos->second));
303 hash_indexes[i] = hash_value_index;
306 header.hashes_count = hash_values.size();
308 // Write the header out now that we have the hash_count
309 header.Write (ostrm);
311 // Now for each bucket we write the start index of the hashes
312 // for the current bucket, or UINT32_MAX if the bucket is empty
313 for (i=0; i<header.bucket_count; ++i)
315 ostrm.PutHex32(hash_indexes[i]);
318 // Now we need to write out all of the hash values
319 for (i=0; i<header.hashes_count; ++i)
321 ostrm.PutHex32(hash_values[i]);
324 // Now we need to write out all of the hash data offsets,
325 // there is an offset for each hash in the hashes array
326 // that was written out above
327 for (i=0; i<header.hashes_count; ++i)
329 ostrm.PutHex32(hash_offsets[i]);
332 // Now we write the data for each hash and verify we got the offset
334 for (i=0; i<header.bucket_count; ++i)
336 HashToHashData &bucket_entry = hash_buckets[i];
338 typename HashToHashData::const_iterator pos, end = bucket_entry.end();
339 for (pos = bucket_entry.begin(); pos != end; ++pos)
341 if (!bucket_entry.empty())
343 WriteHashData (pos->second);
349 typedef std::vector<Entry> collection;
350 collection m_entries;
352 // A class for reading and using a saved hash table from a block of data
354 template <typename __KeyType, class __HeaderType, class __HashData>
358 typedef __HeaderType HeaderType;
359 typedef __KeyType KeyType;
360 typedef __HashData HashData;
364 eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was filled in successfully
365 eResultKeyMismatch = 1u, // Bucket hash data collision, but key didn't match
366 eResultEndOfHashData = 2u, // The chain of items for this hash data in this bucket is terminated, search no more
367 eResultError = 3u // Error parsing the hash data, abort
376 MemoryTable (lldb_private::DataExtractor &data) :
378 m_hash_indexes (NULL),
379 m_hash_values (NULL),
380 m_hash_offsets (NULL)
382 lldb::offset_t offset = m_header.Read (data, 0);
383 if (offset != LLDB_INVALID_OFFSET && IsValid ())
385 m_hash_indexes = (uint32_t *)data.GetData (&offset, m_header.bucket_count * sizeof(uint32_t));
386 m_hash_values = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t));
387 m_hash_offsets = (uint32_t *)data.GetData (&offset, m_header.hashes_count * sizeof(uint32_t));
399 return m_header.version == 1 &&
400 m_header.hash_function == eHashFunctionDJB &&
401 m_header.bucket_count > 0 &&
402 m_header.hashes_count > 0;
406 GetHashIndex (uint32_t bucket_idx) const
408 if (m_hash_indexes && bucket_idx < m_header.bucket_count)
409 return m_hash_indexes[bucket_idx];
414 GetHashValue (uint32_t hash_idx) const
416 if (m_hash_values && hash_idx < m_header.hashes_count)
417 return m_hash_values[hash_idx];
422 GetHashDataOffset (uint32_t hash_idx) const
424 if (m_hash_offsets && hash_idx < m_header.hashes_count)
425 return m_hash_offsets[hash_idx];
430 Find (const char *name, Pair &pair) const
434 const uint32_t bucket_count = m_header.bucket_count;
435 const uint32_t hash_count = m_header.hashes_count;
436 const uint32_t hash_value = MappedHash::HashString (m_header.hash_function, name);
437 const uint32_t bucket_idx = hash_value % bucket_count;
438 uint32_t hash_idx = GetHashIndex (bucket_idx);
439 if (hash_idx < hash_count)
441 for (; hash_idx < hash_count; ++hash_idx)
443 const uint32_t curr_hash_value = GetHashValue (hash_idx);
444 if (curr_hash_value == hash_value)
446 lldb::offset_t hash_data_offset = GetHashDataOffset (hash_idx);
447 while (hash_data_offset != UINT32_MAX)
449 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
450 Result hash_result = GetHashDataForName (name, &hash_data_offset, pair);
451 // Check the result of getting our hash data
454 case eResultKeyMatch:
457 case eResultKeyMismatch:
458 if (prev_hash_data_offset == hash_data_offset)
462 case eResultEndOfHashData:
463 // The last HashData for this key has been reached, stop searching
466 // Error parsing the hash data, abort
471 if ((curr_hash_value % bucket_count) != bucket_idx)
479 // This method must be implemented in any subclasses.
480 // The KeyType is user specified and must somehow result in a string
481 // value. For example, the KeyType might be a string offset in a string
482 // table and subclasses can store their string table as a member of the
483 // subclass and return a valie "const char *" given a "key". The value
484 // could also be a C string pointer, in which case just returning "key"
488 GetStringForKeyType (KeyType key) const = 0;
491 ReadHashData (uint32_t hash_data_offset,
492 HashData &hash_data) const = 0;
494 // This method must be implemented in any subclasses and it must try to
495 // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
496 // parameter. This offset should be updated as bytes are consumed and
497 // a value "Result" enum should be returned. If the "name" matches the
498 // full name for the "pair.key" (which must be filled in by this call),
499 // then the HashData in the pair ("pair.value") should be extracted and
500 // filled in and "eResultKeyMatch" should be returned. If "name" doesn't
501 // match this string for the key, then "eResultKeyMismatch" should be
502 // returned and all data for the current HashData must be consumed or
503 // skipped and the "hash_data_offset_ptr" offset needs to be updated to
504 // point to the next HashData. If the end of the HashData objects for
505 // a given hash value have been reached, then "eResultEndOfHashData"
506 // should be returned. If anything else goes wrong during parsing,
507 // return "eResultError" and the corresponding "Find()" function will
508 // be canceled and return false.
511 GetHashDataForName (const char *name,
512 lldb::offset_t* hash_data_offset_ptr,
513 Pair &pair) const = 0;
523 ForEach (std::function <bool(const HashData &hash_data)> const &callback) const
525 const size_t num_hash_offsets = m_header.hashes_count;
526 for (size_t i=0; i<num_hash_offsets; ++i)
528 uint32_t hash_data_offset = GetHashDataOffset (i);
529 if (hash_data_offset != UINT32_MAX)
532 if (ReadHashData (hash_data_offset, hash_data))
534 // If the callback returns false, then we are done and should stop
535 if (callback(hash_data) == false)
543 // Implementation agnostic information
545 uint32_t *m_hash_indexes;
546 uint32_t *m_hash_values;
547 uint32_t *m_hash_offsets;
552 #endif // #ifndef liblldb_MappedHash_h_