1 //===-- MappedHash.h --------------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #ifndef liblldb_MappedHash_h_
11 #define liblldb_MappedHash_h_
23 // Other libraries and framework includes
25 #include "lldb/Core/DataExtractor.h"
26 #include "lldb/Core/Stream.h"
30 enum HashFunctionType {
31 eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used
32 // by the ELF GNU_HASH sections
35 static uint32_t HashStringUsingDJB(const char *s) {
38 for (unsigned char c = *s; c; c = *++s)
39 h = ((h << 5) + h) + c;
44 static uint32_t HashString(uint32_t hash_function, const char *s) {
48 switch (hash_function) {
49 case MappedHash::eHashFunctionDJB:
50 return HashStringUsingDJB(s);
55 llvm_unreachable("Invalid hash function index");
58 static const uint32_t HASH_MAGIC = 0x48415348u;
59 static const uint32_t HASH_CIGAM = 0x48534148u;
61 template <typename T> struct Header {
65 magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
66 uint16_t version; // Version number
67 uint16_t hash_function; // The hash function enumeration that was used
68 uint32_t bucket_count; // The number of buckets in this hash table
69 uint32_t hashes_count; // The total number of unique hash values and hash
70 // data offsets in this table
71 uint32_t header_data_len; // The size in bytes of the "header_data" template
73 HeaderData header_data; //
76 : magic(HASH_MAGIC), version(1), hash_function(eHashFunctionDJB),
77 bucket_count(0), hashes_count(0), header_data_len(sizeof(T)),
80 virtual ~Header() = default;
82 size_t GetByteSize() const {
83 return sizeof(magic) + sizeof(version) + sizeof(hash_function) +
84 sizeof(bucket_count) + sizeof(hashes_count) +
85 sizeof(header_data_len) + header_data_len;
88 virtual size_t GetByteSize(const HeaderData &header_data) = 0;
90 void SetHeaderDataByteSize(uint32_t header_data_byte_size) {
91 header_data_len = header_data_byte_size;
94 void Dump(lldb_private::Stream &s) {
95 s.Printf("header.magic = 0x%8.8x\n", magic);
96 s.Printf("header.version = 0x%4.4x\n", version);
97 s.Printf("header.hash_function = 0x%4.4x\n", hash_function);
98 s.Printf("header.bucket_count = 0x%8.8x %u\n", bucket_count,
100 s.Printf("header.hashes_count = 0x%8.8x %u\n", hashes_count,
102 s.Printf("header.header_data_len = 0x%8.8x %u\n", header_data_len,
106 virtual lldb::offset_t Read(lldb_private::DataExtractor &data,
107 lldb::offset_t offset) {
108 if (data.ValidOffsetForDataOfSize(
109 offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) +
110 sizeof(bucket_count) + sizeof(hashes_count) +
111 sizeof(header_data_len))) {
112 magic = data.GetU32(&offset);
113 if (magic != HASH_MAGIC) {
114 if (magic == HASH_CIGAM) {
115 switch (data.GetByteOrder()) {
116 case lldb::eByteOrderBig:
117 data.SetByteOrder(lldb::eByteOrderLittle);
119 case lldb::eByteOrderLittle:
120 data.SetByteOrder(lldb::eByteOrderBig);
123 return LLDB_INVALID_OFFSET;
126 // Magic bytes didn't match
128 return LLDB_INVALID_OFFSET;
132 version = data.GetU16(&offset);
134 // Unsupported version
135 return LLDB_INVALID_OFFSET;
137 hash_function = data.GetU16(&offset);
138 if (hash_function == 4)
139 hash_function = 0; // Deal with pre-release version of this table...
140 bucket_count = data.GetU32(&offset);
141 hashes_count = data.GetU32(&offset);
142 header_data_len = data.GetU32(&offset);
145 return LLDB_INVALID_OFFSET;
148 // // Returns a buffer that contains a serialized version of this
150 // // that must be freed with free().
155 template <typename __KeyType, class __HeaderDataType, class __ValueType>
158 typedef __HeaderDataType HeaderDataType;
159 typedef Header<HeaderDataType> HeaderType;
160 typedef __KeyType KeyType;
161 typedef __ValueType ValueType;
169 typedef std::vector<ValueType> ValueArrayType;
171 typedef std::map<KeyType, ValueArrayType> HashData;
172 // Map a name hash to one or more name infos
173 typedef std::map<uint32_t, HashData> HashToHashData;
175 virtual KeyType GetKeyForStringType(const char *cstr) const = 0;
177 virtual size_t GetByteSize(const HashData &key_to_key_values) = 0;
179 virtual bool WriteHashData(const HashData &hash_data,
180 lldb_private::Stream &ostrm) = 0;
182 void AddEntry(const char *cstr, const ValueType &value) {
184 entry.hash = MappedHash::HashString(eHashFunctionDJB, cstr);
185 entry.key = GetKeyForStringType(cstr);
187 m_entries.push_back(entry);
190 void Save(const HeaderDataType &header_data, lldb_private::Stream &ostrm) {
191 if (m_entries.empty())
194 const uint32_t num_entries = m_entries.size();
199 header.magic = HASH_MAGIC;
201 header.hash_function = eHashFunctionDJB;
202 header.bucket_count = 0;
203 header.hashes_count = 0;
204 header.prologue_length = header_data.GetByteSize();
206 // We need to figure out the number of unique hashes first before we can
207 // calculate the number of buckets we want to use.
208 typedef std::vector<uint32_t> hash_coll;
209 hash_coll unique_hashes;
210 unique_hashes.resize(num_entries);
211 for (i = 0; i < num_entries; ++i)
212 unique_hashes[i] = m_entries[i].hash;
213 std::sort(unique_hashes.begin(), unique_hashes.end());
214 hash_coll::iterator pos =
215 std::unique(unique_hashes.begin(), unique_hashes.end());
216 const size_t num_unique_hashes =
217 std::distance(unique_hashes.begin(), pos);
219 if (num_unique_hashes > 1024)
220 header.bucket_count = num_unique_hashes / 4;
221 else if (num_unique_hashes > 16)
222 header.bucket_count = num_unique_hashes / 2;
224 header.bucket_count = num_unique_hashes;
225 if (header.bucket_count == 0)
226 header.bucket_count = 1;
228 std::vector<HashToHashData> hash_buckets;
229 std::vector<uint32_t> hash_indexes(header.bucket_count, 0);
230 std::vector<uint32_t> hash_values;
231 std::vector<uint32_t> hash_offsets;
232 hash_buckets.resize(header.bucket_count);
233 uint32_t bucket_entry_empties = 0;
234 // StreamString hash_file_data(Stream::eBinary,
235 // dwarf->GetObjectFile()->GetAddressByteSize(),
236 // dwarf->GetObjectFile()->GetByteSize());
238 // Push all of the hashes into their buckets and create all bucket
239 // entries all populated with data.
240 for (i = 0; i < num_entries; ++i) {
241 const uint32_t hash = m_entries[i].hash;
242 const uint32_t bucket_idx = hash % header.bucket_count;
243 const uint32_t strp_offset = m_entries[i].str_offset;
244 const uint32_t die_offset = m_entries[i].die_offset;
245 hash_buckets[bucket_idx][hash][strp_offset].push_back(die_offset);
248 // Now for each bucket we write the bucket value which is the
249 // number of hashes and the hash index encoded into a single
250 // 32 bit unsigned integer.
251 for (i = 0; i < header.bucket_count; ++i) {
252 HashToHashData &bucket_entry = hash_buckets[i];
254 if (bucket_entry.empty()) {
256 ++bucket_entry_empties;
257 hash_indexes[i] = UINT32_MAX;
259 const uint32_t hash_value_index = hash_values.size();
260 uint32_t hash_count = 0;
261 typename HashToHashData::const_iterator pos, end = bucket_entry.end();
262 for (pos = bucket_entry.begin(); pos != end; ++pos) {
263 hash_values.push_back(pos->first);
264 hash_offsets.push_back(GetByteSize(pos->second));
268 hash_indexes[i] = hash_value_index;
271 header.hashes_count = hash_values.size();
273 // Write the header out now that we have the hash_count
276 // Now for each bucket we write the start index of the hashes
277 // for the current bucket, or UINT32_MAX if the bucket is empty
278 for (i = 0; i < header.bucket_count; ++i) {
279 ostrm.PutHex32(hash_indexes[i]);
282 // Now we need to write out all of the hash values
283 for (i = 0; i < header.hashes_count; ++i) {
284 ostrm.PutHex32(hash_values[i]);
287 // Now we need to write out all of the hash data offsets,
288 // there is an offset for each hash in the hashes array
289 // that was written out above
290 for (i = 0; i < header.hashes_count; ++i) {
291 ostrm.PutHex32(hash_offsets[i]);
294 // Now we write the data for each hash and verify we got the offset
296 for (i = 0; i < header.bucket_count; ++i) {
297 HashToHashData &bucket_entry = hash_buckets[i];
299 typename HashToHashData::const_iterator pos, end = bucket_entry.end();
300 for (pos = bucket_entry.begin(); pos != end; ++pos) {
301 if (!bucket_entry.empty()) {
302 WriteHashData(pos->second);
309 typedef std::vector<Entry> collection;
310 collection m_entries;
313 // A class for reading and using a saved hash table from a block of data
315 template <typename __KeyType, class __HeaderType, class __HashData>
318 typedef __HeaderType HeaderType;
319 typedef __KeyType KeyType;
320 typedef __HashData HashData;
323 eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was
324 // filled in successfully
326 1u, // Bucket hash data collision, but key didn't match
327 eResultEndOfHashData = 2u, // The chain of items for this hash data in
328 // this bucket is terminated, search no more
329 eResultError = 3u // Error parsing the hash data, abort
337 MemoryTable(lldb_private::DataExtractor &data)
338 : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr),
339 m_hash_offsets(nullptr) {
340 lldb::offset_t offset = m_header.Read(data, 0);
341 if (offset != LLDB_INVALID_OFFSET && IsValid()) {
342 m_hash_indexes = (const uint32_t *)data.GetData(
343 &offset, m_header.bucket_count * sizeof(uint32_t));
344 m_hash_values = (const uint32_t *)data.GetData(
345 &offset, m_header.hashes_count * sizeof(uint32_t));
346 m_hash_offsets = (const uint32_t *)data.GetData(
347 &offset, m_header.hashes_count * sizeof(uint32_t));
351 virtual ~MemoryTable() = default;
353 bool IsValid() const {
354 return m_header.version == 1 &&
355 m_header.hash_function == eHashFunctionDJB &&
356 m_header.bucket_count > 0 && m_header.hashes_count > 0;
359 uint32_t GetHashIndex(uint32_t bucket_idx) const {
360 if (m_hash_indexes && bucket_idx < m_header.bucket_count)
361 return m_hash_indexes[bucket_idx];
365 uint32_t GetHashValue(uint32_t hash_idx) const {
366 if (m_hash_values && hash_idx < m_header.hashes_count)
367 return m_hash_values[hash_idx];
371 uint32_t GetHashDataOffset(uint32_t hash_idx) const {
372 if (m_hash_offsets && hash_idx < m_header.hashes_count)
373 return m_hash_offsets[hash_idx];
377 bool Find(const char *name, Pair &pair) const {
378 if (!name || !name[0])
382 const uint32_t bucket_count = m_header.bucket_count;
383 const uint32_t hash_count = m_header.hashes_count;
384 const uint32_t hash_value =
385 MappedHash::HashString(m_header.hash_function, name);
386 const uint32_t bucket_idx = hash_value % bucket_count;
387 uint32_t hash_idx = GetHashIndex(bucket_idx);
388 if (hash_idx < hash_count) {
389 for (; hash_idx < hash_count; ++hash_idx) {
390 const uint32_t curr_hash_value = GetHashValue(hash_idx);
391 if (curr_hash_value == hash_value) {
392 lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx);
393 while (hash_data_offset != UINT32_MAX) {
394 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
396 GetHashDataForName(name, &hash_data_offset, pair);
397 // Check the result of getting our hash data
398 switch (hash_result) {
399 case eResultKeyMatch:
402 case eResultKeyMismatch:
403 if (prev_hash_data_offset == hash_data_offset)
407 case eResultEndOfHashData:
408 // The last HashData for this key has been reached, stop
412 // Error parsing the hash data, abort
417 if ((curr_hash_value % bucket_count) != bucket_idx)
425 // This method must be implemented in any subclasses.
426 // The KeyType is user specified and must somehow result in a string
427 // value. For example, the KeyType might be a string offset in a string
428 // table and subclasses can store their string table as a member of the
429 // subclass and return a valie "const char *" given a "key". The value
430 // could also be a C string pointer, in which case just returning "key"
432 virtual const char *GetStringForKeyType(KeyType key) const = 0;
434 virtual bool ReadHashData(uint32_t hash_data_offset,
435 HashData &hash_data) const = 0;
437 // This method must be implemented in any subclasses and it must try to
438 // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
439 // parameter. This offset should be updated as bytes are consumed and
440 // a value "Result" enum should be returned. If the "name" matches the
441 // full name for the "pair.key" (which must be filled in by this call),
442 // then the HashData in the pair ("pair.value") should be extracted and
443 // filled in and "eResultKeyMatch" should be returned. If "name" doesn't
444 // match this string for the key, then "eResultKeyMismatch" should be
445 // returned and all data for the current HashData must be consumed or
446 // skipped and the "hash_data_offset_ptr" offset needs to be updated to
447 // point to the next HashData. If the end of the HashData objects for
448 // a given hash value have been reached, then "eResultEndOfHashData"
449 // should be returned. If anything else goes wrong during parsing,
450 // return "eResultError" and the corresponding "Find()" function will
451 // be canceled and return false.
452 virtual Result GetHashDataForName(const char *name,
453 lldb::offset_t *hash_data_offset_ptr,
454 Pair &pair) const = 0;
456 const HeaderType &GetHeader() { return m_header; }
459 std::function<bool(const HashData &hash_data)> const &callback) const {
460 const size_t num_hash_offsets = m_header.hashes_count;
461 for (size_t i = 0; i < num_hash_offsets; ++i) {
462 uint32_t hash_data_offset = GetHashDataOffset(i);
463 if (hash_data_offset != UINT32_MAX) {
465 if (ReadHashData(hash_data_offset, hash_data)) {
466 // If the callback returns false, then we are done and should stop
467 if (callback(hash_data) == false)
475 // Implementation agnostic information
477 const uint32_t *m_hash_indexes;
478 const uint32_t *m_hash_values;
479 const uint32_t *m_hash_offsets;
483 #endif // liblldb_MappedHash_h_