1 //===-- MappedHash.h --------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef liblldb_MappedHash_h_
10 #define liblldb_MappedHash_h_
20 #include "lldb/Utility/DataExtractor.h"
21 #include "lldb/Utility/Stream.h"
22 #include "llvm/Support/DJB.h"
26 enum HashFunctionType {
27 eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used
28 // by the ELF GNU_HASH sections
31 static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) {
32 switch (hash_function) {
33 case MappedHash::eHashFunctionDJB:
34 return llvm::djbHash(s);
39 llvm_unreachable("Invalid hash function index");
42 static const uint32_t HASH_MAGIC = 0x48415348u;
43 static const uint32_t HASH_CIGAM = 0x48534148u;
45 template <typename T> struct Header {
49 magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
50 uint16_t version; // Version number
51 uint16_t hash_function; // The hash function enumeration that was used
52 uint32_t bucket_count; // The number of buckets in this hash table
53 uint32_t hashes_count; // The total number of unique hash values and hash
54 // data offsets in this table
55 uint32_t header_data_len; // The size in bytes of the "header_data" template
57 HeaderData header_data; //
60 : magic(HASH_MAGIC), version(1), hash_function(eHashFunctionDJB),
61 bucket_count(0), hashes_count(0), header_data_len(sizeof(T)),
64 virtual ~Header() = default;
66 size_t GetByteSize() const {
67 return sizeof(magic) + sizeof(version) + sizeof(hash_function) +
68 sizeof(bucket_count) + sizeof(hashes_count) +
69 sizeof(header_data_len) + header_data_len;
72 virtual size_t GetByteSize(const HeaderData &header_data) = 0;
74 void SetHeaderDataByteSize(uint32_t header_data_byte_size) {
75 header_data_len = header_data_byte_size;
78 void Dump(lldb_private::Stream &s) {
79 s.Printf("header.magic = 0x%8.8x\n", magic);
80 s.Printf("header.version = 0x%4.4x\n", version);
81 s.Printf("header.hash_function = 0x%4.4x\n", hash_function);
82 s.Printf("header.bucket_count = 0x%8.8x %u\n", bucket_count,
84 s.Printf("header.hashes_count = 0x%8.8x %u\n", hashes_count,
86 s.Printf("header.header_data_len = 0x%8.8x %u\n", header_data_len,
90 virtual lldb::offset_t Read(lldb_private::DataExtractor &data,
91 lldb::offset_t offset) {
92 if (data.ValidOffsetForDataOfSize(
93 offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) +
94 sizeof(bucket_count) + sizeof(hashes_count) +
95 sizeof(header_data_len))) {
96 magic = data.GetU32(&offset);
97 if (magic != HASH_MAGIC) {
98 if (magic == HASH_CIGAM) {
99 switch (data.GetByteOrder()) {
100 case lldb::eByteOrderBig:
101 data.SetByteOrder(lldb::eByteOrderLittle);
103 case lldb::eByteOrderLittle:
104 data.SetByteOrder(lldb::eByteOrderBig);
107 return LLDB_INVALID_OFFSET;
110 // Magic bytes didn't match
112 return LLDB_INVALID_OFFSET;
116 version = data.GetU16(&offset);
118 // Unsupported version
119 return LLDB_INVALID_OFFSET;
121 hash_function = data.GetU16(&offset);
122 if (hash_function == 4)
123 hash_function = 0; // Deal with pre-release version of this table...
124 bucket_count = data.GetU32(&offset);
125 hashes_count = data.GetU32(&offset);
126 header_data_len = data.GetU32(&offset);
129 return LLDB_INVALID_OFFSET;
132 // // Returns a buffer that contains a serialized version of this
134 // // that must be freed with free().
139 // A class for reading and using a saved hash table from a block of data
141 template <typename __KeyType, class __HeaderType, class __HashData>
144 typedef __HeaderType HeaderType;
145 typedef __KeyType KeyType;
146 typedef __HashData HashData;
149 eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was
150 // filled in successfully
152 1u, // Bucket hash data collision, but key didn't match
153 eResultEndOfHashData = 2u, // The chain of items for this hash data in
154 // this bucket is terminated, search no more
155 eResultError = 3u // Status parsing the hash data, abort
163 MemoryTable(lldb_private::DataExtractor &data)
164 : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr),
165 m_hash_offsets(nullptr) {
166 lldb::offset_t offset = m_header.Read(data, 0);
167 if (offset != LLDB_INVALID_OFFSET && IsValid()) {
168 m_hash_indexes = (const uint32_t *)data.GetData(
169 &offset, m_header.bucket_count * sizeof(uint32_t));
170 m_hash_values = (const uint32_t *)data.GetData(
171 &offset, m_header.hashes_count * sizeof(uint32_t));
172 m_hash_offsets = (const uint32_t *)data.GetData(
173 &offset, m_header.hashes_count * sizeof(uint32_t));
177 virtual ~MemoryTable() = default;
179 bool IsValid() const {
180 return m_header.version == 1 &&
181 m_header.hash_function == eHashFunctionDJB &&
182 m_header.bucket_count > 0;
185 uint32_t GetHashIndex(uint32_t bucket_idx) const {
186 uint32_t result = UINT32_MAX;
187 if (m_hash_indexes && bucket_idx < m_header.bucket_count)
188 memcpy(&result, m_hash_indexes + bucket_idx, sizeof(uint32_t));
192 uint32_t GetHashValue(uint32_t hash_idx) const {
193 uint32_t result = UINT32_MAX;
194 if (m_hash_values && hash_idx < m_header.hashes_count)
195 memcpy(&result, m_hash_values + hash_idx, sizeof(uint32_t));
199 uint32_t GetHashDataOffset(uint32_t hash_idx) const {
200 uint32_t result = UINT32_MAX;
201 if (m_hash_offsets && hash_idx < m_header.hashes_count)
202 memcpy(&result, m_hash_offsets + hash_idx, sizeof(uint32_t));
206 bool Find(llvm::StringRef name, Pair &pair) const {
211 const uint32_t bucket_count = m_header.bucket_count;
212 const uint32_t hash_count = m_header.hashes_count;
213 const uint32_t hash_value =
214 MappedHash::HashString(m_header.hash_function, name);
215 const uint32_t bucket_idx = hash_value % bucket_count;
216 uint32_t hash_idx = GetHashIndex(bucket_idx);
217 if (hash_idx < hash_count) {
218 for (; hash_idx < hash_count; ++hash_idx) {
219 const uint32_t curr_hash_value = GetHashValue(hash_idx);
220 if (curr_hash_value == hash_value) {
221 lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx);
222 while (hash_data_offset != UINT32_MAX) {
223 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
225 GetHashDataForName(name, &hash_data_offset, pair);
226 // Check the result of getting our hash data
227 switch (hash_result) {
228 case eResultKeyMatch:
231 case eResultKeyMismatch:
232 if (prev_hash_data_offset == hash_data_offset)
236 case eResultEndOfHashData:
237 // The last HashData for this key has been reached, stop
241 // Status parsing the hash data, abort
246 if ((curr_hash_value % bucket_count) != bucket_idx)
254 // This method must be implemented in any subclasses. The KeyType is user
255 // specified and must somehow result in a string value. For example, the
256 // KeyType might be a string offset in a string table and subclasses can
257 // store their string table as a member of the subclass and return a valie
258 // "const char *" given a "key". The value could also be a C string
259 // pointer, in which case just returning "key" will suffice.
260 virtual const char *GetStringForKeyType(KeyType key) const = 0;
262 virtual bool ReadHashData(uint32_t hash_data_offset,
263 HashData &hash_data) const = 0;
265 // This method must be implemented in any subclasses and it must try to
266 // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
267 // parameter. This offset should be updated as bytes are consumed and a
268 // value "Result" enum should be returned. If the "name" matches the full
269 // name for the "pair.key" (which must be filled in by this call), then the
270 // HashData in the pair ("pair.value") should be extracted and filled in
271 // and "eResultKeyMatch" should be returned. If "name" doesn't match this
272 // string for the key, then "eResultKeyMismatch" should be returned and all
273 // data for the current HashData must be consumed or skipped and the
274 // "hash_data_offset_ptr" offset needs to be updated to point to the next
275 // HashData. If the end of the HashData objects for a given hash value have
276 // been reached, then "eResultEndOfHashData" should be returned. If
277 // anything else goes wrong during parsing, return "eResultError" and the
278 // corresponding "Find()" function will be canceled and return false.
279 virtual Result GetHashDataForName(llvm::StringRef name,
280 lldb::offset_t *hash_data_offset_ptr,
281 Pair &pair) const = 0;
283 const HeaderType &GetHeader() { return m_header; }
286 std::function<bool(const HashData &hash_data)> const &callback) const {
287 const size_t num_hash_offsets = m_header.hashes_count;
288 for (size_t i = 0; i < num_hash_offsets; ++i) {
289 uint32_t hash_data_offset = GetHashDataOffset(i);
290 if (hash_data_offset != UINT32_MAX) {
292 if (ReadHashData(hash_data_offset, hash_data)) {
293 // If the callback returns false, then we are done and should stop
294 if (callback(hash_data) == false)
302 // Implementation agnostic information
304 const uint32_t *m_hash_indexes;
305 const uint32_t *m_hash_values;
306 const uint32_t *m_hash_offsets;
310 #endif // liblldb_MappedHash_h_