1 //===-- HashedNameToDIE.h ---------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #ifndef SymbolFileDWARF_HashedNameToDIE_h_
11 #define SymbolFileDWARF_HashedNameToDIE_h_
15 #include "DWARFDefines.h"
16 #include "DWARFFormValue.h"
18 #include "lldb/lldb-defines.h"
19 #include "lldb/Core/dwarf.h"
20 #include "lldb/Core/RegularExpression.h"
21 #include "lldb/Core/MappedHash.h"
24 class SymbolFileDWARF;
25 class DWARFCompileUnit;
26 class DWARFDebugInfoEntry;
// Groups everything used to read the hashed name-to-DIE tables: the per-entry
// DIEInfo record, the atom descriptions, the table header and the reader.
28 struct DWARFMappedHash
// DIEInfo is one decoded hash-data entry.  Besides the members listed below,
// a `tag` member is read by the ExtractDIEArray helpers.
32 dw_offset_t offset; // The DIE offset
34 uint32_t type_flags; // Any flags for this DIEInfo
35 uint32_t qualified_name_hash; // A 32 bit hash of the fully qualified name
// Default state: an invalid DIE offset and a zero qualified-name hash.
38 offset (DW_INVALID_OFFSET),
41 qualified_name_hash (0)
// Member-wise constructor; `h` seeds qualified_name_hash.  Presumably o, t and
// f seed the offset, tag and type flags respectively -- confirm.
45 DIEInfo (dw_offset_t o, dw_tag_t t, uint32_t f, uint32_t h) :
49 qualified_name_hash (h)
// Reset path: puts offset and qualified_name_hash back to their default state.
56 offset = DW_INVALID_OFFSET;
59 qualified_name_hash = 0;
// Container aliases used by the helpers below: full DIEInfo records, and bare
// 32-bit DIE offsets.
63 typedef std::vector<DIEInfo> DIEInfoArray;
64 typedef std::vector<uint32_t> DIEArray;
// Unfiltered overload: appends the DIE offset of every entry in
// die_info_array to die_offsets, preserving the input order.
67 ExtractDIEArray (const DIEInfoArray &die_info_array,
68 DIEArray &die_offsets)
70 const size_t count = die_info_array.size();
71 for (size_t i=0; i<count; ++i)
73 die_offsets.push_back (die_info_array[i].offset);
// Tag-filtered overload: appends the DIE offsets of the entries whose stored
// tag is compatible with the requested `tag`.
78 ExtractDIEArray (const DIEInfoArray &die_info_array,
80 DIEArray &die_offsets)
// Delegates to the unfiltered overload (presumably when no tag filter applies).
84 ExtractDIEArray (die_info_array, die_offsets);
88 const size_t count = die_info_array.size();
89 for (size_t i=0; i<count; ++i)
91 const dw_tag_t die_tag = die_info_array[i].tag;
// An entry that carries no tag (0) is compatible with any requested tag.
92 bool tag_matches = die_tag == 0 || tag == die_tag;
// Class and structure tags are accepted interchangeably, so a lookup for one
// also matches entries recorded as the other.
95 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
96 tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
99 die_offsets.push_back (die_info_array[i].offset);
// Tag + qualified-name-hash overload: like the tag-filtered overload, but each
// entry's qualified_name_hash is additionally compared against the requested
// qualified_name_hash.
105 ExtractDIEArray (const DIEInfoArray &die_info_array,
107 const uint32_t qualified_name_hash,
108 DIEArray &die_offsets)
// Delegates to the unfiltered overload (presumably when no tag filter applies).
112 ExtractDIEArray (die_info_array, die_offsets);
116 const size_t count = die_info_array.size();
117 for (size_t i=0; i<count; ++i)
// Hash mismatch test; presumably such entries are skipped -- confirm.
119 if (qualified_name_hash != die_info_array[i].qualified_name_hash)
121 const dw_tag_t die_tag = die_info_array[i].tag;
// An entry that carries no tag (0) is compatible with any requested tag.
122 bool tag_matches = die_tag == 0 || tag == die_tag;
// Class and structure tags are accepted interchangeably.
125 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
126 tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
129 die_offsets.push_back (die_info_array[i].offset);
// Atom type identifiers describing the fields stored for each hash-data entry.
// Each value is also used as a bit position (1u << type) in the prologue's
// atom_mask by ContainsAtom() and AppendAtom().
137 eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
138 eAtomTypeCUOffset = 2u, // DIE offset of the compile unit header that contains the item in question
139 eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
140 eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
141 eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags
142 eAtomTypeQualNameHash = 6u // A 32 bit hash of the fully qualified name (since all hash entries are basename only)
143 // For example a type like "std::vector<int>::iterator" would have a name of "iterator"
144 // and a 32 bit hash for "std::vector<int>::iterator" to allow us to not have to pull
145 // in debug info for a type when we know the fully qualified name.
148 // Bit definitions for the eAtomTypeTypeFlags flags
151 // Always set for C++, only set for ObjC if this is the
152 // @implementation for class
// Note: this is bit 1 (value 0x2), not bit 0.
153 eTypeFlagClassIsImplementation = ( 1u << 1 )
// Collects DIE offsets for entries whose tag is 0, DW_TAG_class_type or
// DW_TAG_structure_type.  Entries flagged eTypeFlagClassIsImplementation are
// treated as "the one true definition": depending on
// return_implementation_only_if_available they are either pushed on their own
// or inserted at the front of die_offsets.  Other matching entries are
// appended.
158 ExtractClassOrStructDIEArray (const DIEInfoArray &die_info_array,
159 bool return_implementation_only_if_available,
160 DIEArray &die_offsets)
162 const size_t count = die_info_array.size();
163 for (size_t i=0; i<count; ++i)
165 const dw_tag_t die_tag = die_info_array[i].tag;
166 if (die_tag == 0 || die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
168 if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation)
170 if (return_implementation_only_if_available)
172 // We found the one true definition for this class, so
175 die_offsets.push_back (die_info_array[i].offset);
180 // Put the one true definition as the first entry so it
// NOTE(review): inserting at begin() shifts every element already collected.
182 die_offsets.insert (die_offsets.begin(), die_info_array[i].offset);
187 die_offsets.push_back (die_info_array[i].offset);
// Appends the DIE offset of every entry whose type_flags, masked with
// type_flag_mask, equals type_flag_value exactly.
194 ExtractTypesFromDIEArray (const DIEInfoArray &die_info_array,
195 uint32_t type_flag_mask,
196 uint32_t type_flag_value,
197 DIEArray &die_offsets)
199 const size_t count = die_info_array.size();
200 for (size_t i=0; i<count; ++i)
202 if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value)
203 die_offsets.push_back (die_info_array[i].offset);
// An Atom pairs an atom type with the DWARF form used to encode it.  Both
// constructor arguments are optional and default to eAtomTypeNULL and form 0.
212 Atom (uint16_t t = eAtomTypeNULL, dw_form_t f = 0) :
// Ordered list of atoms; the order is the order fields are decoded in.
219 typedef std::vector<Atom> AtomArray;
// Declaration only; the definition lives outside this header.
// NOTE(review): presumably computes the TypeFlags bits for `die` within `cu`
// -- confirm against the definition.
222 GetTypeFlags (SymbolFileDWARF *dwarf2Data,
223 const DWARFCompileUnit* cu,
224 const DWARFDebugInfoEntry* die);
// Maps an atom type value to a short, human readable string literal.
228 GetAtomTypeName (uint16_t atom)
232 case eAtomTypeNULL: return "NULL";
233 case eAtomTypeDIEOffset: return "die-offset";
234 case eAtomTypeCUOffset: return "cu-offset";
235 case eAtomTypeTag: return "die-tag";
236 case eAtomTypeNameFlags: return "name-flags";
237 case eAtomTypeTypeFlags: return "type-flags";
238 case eAtomTypeQualNameHash: return "qualified-name-hash";
244 // DIE offset base so die offsets in hash_data can be CU relative
245 dw_offset_t die_base_offset;
// Running lower bound, in bytes, on the encoded size of one hash-data entry;
// grown by AppendAtom() for each atom.
248 size_t min_hash_data_byte_size;
// True while every appended atom uses a fixed-width form; AppendAtom() clears
// it for variable-width forms.
249 bool hash_data_has_fixed_byte_size;
// Constructs a prologue describing the default layout: a single DIE offset
// atom encoded as DW_FORM_data4.
251 Prologue (dw_offset_t _die_base_offset = 0) :
252 die_base_offset (_die_base_offset),
255 min_hash_data_byte_size(0),
256 hash_data_has_fixed_byte_size(true)
258 // Define an array of DIE offsets by first defining an array,
259 // and then define the atom type for the array, in this case
260 // we have an array of DIE offsets
261 AppendAtom (eAtomTypeDIEOffset, DW_FORM_data4);
// Reset path: restores the size bookkeeping to its freshly constructed values.
271 hash_data_has_fixed_byte_size = true;
272 min_hash_data_byte_size = 0;
// Returns true when an atom of the given type has been appended, by testing
// that type's bit in atom_mask.
// NOTE(review): the shift is undefined once atom_type reaches the bit width of
// unsigned int.  Read() casts an arbitrary 16-bit value to AtomType, so a
// range check here looks advisable.
278 ContainsAtom (AtomType atom_type) const
280 return (atom_mask & (1u << atom_type)) != 0;
// Records a new atom, sets its bit in atom_mask and then, based on the form,
// updates min_hash_data_byte_size and hash_data_has_fixed_byte_size.
// Variable-width forms clear the fixed-size flag and fall through to add their
// minimum size.
// NOTE(review): `1u << type` is undefined for type values at or above the bit
// width of unsigned int; `type` can originate from file data via Read().
291 AppendAtom (AtomType type, dw_form_t form)
293 atoms.push_back (Atom(type, form));
294 atom_mask |= 1u << type;
// Forms that are not supported as atoms.  assert() is compiled out when NDEBUG
// is defined, in which case these forms add nothing to the size bookkeeping.
297 case DW_FORM_indirect:
298 case DW_FORM_exprloc:
299 case DW_FORM_flag_present:
300 case DW_FORM_ref_sig8:
301 assert (!"Unhandled atom form");
309 case DW_FORM_ref_udata:
310 hash_data_has_fixed_byte_size = false;
311 // Fall through to the cases below...
// NOTE(review): DW_FORM_sec_offset is counted as one byte here, but the DWARF
// specification encodes it as a 4 or 8 byte offset -- confirm this is intended.
315 case DW_FORM_sec_offset:
316 min_hash_data_byte_size += 1;
320 hash_data_has_fixed_byte_size = false;
321 // Fall through to the cases below...
324 min_hash_data_byte_size += 2;
328 hash_data_has_fixed_byte_size = false;
329 // Fall through to the cases below...
333 case DW_FORM_ref_addr:
335 min_hash_data_byte_size += 4;
340 min_hash_data_byte_size += 8;
347 // Dump (std::ostream* ostrm_ptr);
// Parses the prologue starting at `offset`: a 32-bit die_base_offset, a 32-bit
// atom count, then one (16-bit type, 16-bit form) pair per atom, each of which
// is passed to AppendAtom().
350 Read (const lldb_private::DataExtractor &data,
351 lldb::offset_t offset)
355 die_base_offset = data.GetU32 (&offset);
357 const uint32_t atom_count = data.GetU32 (&offset);
// This specific value is treated as the signature of the old pre-release
// layout rather than as a real count.
358 if (atom_count == 0x00060003u)
360 // Old format, deal with contents of old pre-release format
// Consumes 32-bit words until a zero word is read.
361 while (data.GetU32(&offset))
364 // Hardcode to the only known value for now.
365 AppendAtom (eAtomTypeDIEOffset, DW_FORM_data4);
// NOTE(review): atom_count comes straight from the data and is not checked
// against the bytes remaining -- verify how GetU16 behaves past the end.
369 for (uint32_t i=0; i<atom_count; ++i)
371 AtomType type = (AtomType)data.GetU16 (&offset);
372 dw_form_t form = (dw_form_t)data.GetU16 (&offset);
373 AppendAtom (type, form);
380 // Write (BinaryStreamBuf &s);
// Size in bytes of the prologue: the base offset, a 32-bit atom count and the
// atoms themselves.
// NOTE(review): the comment below mentions an extra zero-termination Atom, but
// the expression multiplies by atoms.size() with no +1 -- confirm which is
// intended.
385 // Add an extra count to the atoms size for the zero termination Atom that gets
387 return sizeof(die_base_offset) + sizeof(uint32_t) + atoms.size() * sizeof(Atom);
// Accessor for min_hash_data_byte_size, the lower bound on the encoded size of
// one hash-data entry.  The spelling "Minumum" is part of the existing
// interface and is kept for callers.
391 GetMinumumHashDataByteSize () const
393 return min_hash_data_byte_size;
// Accessor for hash_data_has_fixed_byte_size: true when no appended atom uses
// a variable-width form.
397 HashDataHasFixedByteSize() const
399 return hash_data_has_fixed_byte_size;
// Table header: the generic MappedHash header specialised with the DWARF
// Prologue as its header data.
403 struct Header : public MappedHash::Header<Prologue>
// The DIE base offset is optional and defaults to 0.
405 Header (dw_offset_t _die_base_offset = 0)
// Byte size of the header data, as reported by header_data itself.
415 GetByteSize (const HeaderData &header_data)
417 return header_data.GetByteSize();
421 // Dump (std::ostream* ostrm_ptr);
// Reads the generic MappedHash header first and then, if that did not report
// failure, the DWARF specific prologue that follows it.
// NOTE(review): failure is detected by comparing against UINT32_MAX.  If
// lldb::offset_t is wider than 32 bits, confirm that the base Read() really
// returns this value on error.
423 virtual lldb::offset_t
424 Read (lldb_private::DataExtractor &data, lldb::offset_t offset)
426 offset = MappedHash::Header<Prologue>::Read (data, offset);
427 if (offset != UINT32_MAX)
429 offset = header_data.Read (data, offset);
// Decodes one hash-data entry at *offset_ptr into hash_data.  One form value
// is extracted per atom, in header order, and stored into the DIEInfo member
// that corresponds to the atom's type.
435 Read (const lldb_private::DWARFDataExtractor &data,
436 lldb::offset_t *offset_ptr,
437 DIEInfo &hash_data) const
439 const size_t num_atoms = header_data.atoms.size();
443 for (size_t i=0; i<num_atoms; ++i)
445 DWARFFormValue form_value (header_data.atoms[i].form);
// Extraction failure is detected here; presumably it aborts the decode.
447 if (!form_value.ExtractValue(data, offset_ptr, NULL))
450 switch (header_data.atoms[i].type)
452 case eAtomTypeDIEOffset: // DIE offset, check form for encoding
// The stored value is resolved against die_base_offset.
453 hash_data.offset = (dw_offset_t)form_value.Reference (header_data.die_base_offset);
456 case eAtomTypeTag: // DW_TAG value for the DIE
457 hash_data.tag = (dw_tag_t)form_value.Unsigned ();
459 case eAtomTypeTypeFlags: // Flags from enum TypeFlags
460 hash_data.type_flags = (uint32_t)form_value.Unsigned ();
463 case eAtomTypeQualNameHash: // 32 bit hash of the fully qualified name
464 hash_data.qualified_name_hash = form_value.Unsigned ();
468 // We can always skip atoms we don't know about
// Prints hash_data to strm, one field per atom in header order, with ", "
// written between fields.
476 Dump (lldb_private::Stream& strm, const DIEInfo &hash_data) const
478 const size_t num_atoms = header_data.atoms.size();
479 for (size_t i=0; i<num_atoms; ++i)
482 strm.PutCString (", ");
// NOTE(review): form_value is constructed here but none of the print
// statements below read it -- possibly removable; confirm.
484 DWARFFormValue form_value (header_data.atoms[i].form);
485 switch (header_data.atoms[i].type)
487 case eAtomTypeDIEOffset: // DIE offset, check form for encoding
488 strm.Printf ("{0x%8.8x}", hash_data.offset);
491 case eAtomTypeTag: // DW_TAG value for the DIE
// The tag is printed by name, or numerically by the fallback Printf below.
493 const char *tag_cstr = lldb_private::DW_TAG_value_to_name (hash_data.tag);
495 strm.PutCString (tag_cstr);
497 strm.Printf ("DW_TAG_(0x%4.4x)", hash_data.tag);
501 case eAtomTypeTypeFlags: // Flags from enum TypeFlags
502 strm.Printf ("0x%2.2x", hash_data.type_flags);
// Non-zero flags are followed by a parenthesised list of decoded names.
503 if (hash_data.type_flags)
505 strm.PutCString (" (");
506 if (hash_data.type_flags & eTypeFlagClassIsImplementation)
507 strm.PutCString (" implementation");
508 strm.PutCString (" )");
512 case eAtomTypeQualNameHash: // 32 bit hash of the fully qualified name
513 strm.Printf ("0x%8.8x", hash_data.qualified_name_hash);
// Atom types without dedicated formatting are printed by numeric value.
517 strm.Printf ("AtomType(0x%x)", header_data.atoms[i].type);
530 // AppendNames (DWARFDebugPubnamesSet &pubnames_set,
531 // StringTable &string_table);
534 // AppendNamesEntry (SymbolFileDWARF *dwarf2Data,
535 // const DWARFCompileUnit* cu,
536 // const DWARFDebugInfoEntry* die,
537 // StringTable &string_table);
540 // AppendTypesEntry (DWARFData *dwarf2Data,
541 // const DWARFCompileUnit* cu,
542 // const DWARFDebugInfoEntry* die,
543 // StringTable &string_table);
546 // Save (BinaryStreamBuf &names_data, const StringTable &string_table);
549 // AppendName (const char *name,
550 // uint32_t die_offset,
551 // StringTable &string_table,
552 // dw_offset_t name_debug_str_offset = DW_INVALID_OFFSET); // If "name" has already been looked up, then it can be supplied
554 // AppendType (const char *name,
555 // uint32_t die_offset,
556 // StringTable &string_table);
563 // uint32_t str_offset;
564 // uint32_t die_offset;
567 // // Map uniqued .debug_str offset to the corresponding DIE offsets
568 // typedef std::map<uint32_t, DIEInfoArray> NameInfo;
569 // // Map a name hash to one or more name infos
570 // typedef std::map<uint32_t, NameInfo> BucketEntry;
573 // GetByteSize (const NameInfo &name_info);
575 // typedef std::vector<BucketEntry> BucketEntryColl;
576 // typedef std::vector<Entry> EntryColl;
577 // EntryColl m_entries;
582 // A class for reading and using a saved hash table from a block of data
// Keys are 32-bit values that GetStringForKeyType() treats as .debug_str
// offsets; each key maps to a DIEInfoArray.
584 class MemoryTable : public MappedHash::MemoryTable<uint32_t, DWARFMappedHash::Header, DIEInfoArray>
// table_data is handed to the base class and string_table is bound to the
// m_string_table reference member.  Because the members are references, the
// extractors must outlive this object.
588 MemoryTable (lldb_private::DWARFDataExtractor &table_data,
589 const lldb_private::DWARFDataExtractor &string_table,
591 MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray> (table_data),
593 m_string_table (string_table),
// Resolves a key to its C string by looking the key up in the string table.
604 GetStringForKeyType (KeyType key) const
606 // The key in the DWARF table is the .debug_str offset for the string
607 return m_string_table.PeekCStr (key);
// Reads the hash-data record at hash_data_offset into hash_data: skips the
// 4-byte string offset, reads a 32-bit entry count, then decodes that many
// DIEInfo entries.
// NOTE(review): no validation of `count` is visible before resize() -- unlike
// GetHashDataForName(), which checks ValidOffsetForDataOfSize() first.
// Confirm a corrupt count cannot trigger a huge allocation.
611 ReadHashData (uint32_t hash_data_offset,
612 HashData &hash_data) const
614 lldb::offset_t offset = hash_data_offset;
615 offset += 4; // Skip string table offset that contains offset of hash name in .debug_str
616 const uint32_t count = m_data.GetU32 (&offset);
619 hash_data.resize(count);
620 for (uint32_t i=0; i<count; ++i)
622 if (!m_header.Read(m_data, &offset, hash_data[i]))
// Examines the key/value record at *hash_data_offset_ptr.  Reads the key (a
// .debug_str offset), compares the string it names against `name` and, on a
// match, collects the record's DIEInfo entries into pair.value.  The offset is
// advanced as data is consumed and is set to UINT32_MAX when the data is
// malformed.  Returns eResultEndOfHashData for a zero key, otherwise
// eResultKeyMatch or eResultKeyMismatch.
632 GetHashDataForName (const char *name,
633 lldb::offset_t* hash_data_offset_ptr,
636 pair.key = m_data.GetU32 (hash_data_offset_ptr);
639 // If the key is zero, this terminates our chain of HashData objects
640 // for this hash value.
642 return eResultEndOfHashData;
644 // There definitely should be a string for this string offset, if
645 // there isn't, there is something wrong, return an error
646 const char *strp_cstr = m_string_table.PeekCStr (pair.key);
647 if (strp_cstr == NULL)
649 *hash_data_offset_ptr = UINT32_MAX;
653 const uint32_t count = m_data.GetU32 (hash_data_offset_ptr);
// NOTE(review): this product is computed in size_t; where size_t is 32 bits a
// large count could wrap before the bounds check below -- confirm.
654 const size_t min_total_hash_data_size = count * m_header.header_data.GetMinumumHashDataByteSize();
655 if (count > 0 && m_data.ValidOffsetForDataOfSize (*hash_data_offset_ptr, min_total_hash_data_size))
657 // We have at least one HashData entry, and we have enough
658 // data to parse at least "count" HashData entries.
660 // First make sure the entire C string matches...
661 const bool match = strcmp (name, strp_cstr) == 0;
663 if (!match && m_header.header_data.HashDataHasFixedByteSize())
665 // If the string doesn't match and we have fixed size data,
666 // we can just add the total byte size of all HashData objects
667 // to the hash data offset and be done...
668 *hash_data_offset_ptr += min_total_hash_data_size;
672 // If the string does match, or we don't have fixed size data
673 // then we need to read the hash data as a stream. If the
674 // string matches we also append all HashData objects to the
676 for (uint32_t i=0; i<count; ++i)
679 if (m_header.Read(m_data, hash_data_offset_ptr, die_info))
681 // Only append the HashData if the string matched...
683 pair.value.push_back (die_info);
687 // Something went wrong while reading the data
688 *hash_data_offset_ptr = UINT32_MAX;
693 // Return the correct response depending on if the string matched
696 return eResultKeyMatch; // The key (cstring) matches and we have lookup results!
698 return eResultKeyMismatch; // The key doesn't match, this function will get called
699 // again for the next key/value or the key terminator
700 // which in our case is a zero .debug_str offset.
// Reached when count is zero or the record would run past the available data.
704 *hash_data_offset_ptr = UINT32_MAX;
// Regular-expression counterpart of GetHashDataForName(): examines the
// key/value record at *hash_data_offset_ptr, runs `regex` against the record's
// string and, on a match, collects the record's DIEInfo entries into
// pair.value.  The offset is advanced as data is consumed and is set to
// UINT32_MAX when the data is malformed.
710 AppendHashDataForRegularExpression (const lldb_private::RegularExpression& regex,
711 lldb::offset_t* hash_data_offset_ptr,
714 pair.key = m_data.GetU32 (hash_data_offset_ptr);
715 // If the key is zero, this terminates our chain of HashData objects
716 // for this hash value.
718 return eResultEndOfHashData;
720 // There definitely should be a string for this string offset, if
721 // there isn't, there is something wrong, return an error
722 const char *strp_cstr = m_string_table.PeekCStr (pair.key);
723 if (strp_cstr == NULL)
726 const uint32_t count = m_data.GetU32 (hash_data_offset_ptr);
// NOTE(review): this product is computed in size_t; where size_t is 32 bits a
// large count could wrap before the bounds check below -- confirm.
727 const size_t min_total_hash_data_size = count * m_header.header_data.GetMinumumHashDataByteSize();
728 if (count > 0 && m_data.ValidOffsetForDataOfSize (*hash_data_offset_ptr, min_total_hash_data_size))
730 const bool match = regex.Execute(strp_cstr);
732 if (!match && m_header.header_data.HashDataHasFixedByteSize())
734 // If the regex doesn't match and we have fixed size data,
735 // we can just add the total byte size of all HashData objects
736 // to the hash data offset and be done...
737 *hash_data_offset_ptr += min_total_hash_data_size;
741 // If the string does match, or we don't have fixed size data
742 // then we need to read the hash data as a stream. If the
743 // string matches we also append all HashData objects to the
745 for (uint32_t i=0; i<count; ++i)
748 if (m_header.Read(m_data, hash_data_offset_ptr, die_info))
750 // Only append the HashData if the string matched...
752 pair.value.push_back (die_info);
756 // Something went wrong while reading the data
757 *hash_data_offset_ptr = UINT32_MAX;
762 // Return the correct response depending on if the string matched
765 return eResultKeyMatch; // The key (cstring) matches and we have lookup results!
767 return eResultKeyMismatch; // The key doesn't match, this function will get called
768 // again for the next key/value or the key terminator
769 // which in our case is a zero .debug_str offset.
// Reached when count is zero or the record would run past the available data.
773 *hash_data_offset_ptr = UINT32_MAX;
// Walks every hash chain in the table and gathers the DIEInfo entries whose
// name matches `regex`.  Returns the resulting size of die_info_array.
// NOTE(review): despite the "Append" in the name, the results are swapped into
// die_info_array, so whatever the caller passed in is replaced rather than
// extended -- confirm callers expect this.
779 AppendAllDIEsThatMatchingRegex (const lldb_private::RegularExpression& regex,
780 DIEInfoArray &die_info_array) const
782 const uint32_t hash_count = m_header.hashes_count;
784 for (uint32_t offset_idx=0; offset_idx<hash_count; ++offset_idx)
786 lldb::offset_t hash_data_offset = GetHashDataOffset (offset_idx);
787 while (hash_data_offset != UINT32_MAX)
789 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
790 Result hash_result = AppendHashDataForRegularExpression (regex, &hash_data_offset, pair);
// Detects a call that consumed nothing; presumably guards against looping
// forever on the same offset.
791 if (prev_hash_data_offset == hash_data_offset)
794 // Check the result of getting our hash data
797 case eResultKeyMatch:
798 case eResultKeyMismatch:
799 // Whether we matched or not, it doesn't matter, we
803 case eResultEndOfHashData:
// Setting the sentinel ends the inner while loop for this chain.
805 hash_data_offset = UINT32_MAX;
810 die_info_array.swap (pair.value);
811 return die_info_array.size();
// Walks every hash chain in the table and appends each DIEInfo whose offset
// lies in the half-open range [die_offset_start, die_offset_end).  Returns the
// total size of die_info_array, which includes any entries it already held.
815 AppendAllDIEsInRange (const uint32_t die_offset_start,
816 const uint32_t die_offset_end,
817 DIEInfoArray &die_info_array) const
819 const uint32_t hash_count = m_header.hashes_count;
820 for (uint32_t offset_idx=0; offset_idx<hash_count; ++offset_idx)
823 lldb::offset_t hash_data_offset = GetHashDataOffset (offset_idx);
824 while (!done && hash_data_offset != UINT32_MAX)
826 KeyType key = m_data.GetU32 (&hash_data_offset);
827 // If the key is zero, this terminates our chain of HashData objects
828 // for this hash value.
// NOTE(review): count is read from the data and no validation is visible
// before the loop -- confirm Read() fails cleanly at the end of the data.
832 const uint32_t count = m_data.GetU32 (&hash_data_offset);
833 for (uint32_t i=0; i<count; ++i)
836 if (m_header.Read(m_data, &hash_data_offset, die_info))
// A zero DIE offset gets special handling (presumably treated as a terminator
// -- confirm).
838 if (die_info.offset == 0)
840 if (die_offset_start <= die_info.offset && die_info.offset < die_offset_end)
841 die_info_array.push_back(die_info);
846 return die_info_array.size();
// Looks `name` up and appends the DIE offset of every matching entry to
// die_offsets.  Returns the number of DIEInfo entries found for the name.
850 FindByName (const char *name, DIEArray &die_offsets)
852 DIEInfoArray die_info_array;
853 if (FindByName(name, die_info_array))
854 DWARFMappedHash::ExtractDIEArray (die_info_array, die_offsets);
855 return die_info_array.size();
// Looks `name` up and appends to die_offsets the offsets of the matching
// entries that pass the tag filter.
// NOTE(review): the return value is the number of entries found for the name,
// not the number that passed the tag filter and were added to die_offsets --
// confirm callers do not treat it as the latter.
859 FindByNameAndTag (const char *name,
861 DIEArray &die_offsets)
863 DIEInfoArray die_info_array;
864 if (FindByName(name, die_info_array))
865 DWARFMappedHash::ExtractDIEArray (die_info_array, tag, die_offsets);
866 return die_info_array.size();
// Looks `name` up and appends to die_offsets the offsets of the matching
// entries that pass both the tag filter and the qualified-name-hash filter.
// NOTE(review): the return value is the number of entries found for the name,
// not the number that passed the filters and were added to die_offsets --
// confirm callers do not treat it as the latter.
870 FindByNameAndTagAndQualifiedNameHash (const char *name,
872 const uint32_t qualified_name_hash,
873 DIEArray &die_offsets)
875 DIEInfoArray die_info_array;
876 if (FindByName(name, die_info_array))
877 DWARFMappedHash::ExtractDIEArray (die_info_array, tag, qualified_name_hash, die_offsets);
878 return die_info_array.size();
// Looks `name` up and fills die_offsets with candidate class definitions.
// When must_be_implementation is set and the table stores type flags, entries
// are filtered by eTypeFlagClassIsImplementation; otherwise class/struct
// entries are collected via ExtractClassOrStructDIEArray().  Unlike the other
// Find* helpers this returns die_offsets.size().
882 FindCompleteObjCClassByName (const char *name, DIEArray &die_offsets, bool must_be_implementation)
884 DIEInfoArray die_info_array;
885 if (FindByName(name, die_info_array))
887 if (must_be_implementation && GetHeader().header_data.ContainsAtom (eAtomTypeTypeFlags))
889 // If we have two atoms, then we have the DIE offset and
890 // the type flags so we can find the objective C class
892 DWARFMappedHash::ExtractTypesFromDIEArray (die_info_array,
894 eTypeFlagClassIsImplementation,
899 // We don't only want the one true definition, so try and see
900 // what we can find, and only return class or struct DIEs.
901 // If we do have the full implementation, then return it alone,
902 // else return all possible matches.
903 const bool return_implementation_only_if_available = true;
904 DWARFMappedHash::ExtractClassOrStructDIEArray (die_info_array,
905 return_implementation_only_if_available,
909 return die_offsets.size();
// Looks `name` up via Find() and hands the matching DIEInfo entries to the
// caller through die_info_array.
// NOTE(review): on success the results are swapped into die_info_array, which
// replaces its previous contents, yet the return value is computed as
// new size - old_size.  If the array held more entries on entry than were
// found, this unsigned subtraction wraps around.  Appending, or returning
// die_info_array.size(), would be consistent -- confirm the intent.
913 FindByName (const char *name, DIEInfoArray &die_info_array)
916 size_t old_size = die_info_array.size();
917 if (Find (name, kv_pair))
919 die_info_array.swap(kv_pair.value);
920 return die_info_array.size() - old_size;
// Both extractors are held by reference, not copied; they must stay alive for
// as long as this table is used.
926 const lldb_private::DWARFDataExtractor &m_data;
927 const lldb_private::DWARFDataExtractor &m_string_table;
933 #endif // SymbolFileDWARF_HashedNameToDIE_h_