contrib/llvm-project/lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp

   1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "HashedNameToDIE.h"
  10 #include "llvm/ADT/StringRef.h"
  11
  12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
  13                                       DIEArray &die_offsets) {
  14   const size_t count = die_info_array.size();
  15   for (size_t i = 0; i < count; ++i)
  16     die_offsets.emplace_back(die_info_array[i]);
  17 }
  18
  19 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
  20                                       const dw_tag_t tag,
  21                                       DIEArray &die_offsets) {
  22   if (tag == 0) {
  23     ExtractDIEArray(die_info_array, die_offsets);
  24   } else {
  25     const size_t count = die_info_array.size();
  26     for (size_t i = 0; i < count; ++i) {
  27       const dw_tag_t die_tag = die_info_array[i].tag;
  28       bool tag_matches = die_tag == 0 || tag == die_tag;
  29       if (!tag_matches) {
  30         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
  31           tag_matches =
  32               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
  33       }
  34       if (tag_matches)
  35         die_offsets.emplace_back(die_info_array[i]);
  36     }
  37   }
  38 }
  39
  40 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
  41                                       const dw_tag_t tag,
  42                                       const uint32_t qualified_name_hash,
  43                                       DIEArray &die_offsets) {
  44   if (tag == 0) {
  45     ExtractDIEArray(die_info_array, die_offsets);
  46   } else {
  47     const size_t count = die_info_array.size();
  48     for (size_t i = 0; i < count; ++i) {
  49       if (qualified_name_hash != die_info_array[i].qualified_name_hash)
  50         continue;
  51       const dw_tag_t die_tag = die_info_array[i].tag;
  52       bool tag_matches = die_tag == 0 || tag == die_tag;
  53       if (!tag_matches) {
  54         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
  55           tag_matches =
  56               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
  57       }
  58       if (tag_matches)
  59         die_offsets.emplace_back(die_info_array[i]);
  60     }
  61   }
  62 }
  63
  64 void DWARFMappedHash::ExtractClassOrStructDIEArray(
  65     const DIEInfoArray &die_info_array,
  66     bool return_implementation_only_if_available, DIEArray &die_offsets) {
  67   const size_t count = die_info_array.size();
  68   for (size_t i = 0; i < count; ++i) {
  69     const dw_tag_t die_tag = die_info_array[i].tag;
  70     if (die_tag == 0 || die_tag == DW_TAG_class_type ||
  71         die_tag == DW_TAG_structure_type) {
  72       if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) {
  73         if (return_implementation_only_if_available) {
  74           // We found the one true definition for this class, so only return
  75           // that
  76           die_offsets.clear();
  77           die_offsets.emplace_back(die_info_array[i]);
  78           return;
  79         } else {
  80           // Put the one true definition as the first entry so it matches first
  81           die_offsets.emplace(die_offsets.begin(), die_info_array[i]);
  82         }
  83       } else {
  84         die_offsets.emplace_back(die_info_array[i]);
  85       }
  86     }
  87   }
  88 }
  89
  90 void DWARFMappedHash::ExtractTypesFromDIEArray(
  91     const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
  92     uint32_t type_flag_value, DIEArray &die_offsets) {
  93   const size_t count = die_info_array.size();
  94   for (size_t i = 0; i < count; ++i) {
  95     if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value)
  96       die_offsets.emplace_back(die_info_array[i]);
  97   }
  98 }
  99
 100 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) {
 101   switch (atom) {
 102   case eAtomTypeNULL:
 103     return "NULL";
 104   case eAtomTypeDIEOffset:
 105     return "die-offset";
 106   case eAtomTypeCUOffset:
 107     return "cu-offset";
 108   case eAtomTypeTag:
 109     return "die-tag";
 110   case eAtomTypeNameFlags:
 111     return "name-flags";
 112   case eAtomTypeTypeFlags:
 113     return "type-flags";
 114   case eAtomTypeQualNameHash:
 115     return "qualified-name-hash";
 116   }
 117   return "<invalid>";
 118 }
 119
 120 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t o, dw_tag_t t, uint32_t f,
 121                                   uint32_t h)
 122     : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {}
 123
 124 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset)
 125     : die_base_offset(_die_base_offset), atoms(), atom_mask(0),
 126       min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) {
 127   // Define an array of DIE offsets by first defining an array, and then define
 128   // the atom type for the array, in this case we have an array of DIE offsets
 129   AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
 130 }
 131
 132 void DWARFMappedHash::Prologue::ClearAtoms() {
 133   hash_data_has_fixed_byte_size = true;
 134   min_hash_data_byte_size = 0;
 135   atom_mask = 0;
 136   atoms.clear();
 137 }
 138
 139 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const {
 140   return (atom_mask & (1u << atom_type)) != 0;
 141 }
 142
 143 void DWARFMappedHash::Prologue::Clear() {
 144   die_base_offset = 0;
 145   ClearAtoms();
 146 }
 147
 148 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) {
 149   atoms.push_back({type, form});
 150   atom_mask |= 1u << type;
 151   switch (form) {
 152   case DW_FORM_indirect:
 153   case DW_FORM_exprloc:
 154   case DW_FORM_flag_present:
 155   case DW_FORM_ref_sig8:
 156     llvm_unreachable("Unhandled atom form");
 157
 158   case DW_FORM_addrx:
 159   case DW_FORM_string:
 160   case DW_FORM_block:
 161   case DW_FORM_block1:
 162   case DW_FORM_sdata:
 163   case DW_FORM_udata:
 164   case DW_FORM_ref_udata:
 165   case DW_FORM_GNU_addr_index:
 166   case DW_FORM_GNU_str_index:
 167     hash_data_has_fixed_byte_size = false;
 168     LLVM_FALLTHROUGH;
 169   case DW_FORM_flag:
 170   case DW_FORM_data1:
 171   case DW_FORM_ref1:
 172   case DW_FORM_sec_offset:
 173     min_hash_data_byte_size += 1;
 174     break;
 175
 176   case DW_FORM_block2:
 177     hash_data_has_fixed_byte_size = false;
 178     LLVM_FALLTHROUGH;
 179   case DW_FORM_data2:
 180   case DW_FORM_ref2:
 181     min_hash_data_byte_size += 2;
 182     break;
 183
 184   case DW_FORM_block4:
 185     hash_data_has_fixed_byte_size = false;
 186     LLVM_FALLTHROUGH;
 187   case DW_FORM_data4:
 188   case DW_FORM_ref4:
 189   case DW_FORM_addr:
 190   case DW_FORM_ref_addr:
 191   case DW_FORM_strp:
 192     min_hash_data_byte_size += 4;
 193     break;
 194
 195   case DW_FORM_data8:
 196   case DW_FORM_ref8:
 197     min_hash_data_byte_size += 8;
 198     break;
 199   }
 200 }
 201
 202 lldb::offset_t
 203 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data,
 204                                 lldb::offset_t offset) {
 205   ClearAtoms();
 206
 207   die_base_offset = data.GetU32(&offset);
 208
 209   const uint32_t atom_count = data.GetU32(&offset);
 210   if (atom_count == 0x00060003u) {
 211     // Old format, deal with contents of old pre-release format
 212     while (data.GetU32(&offset))
 213       /* do nothing */;
 214
 215     // Hardcode to the only known value for now.
 216     AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
 217   } else {
 218     for (uint32_t i = 0; i < atom_count; ++i) {
 219       AtomType type = (AtomType)data.GetU16(&offset);
 220       dw_form_t form = (dw_form_t)data.GetU16(&offset);
 221       AppendAtom(type, form);
 222     }
 223   }
 224   return offset;
 225 }
 226
 227 size_t DWARFMappedHash::Prologue::GetByteSize() const {
 228   // Add an extra count to the atoms size for the zero termination Atom that
 229   // gets written to disk
 230   return sizeof(die_base_offset) + sizeof(uint32_t) +
 231          atoms.size() * sizeof(Atom);
 232 }
 233
 234 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const {
 235   return min_hash_data_byte_size;
 236 }
 237
 238 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const {
 239   return hash_data_has_fixed_byte_size;
 240 }
 241
 242 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) {
 243   return header_data.GetByteSize();
 244 }
 245
 246 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data,
 247                                              lldb::offset_t offset) {
 248   offset = MappedHash::Header<Prologue>::Read(data, offset);
 249   if (offset != UINT32_MAX) {
 250     offset = header_data.Read(data, offset);
 251   }
 252   return offset;
 253 }
 254
 255 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data,
 256                                    lldb::offset_t *offset_ptr,
 257                                    DIEInfo &hash_data) const {
 258   const size_t num_atoms = header_data.atoms.size();
 259   if (num_atoms == 0)
 260     return false;
 261
 262   for (size_t i = 0; i < num_atoms; ++i) {
 263     DWARFFormValue form_value(nullptr, header_data.atoms[i].form);
 264
 265     if (!form_value.ExtractValue(data, offset_ptr))
 266       return false;
 267
 268     switch (header_data.atoms[i].type) {
 269     case eAtomTypeDIEOffset: // DIE offset, check form for encoding
 270       hash_data.die_offset =
 271           DWARFFormValue::IsDataForm(form_value.Form())
 272               ? form_value.Unsigned()
 273               : form_value.Reference(header_data.die_base_offset);
 274       break;
 275
 276     case eAtomTypeTag: // DW_TAG value for the DIE
 277       hash_data.tag = (dw_tag_t)form_value.Unsigned();
 278       break;
 279
 280     case eAtomTypeTypeFlags: // Flags from enum TypeFlags
 281       hash_data.type_flags = (uint32_t)form_value.Unsigned();
 282       break;
 283
 284     case eAtomTypeQualNameHash: // Flags from enum TypeFlags
 285       hash_data.qualified_name_hash = form_value.Unsigned();
 286       break;
 287
 288     default:
 289       // We can always skip atoms we don't know about
 290       break;
 291     }
 292   }
 293   return hash_data.die_offset != DW_INVALID_OFFSET;
 294 }
 295
 296 DWARFMappedHash::MemoryTable::MemoryTable(
 297     lldb_private::DWARFDataExtractor &table_data,
 298     const lldb_private::DWARFDataExtractor &string_table, const char *name)
 299     : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data),
 300       m_data(table_data), m_string_table(string_table), m_name(name) {}
 301
 302 const char *
 303 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const {
 304   // The key in the DWARF table is the .debug_str offset for the string
 305   return m_string_table.PeekCStr(key);
 306 }
 307
 308 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset,
 309                                                 HashData &hash_data) const {
 310   lldb::offset_t offset = hash_data_offset;
 311   offset += 4; // Skip string table offset that contains offset of hash name in
 312                // .debug_str
 313   const uint32_t count = m_data.GetU32(&offset);
 314   if (count > 0) {
 315     hash_data.resize(count);
 316     for (uint32_t i = 0; i < count; ++i) {
 317       if (!m_header.Read(m_data, &offset, hash_data[i]))
 318         return false;
 319     }
 320   } else
 321     hash_data.clear();
 322   return true;
 323 }
 324
 325 DWARFMappedHash::MemoryTable::Result
 326 DWARFMappedHash::MemoryTable::GetHashDataForName(
 327     llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
 328     Pair &pair) const {
 329   pair.key = m_data.GetU32(hash_data_offset_ptr);
 330   pair.value.clear();
 331
 332   // If the key is zero, this terminates our chain of HashData objects for this
 333   // hash value.
 334   if (pair.key == 0)
 335     return eResultEndOfHashData;
 336
 337   // There definitely should be a string for this string offset, if there
 338   // isn't, there is something wrong, return and error
 339   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
 340   if (strp_cstr == nullptr) {
 341     *hash_data_offset_ptr = UINT32_MAX;
 342     return eResultError;
 343   }
 344
 345   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
 346   const size_t min_total_hash_data_size =
 347       count * m_header.header_data.GetMinimumHashDataByteSize();
 348   if (count > 0 &&
 349       m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
 350                                       min_total_hash_data_size)) {
 351     // We have at least one HashData entry, and we have enough data to parse at
 352     // least "count" HashData entries.
 353
 354     // First make sure the entire C string matches...
 355     const bool match = name == strp_cstr;
 356
 357     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
 358       // If the string doesn't match and we have fixed size data, we can just
 359       // add the total byte size of all HashData objects to the hash data
 360       // offset and be done...
 361       *hash_data_offset_ptr += min_total_hash_data_size;
 362     } else {
 363       // If the string does match, or we don't have fixed size data then we
 364       // need to read the hash data as a stream. If the string matches we also
 365       // append all HashData objects to the value array.
 366       for (uint32_t i = 0; i < count; ++i) {
 367         DIEInfo die_info;
 368         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
 369           // Only happened if the HashData of the string matched...
 370           if (match)
 371             pair.value.push_back(die_info);
 372         } else {
 373           // Something went wrong while reading the data
 374           *hash_data_offset_ptr = UINT32_MAX;
 375           return eResultError;
 376         }
 377       }
 378     }
 379     // Return the correct response depending on if the string matched or not...
 380     if (match)
 381       return eResultKeyMatch; // The key (cstring) matches and we have lookup
 382                               // results!
 383     else
 384       return eResultKeyMismatch; // The key doesn't match, this function will
 385                                  // get called
 386     // again for the next key/value or the key terminator which in our case is
 387     // a zero .debug_str offset.
 388   } else {
 389     *hash_data_offset_ptr = UINT32_MAX;
 390     return eResultError;
 391   }
 392 }
 393
 394 DWARFMappedHash::MemoryTable::Result
 395 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
 396     const lldb_private::RegularExpression &regex,
 397     lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
 398   pair.key = m_data.GetU32(hash_data_offset_ptr);
 399   // If the key is zero, this terminates our chain of HashData objects for this
 400   // hash value.
 401   if (pair.key == 0)
 402     return eResultEndOfHashData;
 403
 404   // There definitely should be a string for this string offset, if there
 405   // isn't, there is something wrong, return and error
 406   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
 407   if (strp_cstr == nullptr)
 408     return eResultError;
 409
 410   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
 411   const size_t min_total_hash_data_size =
 412       count * m_header.header_data.GetMinimumHashDataByteSize();
 413   if (count > 0 &&
 414       m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
 415                                       min_total_hash_data_size)) {
 416     const bool match = regex.Execute(llvm::StringRef(strp_cstr));
 417
 418     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
 419       // If the regex doesn't match and we have fixed size data, we can just
 420       // add the total byte size of all HashData objects to the hash data
 421       // offset and be done...
 422       *hash_data_offset_ptr += min_total_hash_data_size;
 423     } else {
 424       // If the string does match, or we don't have fixed size data then we
 425       // need to read the hash data as a stream. If the string matches we also
 426       // append all HashData objects to the value array.
 427       for (uint32_t i = 0; i < count; ++i) {
 428         DIEInfo die_info;
 429         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
 430           // Only happened if the HashData of the string matched...
 431           if (match)
 432             pair.value.push_back(die_info);
 433         } else {
 434           // Something went wrong while reading the data
 435           *hash_data_offset_ptr = UINT32_MAX;
 436           return eResultError;
 437         }
 438       }
 439     }
 440     // Return the correct response depending on if the string matched or not...
 441     if (match)
 442       return eResultKeyMatch; // The key (cstring) matches and we have lookup
 443                               // results!
 444     else
 445       return eResultKeyMismatch; // The key doesn't match, this function will
 446                                  // get called
 447     // again for the next key/value or the key terminator which in our case is
 448     // a zero .debug_str offset.
 449   } else {
 450     *hash_data_offset_ptr = UINT32_MAX;
 451     return eResultError;
 452   }
 453 }
 454
 455 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex(
 456     const lldb_private::RegularExpression &regex,
 457     DIEInfoArray &die_info_array) const {
 458   const uint32_t hash_count = m_header.hashes_count;
 459   Pair pair;
 460   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
 461     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
 462     while (hash_data_offset != UINT32_MAX) {
 463       const lldb::offset_t prev_hash_data_offset = hash_data_offset;
 464       Result hash_result =
 465           AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
 466       if (prev_hash_data_offset == hash_data_offset)
 467         break;
 468
 469       // Check the result of getting our hash data
 470       switch (hash_result) {
 471       case eResultKeyMatch:
 472       case eResultKeyMismatch:
 473         // Whether we matches or not, it doesn't matter, we keep looking.
 474         break;
 475
 476       case eResultEndOfHashData:
 477       case eResultError:
 478         hash_data_offset = UINT32_MAX;
 479         break;
 480       }
 481     }
 482   }
 483   die_info_array.swap(pair.value);
 484   return die_info_array.size();
 485 }
 486
 487 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange(
 488     const uint32_t die_offset_start, const uint32_t die_offset_end,
 489     DIEInfoArray &die_info_array) const {
 490   const uint32_t hash_count = m_header.hashes_count;
 491   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
 492     bool done = false;
 493     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
 494     while (!done && hash_data_offset != UINT32_MAX) {
 495       KeyType key = m_data.GetU32(&hash_data_offset);
 496       // If the key is zero, this terminates our chain of HashData objects for
 497       // this hash value.
 498       if (key == 0)
 499         break;
 500
 501       const uint32_t count = m_data.GetU32(&hash_data_offset);
 502       for (uint32_t i = 0; i < count; ++i) {
 503         DIEInfo die_info;
 504         if (m_header.Read(m_data, &hash_data_offset, die_info)) {
 505           if (die_info.die_offset == 0)
 506             done = true;
 507           if (die_offset_start <= die_info.die_offset &&
 508               die_info.die_offset < die_offset_end)
 509             die_info_array.push_back(die_info);
 510         }
 511       }
 512     }
 513   }
 514   return die_info_array.size();
 515 }
 516
 517 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
 518                                                 DIEArray &die_offsets) {
 519   if (name.empty())
 520     return 0;
 521
 522   DIEInfoArray die_info_array;
 523   if (FindByName(name, die_info_array))
 524     DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets);
 525   return die_info_array.size();
 526 }
 527
 528 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name,
 529                                                       const dw_tag_t tag,
 530                                                       DIEArray &die_offsets) {
 531   DIEInfoArray die_info_array;
 532   if (FindByName(name, die_info_array))
 533     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets);
 534   return die_info_array.size();
 535 }
 536
 537 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
 538     llvm::StringRef name, const dw_tag_t tag,
 539     const uint32_t qualified_name_hash, DIEArray &die_offsets) {
 540   DIEInfoArray die_info_array;
 541   if (FindByName(name, die_info_array))
 542     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
 543                                      die_offsets);
 544   return die_info_array.size();
 545 }
 546
 547 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
 548     llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) {
 549   DIEInfoArray die_info_array;
 550   if (FindByName(name, die_info_array)) {
 551     if (must_be_implementation &&
 552         GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
 553       // If we have two atoms, then we have the DIE offset and the type flags
 554       // so we can find the objective C class efficiently.
 555       DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX,
 556                                                 eTypeFlagClassIsImplementation,
 557                                                 die_offsets);
 558     } else {
 559       // We don't only want the one true definition, so try and see what we can
 560       // find, and only return class or struct DIEs. If we do have the full
 561       // implementation, then return it alone, else return all possible
 562       // matches.
 563       const bool return_implementation_only_if_available = true;
 564       DWARFMappedHash::ExtractClassOrStructDIEArray(
 565           die_info_array, return_implementation_only_if_available, die_offsets);
 566     }
 567   }
 568   return die_offsets.size();
 569 }
 570
 571 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
 572                                                 DIEInfoArray &die_info_array) {
 573   if (name.empty())
 574     return 0;
 575
 576   Pair kv_pair;
 577   size_t old_size = die_info_array.size();
 578   if (Find(name, kv_pair)) {
 579     die_info_array.swap(kv_pair.value);
 580     return die_info_array.size() - old_size;
 581   }
 582   return 0;
 583 }