1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 #include "MinidumpParser.h"
12 #include "NtStructures.h"
13 #include "RegisterContextMinidump_x86_32.h"
15 // Other libraries and framework includes
16 #include "lldb/Target/MemoryRegionInfo.h"
17 #include "lldb/Utility/LLDBAssert.h"
25 using namespace lldb_private;
26 using namespace minidump;
// Factory for a MinidumpParser over the bytes held by data_buf_sp.
// The buffer size is first compared against sizeof(MinidumpHeader); when the
// buffer is large enough a parser wrapping it is returned.
// data_buf_sp is dereferenced without a null check, so it must be non-null.
// NOTE(review): the statement executed for a too-small buffer is not visible
// in this excerpt -- presumably it returns llvm::None; confirm.
28 llvm::Optional<MinidumpParser>
29 MinidumpParser::Create(const lldb::DataBufferSP &data_buf_sp) {
30 if (data_buf_sp->GetByteSize() < sizeof(MinidumpHeader)) {
33 return MinidumpParser(data_buf_sp);
// Stores the shared data buffer in m_data_sp. Nothing is parsed or validated
// here; the body is empty.
36 MinidumpParser::MinidumpParser(const lldb::DataBufferSP &data_buf_sp)
37 : m_data_sp(data_buf_sp) {}
// Returns an llvm::ArrayRef spanning the whole buffer held in m_data_sp
// (GetBytes() .. GetBytes() + GetByteSize()). The ArrayRef is a non-owning
// view of that buffer.
39 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
40 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(),
41 m_data_sp->GetByteSize());
44 llvm::ArrayRef<uint8_t>
45 MinidumpParser::GetStream(MinidumpStreamType stream_type) {
46 auto iter = m_directory_map.find(static_cast<uint32_t>(stream_type));
47 if (iter == m_directory_map.end())
50 // check if there is enough data
51 if (iter->second.rva + iter->second.data_size > m_data_sp->GetByteSize())
54 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes() + iter->second.rva,
55 iter->second.data_size);
// Decodes the minidump string that starts rva bytes into the buffer.
// The bytes from rva to the end of the buffer are handed to
// parseMinidumpString and its result is returned unchanged.
// The guard uses '>', so rva == buffer size passes and parseMinidumpString
// receives an empty view.
// NOTE(review): the statement guarded by the rva check is not visible in this
// excerpt -- presumably it returns llvm::None; confirm.
58 llvm::Optional<std::string> MinidumpParser::GetMinidumpString(uint32_t rva) {
59 auto arr_ref = m_data_sp->GetData();
60 if (rva > arr_ref.size())
62 arr_ref = arr_ref.drop_front(rva);
63 return parseMinidumpString(arr_ref);
// Builds a UUID for `module` from its CodeView (CV) record.
// The record bytes are located through module->CV_record (rva, data_size).
// A leading 32-bit little-endian signature selects the layout:
//   - CvSignature::Pdb70: a CvRecordPdb70 structure is read next and its
//     bytes become the UUID.
//   - CvSignature::ElfBuildId: the bytes left in cv_record become the UUID
//     (presumably consumeObject has advanced past the signature -- confirm).
// NOTE(review): the statements that handle a failed consumeObject and an
// unrecognised signature are not visible in this excerpt; presumably an empty
// UUID is returned -- confirm.
// NOTE(review): no bounds check on CV_record.rva/data_size is visible before
// the slice -- confirm the record location is validated elsewhere.
66 UUID MinidumpParser::GetModuleUUID(const MinidumpModule *module) {
68 GetData().slice(module->CV_record.rva, module->CV_record.data_size);
70 // Read the CV record signature
71 const llvm::support::ulittle32_t *signature = nullptr;
72 Status error = consumeObject(cv_record, signature);
// Convert the on-disk little-endian value to a host integer before mapping it
// onto the CvSignature enum.
76 const CvSignature cv_signature =
77 static_cast<CvSignature>(static_cast<const uint32_t>(*signature));
79 if (cv_signature == CvSignature::Pdb70) {
81 const CvRecordPdb70 *pdb70_uuid = nullptr;
82 Status error = consumeObject(cv_record, pdb70_uuid);
84 return UUID::fromData(pdb70_uuid, sizeof(*pdb70_uuid));
85 } else if (cv_signature == CvSignature::ElfBuildId)
86 return UUID::fromData(cv_record);
// Returns the thread records of the dump: the ThreadList stream is fetched
// with GetStream and handed to MinidumpThread::ParseThreadList.
// NOTE(review): the lines between the fetch and the parse are not visible in
// this excerpt (presumably an empty-stream guard) -- confirm.
91 llvm::ArrayRef<MinidumpThread> MinidumpParser::GetThreads() {
92 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ThreadList);
97 return MinidumpThread::ParseThreadList(data);
100 llvm::ArrayRef<uint8_t>
101 MinidumpParser::GetThreadContext(const MinidumpThread &td) {
102 if (td.thread_context.rva + td.thread_context.data_size > GetData().size())
105 return GetData().slice(td.thread_context.rva, td.thread_context.data_size);
// Returns the 32-bit register context of a WOW64 thread, located through the
// thread's 64-bit TEB rather than through td.thread_context.
// Steps visible here: read sizeof(TEB64) bytes at td.teb, interpret them as a
// TEB64, then read sizeof(MinidumpContext_x86_32) bytes at
// tls_slots[1] + 4 and verify that the full amount was obtained.
// NOTE(review): the failure-path statements and the final return are not
// visible in this excerpt; presumably an empty ArrayRef on failure and the
// context bytes on success -- confirm.
108 llvm::ArrayRef<uint8_t>
109 MinidumpParser::GetThreadContextWow64(const MinidumpThread &td) {
110 // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If
111 // the minidump was captured with a 64-bit debugger, then the CONTEXT we just
112 // grabbed from the mini_dump_thread is the one for the 64-bit "native"
113 // process rather than the 32-bit "guest" process we care about. In this
114 // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment
115 // Block) of the 64-bit process.
116 auto teb_mem = GetMemory(td.teb, sizeof(TEB64));
120 const TEB64 *wow64teb;
121 Status error = consumeObject(teb_mem, wow64teb);
125 // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure
126 // that includes the 32-bit CONTEXT (after a ULONG). See:
127 // https://msdn.microsoft.com/en-us/library/ms681670.aspx
// The "+ 4" skips the leading ULONG mentioned above.
129 GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32));
// GetMemory may return fewer bytes than requested, so a short read is checked
// for explicitly.
130 if (context.size() < sizeof(MinidumpContext_x86_32))
134 // NOTE: We don't currently use the TEB for anything else. If we
135 // need it in the future, the 32-bit TEB is located according to the address
136 // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]).
// Returns the parsed SystemInfo stream: the stream bytes are fetched with
// GetStream and passed to MinidumpSystemInfo::Parse.
// NOTE(review): the statement guarded by the empty-stream check is not
// visible in this excerpt -- presumably it returns nullptr; confirm.
139 const MinidumpSystemInfo *MinidumpParser::GetSystemInfo() {
140 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::SystemInfo);
142 if (data.size() == 0)
145 return MinidumpSystemInfo::Parse(data);
// Derives an ArchSpec from the SystemInfo stream.
// A triple is filled in from two fields of the stream:
//   - processor_arch selects the architecture (X86, AMD64, ARM, ARM64; an
//     UnknownArch assignment is also present, presumably the default case).
//   - platform_id selects the OS (the Win32 variants, Linux, MacOSX, Android;
//     an UnknownOS assignment is also present, presumably the default case).
// The vendor is always set to UnknownVendor. The finished triple is stored
// into arch_spec with SetTriple.
// NOTE(review): the switch headers, break statements, the handling of a
// missing SystemInfo stream and the return statement are not visible in this
// excerpt -- confirm against the full file.
148 ArchSpec MinidumpParser::GetArchitecture() {
150 const MinidumpSystemInfo *system_info = GetSystemInfo();
155 // TODO: what to do about big-endian flavors of ARM?
156 // TODO: set the ARM subarch stuff if the minidump has info about it
159 triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
// Convert the stored field to a host uint32_t before mapping it onto the enum.
161 const MinidumpCPUArchitecture arch =
162 static_cast<const MinidumpCPUArchitecture>(
163 static_cast<const uint32_t>(system_info->processor_arch));
166 case MinidumpCPUArchitecture::X86:
167 triple.setArch(llvm::Triple::ArchType::x86);
169 case MinidumpCPUArchitecture::AMD64:
170 triple.setArch(llvm::Triple::ArchType::x86_64);
172 case MinidumpCPUArchitecture::ARM:
173 triple.setArch(llvm::Triple::ArchType::arm);
175 case MinidumpCPUArchitecture::ARM64:
176 triple.setArch(llvm::Triple::ArchType::aarch64);
179 triple.setArch(llvm::Triple::ArchType::UnknownArch);
183 const MinidumpOSPlatform os = static_cast<const MinidumpOSPlatform>(
184 static_cast<const uint32_t>(system_info->platform_id));
186 // TODO: add all of the OSes that Minidump/breakpad distinguishes?
// All four Win32 platform ids map to the same OS value.
188 case MinidumpOSPlatform::Win32S:
189 case MinidumpOSPlatform::Win32Windows:
190 case MinidumpOSPlatform::Win32NT:
191 case MinidumpOSPlatform::Win32CE:
192 triple.setOS(llvm::Triple::OSType::Win32);
194 case MinidumpOSPlatform::Linux:
195 triple.setOS(llvm::Triple::OSType::Linux);
197 case MinidumpOSPlatform::MacOSX:
198 triple.setOS(llvm::Triple::OSType::MacOSX);
// Android is expressed as OS Linux plus the Android environment.
200 case MinidumpOSPlatform::Android:
201 triple.setOS(llvm::Triple::OSType::Linux);
202 triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
205 triple.setOS(llvm::Triple::OSType::UnknownOS);
209 arch_spec.SetTriple(triple);
// Returns the parsed MiscInfo stream: the stream bytes are fetched with
// GetStream and passed to MinidumpMiscInfo::Parse.
// NOTE(review): the statement guarded by the empty-stream check is not
// visible in this excerpt -- presumably it returns nullptr; confirm.
214 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() {
215 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MiscInfo);
217 if (data.size() == 0)
220 return MinidumpMiscInfo::Parse(data);
// Returns the parsed LinuxProcStatus stream: the stream bytes are fetched
// with GetStream and passed to LinuxProcStatus::Parse.
// NOTE(review): the statement guarded by the empty-stream check is not
// visible in this excerpt -- presumably it returns llvm::None; confirm.
223 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() {
224 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::LinuxProcStatus);
226 if (data.size() == 0)
229 return LinuxProcStatus::Parse(data);
// Returns the process id recorded in the dump.
// Two sources are consulted in order: the MiscInfo stream, and if that stream
// is absent, the LinuxProcStatus stream.
// NOTE(review): the result when neither stream is available is not visible in
// this excerpt -- presumably llvm::None; confirm.
232 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() {
233 const MinidumpMiscInfo *misc_info = GetMiscInfo();
// MiscInfo takes precedence: when it is present its answer is returned as-is
// and LinuxProcStatus is not consulted.
234 if (misc_info != nullptr) {
235 return misc_info->GetPid();
238 llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus();
239 if (proc_status.hasValue()) {
240 return proc_status->GetPid();
// Returns the module records of the dump: the ModuleList stream is fetched
// with GetStream and passed to MinidumpModule::ParseModuleList.
// NOTE(review): the statement guarded by the empty-stream check is not
// visible in this excerpt -- presumably it returns an empty list; confirm.
246 llvm::ArrayRef<MinidumpModule> MinidumpParser::GetModuleList() {
247 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::ModuleList);
249 if (data.size() == 0)
252 return MinidumpModule::ParseModuleList(data);
255 std::vector<const MinidumpModule *> MinidumpParser::GetFilteredModuleList() {
256 llvm::ArrayRef<MinidumpModule> modules = GetModuleList();
257 // map module_name -> pair(load_address, pointer to module struct in memory)
258 llvm::StringMap<std::pair<uint64_t, const MinidumpModule *>> lowest_addr;
260 std::vector<const MinidumpModule *> filtered_modules;
262 llvm::Optional<std::string> name;
263 std::string module_name;
265 for (const auto &module : modules) {
266 name = GetMinidumpString(module.module_name_rva);
271 module_name = name.getValue();
273 auto iter = lowest_addr.end();
275 std::tie(iter, exists) = lowest_addr.try_emplace(
276 module_name, std::make_pair(module.base_of_image, &module));
278 if (exists && module.base_of_image < iter->second.first)
279 iter->second = std::make_pair(module.base_of_image, &module);
282 filtered_modules.reserve(lowest_addr.size());
283 for (const auto &module : lowest_addr) {
284 filtered_modules.push_back(module.second.second);
287 return filtered_modules;
// Returns the parsed Exception stream: the stream bytes are fetched with
// GetStream and passed to MinidumpExceptionStream::Parse.
// NOTE(review): the statement guarded by the empty-stream check is not
// visible in this excerpt -- presumably it returns nullptr; confirm.
290 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() {
291 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::Exception);
293 if (data.size() == 0)
296 return MinidumpExceptionStream::Parse(data);
// Looks up the captured memory range that contains addr.
// Both the MemoryList and the Memory64List streams are fetched.
//   - MemoryList descriptors each carry their own file location
//     (memory.rva, memory.data_size).
//   - Memory64List descriptors carry only a start address and a size; their
//     bytes are located from a running file offset (base_rva) that is
//     advanced by each descriptor's size.
// On a hit, a minidump::Range is returned holding the range's start address
// and a view of the file bytes backing it.
// NOTE(review): several statements are not visible in this excerpt (the
// results of the early-exit checks, the enclosing condition of the MemoryList
// loop, the declaration of base_rva and the final miss result); presumably
// llvm::None is returned on a miss -- confirm.
// NOTE(review): loc_desc.rva + loc_desc.data_size looks like a 32-bit sum
// that could wrap before the comparison -- confirm the field types.
299 llvm::Optional<minidump::Range>
300 MinidumpParser::FindMemoryRange(lldb::addr_t addr) {
301 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryList);
302 llvm::ArrayRef<uint8_t> data64 = GetStream(MinidumpStreamType::Memory64List);
304 if (data.empty() && data64.empty())
308 llvm::ArrayRef<MinidumpMemoryDescriptor> memory_list =
309 MinidumpMemoryDescriptor::ParseMemoryList(data);
311 if (memory_list.empty())
314 for (const auto &memory_desc : memory_list) {
315 const MinidumpLocationDescriptor &loc_desc = memory_desc.memory;
316 const lldb::addr_t range_start = memory_desc.start_of_memory_range;
317 const size_t range_size = loc_desc.data_size;
// The descriptor must point entirely inside the file before it is sliced.
319 if (loc_desc.rva + loc_desc.data_size > GetData().size())
// Half-open interval test: [range_start, range_start + range_size).
322 if (range_start <= addr && addr < range_start + range_size) {
323 return minidump::Range(range_start,
324 GetData().slice(loc_desc.rva, range_size));
329 // Some Minidumps have a Memory64ListStream that captures all the heap memory
330 // (full-memory Minidumps). We can't exactly use the same loop as above,
331 // because the Minidump uses slightly different data structures to describe
334 if (!data64.empty()) {
335 llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
// ParseMemory64List yields both the descriptor list and the file offset of
// the first range's bytes.
337 std::tie(memory64_list, base_rva) =
338 MinidumpMemoryDescriptor64::ParseMemory64List(data64);
340 if (memory64_list.empty())
343 for (const auto &memory_desc64 : memory64_list) {
344 const lldb::addr_t range_start = memory_desc64.start_of_memory_range;
345 const size_t range_size = memory_desc64.data_size;
347 if (base_rva + range_size > GetData().size())
350 if (range_start <= addr && addr < range_start + range_size) {
351 return minidump::Range(range_start,
352 GetData().slice(base_rva, range_size));
// Advance the running file offset past this range's bytes.
354 base_rva += range_size;
// Returns the captured bytes of process memory starting at addr.
// The range containing addr is located with FindMemoryRange; the result is
// the part of that range beginning at addr, limited to `size` bytes. It can
// be shorter than `size` when the range ends first (see the std::min below).
// NOTE(review): the rest of the parameter list and the statements taken when
// no range is found or the offset check fails are not visible in this
// excerpt; presumably an empty ArrayRef is returned -- confirm.
361 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
363 // I don't have a sense of how frequently this is called or how many memory
364 // ranges a Minidump typically has, so I'm not sure if searching for the
365 // appropriate range linearly each time is stupid. Perhaps we should build
366 // an index for faster lookups.
367 llvm::Optional<minidump::Range> range = FindMemoryRange(addr);
371 // There's at least some overlap between the beginning of the desired range
372 // (addr) and the current range. Figure out where the overlap begins and how
373 // much overlap there is.
// Byte offset of addr within the range that was found.
375 const size_t offset = addr - range->start;
// Guard against an address before the range or an offset at/after its end.
377 if (addr < range->start || offset >= range->range_ref.size())
// Clamp the request to the bytes that remain in the range after offset.
380 const size_t overlap = std::min(size, range->range_ref.size() - offset);
381 return range->range_ref.slice(offset, overlap);
// Describes the memory region around load_addr using the MemoryInfoList
// stream.
// Each entry covers [base_address, base_address + region_size).
//   - If an entry contains load_addr, the region end is set to the entry's
//     end and readable/writable/executable are derived from the entry's
//     `protect` bits; "mapped" is yes unless the entry's state is MemFree.
//   - Otherwise the entry with the lowest base address above load_addr is
//     tracked, and an unreadable, unwritable, non-executable region is
//     created from load_addr up to that entry's base, or up to
//     LLDB_INVALID_ADDRESS when there is no such entry.
// NOTE(review): several statements are not visible in this excerpt (the
// results of the empty-stream/empty-list checks, both operands of the
// SetRangeBase conditional, the returns, and the assignment to next_entry);
// confirm against the full file.
384 llvm::Optional<MemoryRegionInfo>
385 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
386 MemoryRegionInfo info;
387 llvm::ArrayRef<uint8_t> data = GetStream(MinidumpStreamType::MemoryInfoList);
391 std::vector<const MinidumpMemoryInfo *> mem_info_list =
392 MinidumpMemoryInfo::ParseMemoryInfoList(data);
393 if (mem_info_list.empty())
// Short aliases for the tri-state values used in the setters below.
396 const auto yes = MemoryRegionInfo::eYes;
397 const auto no = MemoryRegionInfo::eNo;
399 const MinidumpMemoryInfo *next_entry = nullptr;
400 for (const auto &entry : mem_info_list) {
401 const auto head = entry->base_address;
402 const auto tail = head + entry->region_size;
// Half-open containment test: [head, tail).
404 if (head <= load_addr && load_addr < tail) {
// The range base depends on whether the entry is free memory; the two
// alternatives of this conditional are not visible in this excerpt.
405 info.GetRange().SetRangeBase(
406 (entry->state != uint32_t(MinidumpMemoryInfoState::MemFree))
409 info.GetRange().SetRangeEnd(tail);
// Readable unless the PageNoAccess bit is set.
411 const uint32_t PageNoAccess =
412 static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageNoAccess);
413 info.SetReadable((entry->protect & PageNoAccess) == 0 ? yes : no);
// Writable when any bit of the PageWritable mask is set.
415 const uint32_t PageWritable =
416 static_cast<uint32_t>(MinidumpMemoryProtectionContants::PageWritable);
417 info.SetWritable((entry->protect & PageWritable) != 0 ? yes : no);
// Executable when any bit of the PageExecutable mask is set.
419 const uint32_t PageExecutable = static_cast<uint32_t>(
420 MinidumpMemoryProtectionContants::PageExecutable);
421 info.SetExecutable((entry->protect & PageExecutable) != 0 ? yes : no);
423 const uint32_t MemFree =
424 static_cast<uint32_t>(MinidumpMemoryInfoState::MemFree);
425 info.SetMapped((entry->state != MemFree) ? yes : no);
428 } else if (head > load_addr &&
429 (next_entry == nullptr || head < next_entry->base_address)) {
430 // In case there is no region containing load_addr keep track of the
431 // nearest region after load_addr so we can return the distance to it.
436 // No containing region found. Create an unmapped region that extends to the
437 // next region or LLDB_INVALID_ADDRESS
438 info.GetRange().SetRangeBase(load_addr);
439 info.GetRange().SetRangeEnd((next_entry != nullptr) ? next_entry->base_address
440 : LLDB_INVALID_ADDRESS);
441 info.SetReadable(no);
442 info.SetWritable(no);
443 info.SetExecutable(no);
446 // Note that the memory info list doesn't seem to contain ranges in kernel
447 // space, so if you're walking a stack that has kernel frames, the stack may
452 Status MinidumpParser::Initialize() {
455 lldbassert(m_directory_map.empty());
457 llvm::ArrayRef<uint8_t> header_data(m_data_sp->GetBytes(),
458 sizeof(MinidumpHeader));
459 const MinidumpHeader *header = MinidumpHeader::Parse(header_data);
460 if (header == nullptr) {
461 error.SetErrorString("invalid minidump: can't parse the header");
465 // A minidump without at least one stream is clearly ill-formed
466 if (header->streams_count == 0) {
467 error.SetErrorString("invalid minidump: no streams present");
475 FileRange(uint32_t offset, uint32_t size) : offset(offset), size(size) {}
476 uint32_t end() const { return offset + size; }
479 const uint32_t file_size = m_data_sp->GetByteSize();
481 // Build a global minidump file map, checking for:
482 // - overlapping streams/data structures
483 // - truncation (streams pointing past the end of file)
484 std::vector<FileRange> minidump_map;
486 // Add the minidump header to the file map
487 if (sizeof(MinidumpHeader) > file_size) {
488 error.SetErrorString("invalid minidump: truncated header");
491 minidump_map.emplace_back( 0, sizeof(MinidumpHeader) );
493 // Add the directory entries to the file map
494 FileRange directory_range(header->stream_directory_rva,
495 header->streams_count *
496 sizeof(MinidumpDirectory));
497 if (directory_range.end() > file_size) {
498 error.SetErrorString("invalid minidump: truncated streams directory");
501 minidump_map.push_back(directory_range);
503 // Parse stream directory entries
504 llvm::ArrayRef<uint8_t> directory_data(
505 m_data_sp->GetBytes() + directory_range.offset, directory_range.size);
506 for (uint32_t i = 0; i < header->streams_count; ++i) {
507 const MinidumpDirectory *directory_entry = nullptr;
508 error = consumeObject(directory_data, directory_entry);
511 if (directory_entry->stream_type == 0) {
512 // Ignore dummy streams (technically ill-formed, but a number of
513 // existing minidumps seem to contain such streams)
514 if (directory_entry->location.data_size == 0)
516 error.SetErrorString("invalid minidump: bad stream type");
519 // Update the streams map, checking for duplicate stream types
521 .insert({directory_entry->stream_type, directory_entry->location})
523 error.SetErrorString("invalid minidump: duplicate stream type");
526 // Ignore the zero-length streams for layout checks
527 if (directory_entry->location.data_size != 0) {
528 minidump_map.emplace_back(directory_entry->location.rva,
529 directory_entry->location.data_size);
533 // Sort the file map ranges by start offset
534 std::sort(minidump_map.begin(), minidump_map.end(),
535 [](const FileRange &a, const FileRange &b) {
536 return a.offset < b.offset;
539 // Check for overlapping streams/data structures
540 for (size_t i = 1; i < minidump_map.size(); ++i) {
541 const auto &prev_range = minidump_map[i - 1];
542 if (prev_range.end() > minidump_map[i].offset) {
543 error.SetErrorString("invalid minidump: overlapping streams");
548 // Check for streams past the end of file
549 const auto &last_range = minidump_map.back();
550 if (last_range.end() > file_size) {
551 error.SetErrorString("invalid minidump: truncated stream");