1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MinidumpParser.h"
10 #include "NtStructures.h"
11 #include "RegisterContextMinidump_x86_32.h"
13 #include "Plugins/Process/Utility/LinuxProcMaps.h"
14 #include "lldb/Utility/LLDBAssert.h"
15 #include "lldb/Utility/Log.h"
24 using namespace lldb_private;
25 using namespace minidump;
// Builds a MinidumpParser from the bytes held by data_sp.
// The buffer is wrapped in a MemoryBufferRef named "minidump" and handed to
// llvm::object::MinidumpFile::create. The two visible returns either forward
// the error produced by that call or construct a parser that keeps data_sp
// and takes the parsed file by move.
27 llvm::Expected<MinidumpParser>
28 MinidumpParser::Create(const lldb::DataBufferSP &data_sp) {
29 auto ExpectedFile = llvm::object::MinidumpFile::create(
30 llvm::MemoryBufferRef(toStringRef(data_sp->GetData()), "minidump"));
32 return ExpectedFile.takeError();
34 return MinidumpParser(data_sp, std::move(*ExpectedFile));
// Stores the raw data buffer in m_data_sp and the already-parsed minidump
// file in m_file; both arguments are moved into the members.
37 MinidumpParser::MinidumpParser(lldb::DataBufferSP data_sp,
38 std::unique_ptr<llvm::object::MinidumpFile> file)
39 : m_data_sp(std::move(data_sp)), m_file(std::move(file)) {}
// Returns an ArrayRef covering the whole minidump buffer, built from the
// byte pointer and byte size reported by m_data_sp.
41 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
42 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(),
43 m_data_sp->GetByteSize());
// Returns the raw bytes of the stream with the given type, or an empty
// ArrayRef when getRawStream() yields no value for that type.
46 llvm::ArrayRef<uint8_t> MinidumpParser::GetStream(StreamType stream_type) {
47 return m_file->getRawStream(stream_type)
48 .getValueOr(llvm::ArrayRef<uint8_t>());
// Extracts the UUID of a module from its CodeView (CV) record.
// The record is located through module->CvRecord (RVA + DataSize) inside the
// minidump buffer, and its leading signature selects how it is decoded:
//  - Pdb70: a CvRecordPdb70 is read; the UUID is built from the whole record
//    when Age is non-zero, otherwise from the Uuid field alone.
//  - ElfBuildId: the UUID is built from cv_record as left by the
//    consumeObject() call that read the signature.
// NOTE(review): no bounds check on CvRecord is visible here before the
// slice -- confirm the location is validated elsewhere.
51 UUID MinidumpParser::GetModuleUUID(const minidump::Module *module) {
53 GetData().slice(module->CvRecord.RVA, module->CvRecord.DataSize);
55 // Read the CV record signature
56 const llvm::support::ulittle32_t *signature = nullptr;
57 Status error = consumeObject(cv_record, signature);
// Convert the little-endian on-disk value to the CvSignature enum.
61 const CvSignature cv_signature =
62 static_cast<CvSignature>(static_cast<uint32_t>(*signature));
64 if (cv_signature == CvSignature::Pdb70) {
65 const CvRecordPdb70 *pdb70_uuid = nullptr;
66 Status error = consumeObject(cv_record, pdb70_uuid);
// Local copy used only when the fields have to be byte-swapped (non-ELF).
70 CvRecordPdb70 swapped;
71 if (!GetArchitecture().GetTriple().isOSBinFormatELF()) {
72 // LLDB's UUID class treats the data as a sequence of bytes, but breakpad
73 // interprets it as a sequence of little-endian fields, which it converts
74 // to big-endian when converting to text. Swap the bytes to big endian so
75 // that the string representation comes out right.
76 swapped = *pdb70_uuid;
77 llvm::sys::swapByteOrder(swapped.Uuid.Data1);
78 llvm::sys::swapByteOrder(swapped.Uuid.Data2);
79 llvm::sys::swapByteOrder(swapped.Uuid.Data3);
80 llvm::sys::swapByteOrder(swapped.Age);
81 pdb70_uuid = &swapped;
// A non-zero Age is kept as part of the UUID bytes; a zero Age is dropped.
83 if (pdb70_uuid->Age != 0)
84 return UUID::fromOptionalData(pdb70_uuid, sizeof(*pdb70_uuid));
85 return UUID::fromOptionalData(&pdb70_uuid->Uuid, sizeof(pdb70_uuid->Uuid));
86 } else if (cv_signature == CvSignature::ElfBuildId)
87 return UUID::fromOptionalData(cv_record);
// Returns the thread list read from the minidump file. When reading fails,
// the error is consumed and logged to the LIBLLDB_LOG_THREAD category.
92 llvm::ArrayRef<minidump::Thread> MinidumpParser::GetThreads() {
93 auto ExpectedThreads = GetMinidumpFile().getThreadList();
95 return *ExpectedThreads;
97 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD),
98 ExpectedThreads.takeError(),
99 "Failed to read thread list: {0}");
// Returns the bytes of a thread context described by a location descriptor
// (file offset RVA, length DataSize) as a slice of the minidump buffer.
// The descriptor is first checked against the buffer size.
// NOTE(review): if RVA and DataSize are both 32-bit, their sum could wrap
// before the comparison -- confirm the field widths.
103 llvm::ArrayRef<uint8_t>
104 MinidumpParser::GetThreadContext(const LocationDescriptor &location) {
105 if (location.RVA + location.DataSize > GetData().size())
107 return GetData().slice(location.RVA, location.DataSize);
// Convenience overload: returns the context bytes for a thread by forwarding
// its Context location descriptor to the LocationDescriptor overload.
110 llvm::ArrayRef<uint8_t>
111 MinidumpParser::GetThreadContext(const minidump::Thread &td) {
112 return GetThreadContext(td.Context);
// Looks up the 32-bit register context of a WOW64 thread by reading the
// thread's 64-bit TEB from captured memory and following TLS slot 1.
115 llvm::ArrayRef<uint8_t>
116 MinidumpParser::GetThreadContextWow64(const minidump::Thread &td) {
117 // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If
118 // the minidump was captured with a 64-bit debugger, then the CONTEXT we just
119 // grabbed from the mini_dump_thread is the one for the 64-bit "native"
120 // process rather than the 32-bit "guest" process we care about. In this
121 // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment
122 // Block) of the 64-bit process.
// td.EnvironmentBlock is used as the address of the TEB in captured memory.
123 auto teb_mem = GetMemory(td.EnvironmentBlock, sizeof(TEB64));
127 const TEB64 *wow64teb;
128 Status error = consumeObject(teb_mem, wow64teb);
132 // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure
133 // that includes the 32-bit CONTEXT (after a ULONG). See:
134 // https://msdn.microsoft.com/en-us/library/ms681670.aspx
// The "+ 4" skips the ULONG that precedes the CONTEXT (see comment above).
136 GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32));
// Guard against a read that came back shorter than a full 32-bit context.
137 if (context.size() < sizeof(MinidumpContext_x86_32))
141 // NOTE: We don't currently use the TEB for anything else. If we
142 // need it in the future, the 32-bit TEB is located according to the address
143 // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]).
// Returns the ArchSpec describing the process captured in the minidump.
// The triple is derived from the SystemInfo stream: ProcessorArch selects the
// architecture and PlatformId selects the OS (plus vendor or environment for
// some platforms). The result is stored in m_arch, and the IsValid() check at
// the top guards the already-computed value. A failure to read SystemInfo is
// logged to the LIBLLDB_LOG_PROCESS category.
146 ArchSpec MinidumpParser::GetArchitecture() {
147 if (m_arch.IsValid())
150 // Set the architecture in m_arch
151 llvm::Expected<const SystemInfo &> system_info = m_file->getSystemInfo();
154 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS),
155 system_info.takeError(),
156 "Failed to read SystemInfo stream: {0}");
160 // TODO what to do about big endian flavors of arm ?
161 // TODO set the arm subarch stuff if the minidump has info about it
// Vendor starts as unknown; the Apple platforms below override it.
164 triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
// Map the minidump processor architecture onto an llvm::Triple arch.
166 switch (system_info->ProcessorArch) {
167 case ProcessorArchitecture::X86:
168 triple.setArch(llvm::Triple::ArchType::x86);
170 case ProcessorArchitecture::AMD64:
171 triple.setArch(llvm::Triple::ArchType::x86_64);
173 case ProcessorArchitecture::ARM:
174 triple.setArch(llvm::Triple::ArchType::arm);
176 case ProcessorArchitecture::ARM64:
177 triple.setArch(llvm::Triple::ArchType::aarch64);
180 triple.setArch(llvm::Triple::ArchType::UnknownArch);
184 // TODO add all of the OSes that Minidump/breakpad distinguishes?
// Map the minidump platform id onto an llvm::Triple OS.
185 switch (system_info->PlatformId) {
// All Windows flavors collapse to Win32.
186 case OSPlatform::Win32S:
187 case OSPlatform::Win32Windows:
188 case OSPlatform::Win32NT:
189 case OSPlatform::Win32CE:
190 triple.setOS(llvm::Triple::OSType::Win32);
192 case OSPlatform::Linux:
193 triple.setOS(llvm::Triple::OSType::Linux);
195 case OSPlatform::MacOSX:
196 triple.setOS(llvm::Triple::OSType::MacOSX);
197 triple.setVendor(llvm::Triple::Apple);
199 case OSPlatform::IOS:
200 triple.setOS(llvm::Triple::OSType::IOS);
201 triple.setVendor(llvm::Triple::Apple);
// Android is expressed as Linux with the Android environment.
203 case OSPlatform::Android:
204 triple.setOS(llvm::Triple::OSType::Linux);
205 triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
// Fallback: start from UnknownOS, then look for "Linux" in the CSD version
// string referenced by SystemInfo and use Linux if it is found.
208 triple.setOS(llvm::Triple::OSType::UnknownOS);
209 auto ExpectedCSD = m_file->getString(system_info->CSDVersionRVA);
211 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS),
212 ExpectedCSD.takeError(),
213 "Failed to CSD Version string: {0}");
215 if (ExpectedCSD->find("Linux") != std::string::npos)
216 triple.setOS(llvm::Triple::OSType::Linux);
221 m_arch.SetTriple(triple);
// Fetches the MiscInfo stream and, after an emptiness check on its bytes,
// returns the result of MinidumpMiscInfo::Parse on them.
225 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() {
226 llvm::ArrayRef<uint8_t> data = GetStream(StreamType::MiscInfo);
228 if (data.size() == 0)
231 return MinidumpMiscInfo::Parse(data);
// Fetches the LinuxProcStatus stream and, after an emptiness check on its
// bytes, returns the result of LinuxProcStatus::Parse on them.
234 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() {
235 llvm::ArrayRef<uint8_t> data = GetStream(StreamType::LinuxProcStatus);
237 if (data.size() == 0)
240 return LinuxProcStatus::Parse(data);
// Returns the process id recorded in the minidump. The MiscInfo stream is
// consulted first; the LinuxProcStatus stream is used when MiscInfo is not
// available.
243 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() {
244 const MinidumpMiscInfo *misc_info = GetMiscInfo();
245 if (misc_info != nullptr) {
246 return misc_info->GetPid();
// No MiscInfo: fall back to the Linux /proc status data.
249 llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus();
250 if (proc_status.hasValue()) {
251 return proc_status->GetPid();
// Returns the module list read from the minidump file. When reading fails,
// the error is consumed and logged to the LIBLLDB_LOG_MODULES category.
257 llvm::ArrayRef<minidump::Module> MinidumpParser::GetModuleList() {
258 auto ExpectedModules = GetMinidumpFile().getModuleList();
260 return *ExpectedModules;
262 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES),
263 ExpectedModules.takeError(),
264 "Failed to read module list: {0}");
// Returns the module list with duplicate names collapsed to one entry each.
// Entries appear in the order their name was first seen; when a name repeats,
// the entry with the lowest BaseOfImage is kept. The returned pointers refer
// to elements of the list returned by getModuleList(). Failures to read the
// list or a module name are logged to the LIBLLDB_LOG_MODULES category.
268 std::vector<const minidump::Module *> MinidumpParser::GetFilteredModuleList() {
269 Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES);
270 auto ExpectedModules = GetMinidumpFile().getModuleList();
271 if (!ExpectedModules) {
272 LLDB_LOG_ERROR(log, ExpectedModules.takeError(),
273 "Failed to read module list: {0}");
277 // map module_name -> filtered_modules index
278 typedef llvm::StringMap<size_t> MapType;
279 MapType module_name_to_filtered_index;
281 std::vector<const minidump::Module *> filtered_modules;
283 for (const auto &module : *ExpectedModules) {
284 auto ExpectedName = m_file->getString(module.ModuleNameRVA);
286 LLDB_LOG_ERROR(log, ExpectedName.takeError(),
287 "Failed to get module name: {0}");
291 MapType::iterator iter;
293 // See if we have inserted this module already into filtered_modules. If we
294 // haven't, insert an entry into module_name_to_filtered_index with the
295 // index where we will insert it if it isn't in the vector already.
296 std::tie(iter, inserted) = module_name_to_filtered_index.try_emplace(
297 *ExpectedName, filtered_modules.size());
300 // This module has not been seen yet, insert it into filtered_modules at
301 // the index that was inserted into module_name_to_filtered_index using
302 // "filtered_modules.size()" above.
303 filtered_modules.push_back(&module);
305 // This module has been seen. Modules are sometimes mentioned multiple
306 // times when they are mapped discontiguously, so find the module with
307 // the lowest "base_of_image" and use that as the filtered module.
308 auto dup_module = filtered_modules[iter->second];
309 if (module.BaseOfImage < dup_module->BaseOfImage)
310 filtered_modules[iter->second] = &module;
313 return filtered_modules;
// Fetches the Exception stream and, after an emptiness check on its bytes,
// returns the result of MinidumpExceptionStream::Parse on them.
316 const MinidumpExceptionStream *MinidumpParser::GetExceptionStream() {
317 llvm::ArrayRef<uint8_t> data = GetStream(StreamType::Exception);
319 if (data.size() == 0)
322 return MinidumpExceptionStream::Parse(data);
// Finds the captured memory range that contains addr.
// The MemoryList stream is searched first, then the Memory64List stream.
// A match is returned as a minidump::Range holding the range's start address
// and the bytes backing it in the minidump buffer. Read failures are logged
// to the LIBLLDB_LOG_MODULES category.
325 llvm::Optional<minidump::Range>
326 MinidumpParser::FindMemoryRange(lldb::addr_t addr) {
327 llvm::ArrayRef<uint8_t> data64 = GetStream(StreamType::Memory64List);
328 Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES);
330 auto ExpectedMemory = GetMinidumpFile().getMemoryList();
331 if (!ExpectedMemory) {
332 LLDB_LOG_ERROR(log, ExpectedMemory.takeError(),
333 "Failed to read memory list: {0}");
335 for (const auto &memory_desc : *ExpectedMemory) {
336 const LocationDescriptor &loc_desc = memory_desc.Memory;
337 const lldb::addr_t range_start = memory_desc.StartOfMemoryRange;
338 const size_t range_size = loc_desc.DataSize;
// Check the descriptor against the size of the minidump buffer.
// NOTE(review): if RVA and DataSize are both 32-bit, the sum could wrap
// before the comparison -- confirm the field widths.
340 if (loc_desc.RVA + loc_desc.DataSize > GetData().size())
// Half-open containment test: [range_start, range_start + range_size).
343 if (range_start <= addr && addr < range_start + range_size) {
344 auto ExpectedSlice = GetMinidumpFile().getRawData(loc_desc);
345 if (!ExpectedSlice) {
346 LLDB_LOG_ERROR(log, ExpectedSlice.takeError(),
347 "Failed to get memory slice: {0}");
350 return minidump::Range(range_start, *ExpectedSlice);
355 // Some Minidumps have a Memory64ListStream that captures all the heap memory
356 // (full-memory Minidumps). We can't exactly use the same loop as above,
357 // because the Minidump uses slightly different data structures to describe
360 if (!data64.empty()) {
361 llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
363 std::tie(memory64_list, base_rva) =
364 MinidumpMemoryDescriptor64::ParseMemory64List(data64);
366 if (memory64_list.empty())
// The file offset of each range's data is tracked in base_rva: it starts at
// the value returned by ParseMemory64List and is advanced by each range's
// size at the bottom of the loop.
369 for (const auto &memory_desc64 : memory64_list) {
370 const lldb::addr_t range_start = memory_desc64.start_of_memory_range;
371 const size_t range_size = memory_desc64.data_size;
// Check the range's data against the size of the minidump buffer.
373 if (base_rva + range_size > GetData().size())
376 if (range_start <= addr && addr < range_start + range_size) {
377 return minidump::Range(range_start,
378 GetData().slice(base_rva, range_size));
380 base_rva += range_size;
// Returns captured memory starting at addr, as a slice of the range found by
// FindMemoryRange(addr). The slice length is the smaller of the requested
// size and the number of bytes left in that range after addr, so the result
// may be shorter than requested.
387 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
389 // I don't have a sense of how frequently this is called or how many memory
390 // ranges a Minidump typically has, so I'm not sure if searching for the
391 // appropriate range linearly each time is stupid. Perhaps we should build
392 // an index for faster lookups.
393 llvm::Optional<minidump::Range> range = FindMemoryRange(addr);
397 // There's at least some overlap between the beginning of the desired range
398 // (addr) and the current range. Figure out where the overlap begins and how
399 // much overlap there is.
// offset is computed before the guard below; the guard tests both for addr
// lying before the range start and for offset lying past the range's bytes.
401 const size_t offset = addr - range->start;
403 if (addr < range->start || offset >= range->range_ref.size())
// Clamp the requested size to what the range holds from offset onward.
406 const size_t overlap = std::min(size, range->range_ref.size() - offset);
407 return range->range_ref.slice(offset, overlap);
// Fills `regions` from the LinuxMaps stream: the stream bytes are passed as
// text to ParseLinuxMapRegions, and every region reported with a successful
// status is appended. Returns true when `regions` is non-empty afterwards.
// NOTE(review): "®ions" / "®ion" below look like a mis-encoded "&regions" /
// "&region" -- verify against the upstream file.
411 CreateRegionsCacheFromLinuxMaps(MinidumpParser &parser,
412 std::vector<MemoryRegionInfo> ®ions) {
413 auto data = parser.GetStream(StreamType::LinuxMaps);
416 ParseLinuxMapRegions(llvm::toStringRef(data),
417 [&](const lldb_private::MemoryRegionInfo ®ion,
418 const lldb_private::Status &status) -> bool {
419 if (status.Success())
420 regions.push_back(region);
423 return !regions.empty();
// Fills `regions` from the MemoryInfoList stream. Each parsed entry becomes
// a MemoryRegionInfo carrying the entry's base address and size, with the
// readable/writable/executable/mapped flags taken from the entry's
// predicates. Returns true when `regions` is non-empty afterwards.
// NOTE(review): "®ions" below looks like a mis-encoded "&regions" -- verify
// against the upstream file.
427 CreateRegionsCacheFromMemoryInfoList(MinidumpParser &parser,
428 std::vector<MemoryRegionInfo> ®ions) {
429 auto data = parser.GetStream(StreamType::MemoryInfoList);
432 auto mem_info_list = MinidumpMemoryInfo::ParseMemoryInfoList(data);
433 if (mem_info_list.empty())
// Short aliases for the tri-state values used by the setters below.
435 constexpr auto yes = MemoryRegionInfo::eYes;
436 constexpr auto no = MemoryRegionInfo::eNo;
437 regions.reserve(mem_info_list.size());
438 for (const auto &entry : mem_info_list) {
439 MemoryRegionInfo region;
440 region.GetRange().SetRangeBase(entry->base_address);
441 region.GetRange().SetByteSize(entry->region_size);
442 region.SetReadable(entry->isReadable() ? yes : no);
443 region.SetWritable(entry->isWritable() ? yes : no);
444 region.SetExecutable(entry->isExecutable() ? yes : no);
445 region.SetMapped(entry->isMapped() ? yes : no);
446 regions.push_back(region);
448 return !regions.empty();
// Fills `regions` from the MemoryList stream. Each descriptor becomes a
// MemoryRegionInfo marked readable and mapped; writable and executable are
// not set here because this stream only shows that the memory was captured.
// A check on zero-sized descriptors precedes the conversion. Returns true
// when `regions` is non-empty afterwards.
// NOTE(review): "®ions" below looks like a mis-encoded "&regions" -- verify
// against the upstream file.
452 CreateRegionsCacheFromMemoryList(MinidumpParser &parser,
453 std::vector<MemoryRegionInfo> ®ions) {
454 Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES);
455 auto ExpectedMemory = parser.GetMinidumpFile().getMemoryList();
456 if (!ExpectedMemory) {
457 LLDB_LOG_ERROR(log, ExpectedMemory.takeError(),
458 "Failed to read memory list: {0}");
461 regions.reserve(ExpectedMemory->size());
462 for (const MemoryDescriptor &memory_desc : *ExpectedMemory) {
463 if (memory_desc.Memory.DataSize == 0)
465 MemoryRegionInfo region;
466 region.GetRange().SetRangeBase(memory_desc.StartOfMemoryRange);
467 region.GetRange().SetByteSize(memory_desc.Memory.DataSize);
468 region.SetReadable(MemoryRegionInfo::eYes);
469 region.SetMapped(MemoryRegionInfo::eYes);
470 regions.push_back(region);
// The reserve() above used the descriptor count as an upper bound.
472 regions.shrink_to_fit();
473 return !regions.empty();
// Fills `regions` from the Memory64List stream. Each descriptor becomes a
// MemoryRegionInfo marked readable and mapped, using the descriptor's start
// address and data size. A check on zero-sized descriptors precedes the
// conversion. Returns true when `regions` is non-empty afterwards.
// NOTE(review): "®ions" below looks like a mis-encoded "&regions" -- verify
// against the upstream file.
477 CreateRegionsCacheFromMemory64List(MinidumpParser &parser,
478 std::vector<MemoryRegionInfo> ®ions) {
479 llvm::ArrayRef<uint8_t> data =
480 parser.GetStream(StreamType::Memory64List);
483 llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
// base_rva is produced by the parse call but is not used by the visible
// lines of this function.
485 std::tie(memory64_list, base_rva) =
486 MinidumpMemoryDescriptor64::ParseMemory64List(data);
488 if (memory64_list.empty())
491 regions.reserve(memory64_list.size());
492 for (const auto &memory_desc : memory64_list) {
493 if (memory_desc.data_size == 0)
495 MemoryRegionInfo region;
496 region.GetRange().SetRangeBase(memory_desc.start_of_memory_range);
497 region.GetRange().SetByteSize(memory_desc.data_size);
498 region.SetReadable(MemoryRegionInfo::eYes);
499 region.SetMapped(MemoryRegionInfo::eYes);
500 regions.push_back(region);
// The reserve() above used the descriptor count as an upper bound.
502 regions.shrink_to_fit();
503 return !regions.empty();
// Looks up load_addr in the region cache m_regions using std::lower_bound,
// which requires m_regions to be sorted.
// When no cached region contains the address, a synthetic region describing
// the gap is built instead: it starts at 0 or at the end of the preceding
// region, ends at UINT64_MAX or at the base of the following region, and is
// marked not readable, not writable, not executable and not mapped.
507 MinidumpParser::FindMemoryRegion(lldb::addr_t load_addr) const {
508 auto begin = m_regions.begin();
509 auto end = m_regions.end();
510 auto pos = std::lower_bound(begin, end, load_addr);
// Direct hit on the region found by the binary search.
511 if (pos != end && pos->GetRange().Contains(load_addr))
514 MemoryRegionInfo region;
// Lower edge of the gap: address 0, or the end of the previous region.
516 region.GetRange().SetRangeBase(0);
// The region just before `pos` may still contain the address.
519 if (prev->GetRange().Contains(load_addr))
521 region.GetRange().SetRangeBase(prev->GetRange().GetRangeEnd());
// Upper edge of the gap: UINT64_MAX, or the base of the next region.
524 region.GetRange().SetRangeEnd(UINT64_MAX);
526 region.GetRange().SetRangeEnd(pos->GetRange().GetRangeBase());
527 region.SetReadable(MemoryRegionInfo::eNo);
528 region.SetWritable(MemoryRegionInfo::eNo);
529 region.SetExecutable(MemoryRegionInfo::eNo);
530 region.SetMapped(MemoryRegionInfo::eNo);
// Returns the memory region information for load_addr by delegating to
// FindMemoryRegion(). The m_parsed_regions check runs first; presumably it
// triggers building the region cache when that has not happened yet --
// confirm against the full file.
535 MinidumpParser::GetMemoryRegionInfo(lldb::addr_t load_addr) {
536 if (!m_parsed_regions)
538 return FindMemoryRegion(load_addr);
// Builds the region cache m_regions on first use and keeps it sorted.
// m_parsed_regions is set before the cache is filled, so the sources are
// tried only once. They are tried in order, and the first one that yields
// any regions wins: LinuxMaps, MemoryInfoList, MemoryList, Memory64List.
541 const MemoryRegionInfos &MinidumpParser::GetMemoryRegions() {
542 if (!m_parsed_regions) {
543 m_parsed_regions = true;
544 // We haven't cached our memory regions yet we will create the region cache
545 // once. We create the region cache using the best source. We start with
546 // the linux maps since they are the most complete and have names for the
547 // regions. Next we try the MemoryInfoList since it has
548 // read/write/execute/map data, and then fall back to the MemoryList and
549 // Memory64List to just get a list of the memory that is mapped in this
// Each helper returns true when it produced regions, ending the chain.
551 if (!CreateRegionsCacheFromLinuxMaps(*this, m_regions))
552 if (!CreateRegionsCacheFromMemoryInfoList(*this, m_regions))
553 if (!CreateRegionsCacheFromMemoryList(*this, m_regions))
554 CreateRegionsCacheFromMemory64List(*this, m_regions);
// FindMemoryRegion() runs std::lower_bound over m_regions, so sort here.
555 llvm::sort(m_regions.begin(), m_regions.end());
// ENUM_TO_CSTR(ST) expands to a `case StreamType::ST:` label; the rest of the
// expansion follows on the macro's continuation line(s).
// GetStreamTypeAsString() uses it to build one switch case per known
// StreamType enumerator, and yields "unknown stream type" for a value that
// matches none of them.
560 #define ENUM_TO_CSTR(ST) \
561 case StreamType::ST: \
565 MinidumpParser::GetStreamTypeAsString(StreamType stream_type) {
566 switch (stream_type) {
567 ENUM_TO_CSTR(Unused);
568 ENUM_TO_CSTR(ThreadList);
569 ENUM_TO_CSTR(ModuleList);
570 ENUM_TO_CSTR(MemoryList);
571 ENUM_TO_CSTR(Exception);
572 ENUM_TO_CSTR(SystemInfo);
573 ENUM_TO_CSTR(ThreadExList);
574 ENUM_TO_CSTR(Memory64List);
575 ENUM_TO_CSTR(CommentA);
576 ENUM_TO_CSTR(CommentW);
577 ENUM_TO_CSTR(HandleData);
578 ENUM_TO_CSTR(FunctionTable);
579 ENUM_TO_CSTR(UnloadedModuleList);
580 ENUM_TO_CSTR(MiscInfo);
581 ENUM_TO_CSTR(MemoryInfoList);
582 ENUM_TO_CSTR(ThreadInfoList);
583 ENUM_TO_CSTR(HandleOperationList);
585 ENUM_TO_CSTR(JavascriptData);
586 ENUM_TO_CSTR(SystemMemoryInfo);
587 ENUM_TO_CSTR(ProcessVMCounters);
588 ENUM_TO_CSTR(LastReserved);
589 ENUM_TO_CSTR(BreakpadInfo);
590 ENUM_TO_CSTR(AssertionInfo);
591 ENUM_TO_CSTR(LinuxCPUInfo);
592 ENUM_TO_CSTR(LinuxProcStatus);
593 ENUM_TO_CSTR(LinuxLSBRelease);
594 ENUM_TO_CSTR(LinuxCMDLine);
595 ENUM_TO_CSTR(LinuxEnviron);
596 ENUM_TO_CSTR(LinuxAuxv);
597 ENUM_TO_CSTR(LinuxMaps);
598 ENUM_TO_CSTR(LinuxDSODebug);
599 ENUM_TO_CSTR(LinuxProcStat);
600 ENUM_TO_CSTR(LinuxProcUptime);
601 ENUM_TO_CSTR(LinuxProcFD);
602 ENUM_TO_CSTR(FacebookAppCustomData);
603 ENUM_TO_CSTR(FacebookBuildID);
604 ENUM_TO_CSTR(FacebookAppVersionName);
605 ENUM_TO_CSTR(FacebookJavaStack);
606 ENUM_TO_CSTR(FacebookDalvikInfo);
607 ENUM_TO_CSTR(FacebookUnwindSymbols);
608 ENUM_TO_CSTR(FacebookDumpErrorLog);
609 ENUM_TO_CSTR(FacebookAppStateLog);
610 ENUM_TO_CSTR(FacebookAbortReason);
611 ENUM_TO_CSTR(FacebookThreadName);
612 ENUM_TO_CSTR(FacebookLogcat);
614 return "unknown stream type";