1 //===-- MinidumpParser.cpp ---------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "MinidumpParser.h"
10 #include "NtStructures.h"
11 #include "RegisterContextMinidump_x86_32.h"
13 #include "Plugins/Process/Utility/LinuxProcMaps.h"
14 #include "lldb/Utility/LLDBAssert.h"
15 #include "lldb/Utility/Log.h"
24 using namespace lldb_private;
25 using namespace minidump;
/// Factory: builds a MinidumpParser over the bytes held by \p data_sp.
///
/// The buffer contents are wrapped in an llvm::MemoryBufferRef labelled
/// "minidump" and handed to llvm::object::MinidumpFile::create. The parser is
/// then constructed from the same buffer plus the parsed file object.
///
/// \param data_sp  Shared buffer holding the raw minidump bytes.
/// \return The parser, or the error taken from the MinidumpFile::create
///         result.
27 llvm::Expected<MinidumpParser>
28 MinidumpParser::Create(const lldb::DataBufferSP &data_sp) {
29 auto ExpectedFile = llvm::object::MinidumpFile::create(
30 llvm::MemoryBufferRef(toStringRef(data_sp->GetData()), "minidump"));
// Error path: hand the creation error back to the caller.
// NOTE(review): presumably guarded by a failure check on ExpectedFile -
// confirm.
32 return ExpectedFile.takeError();
// data_sp is copied here (the parameter is a const reference); the parsed
// file object is moved into the new parser.
34 return MinidumpParser(data_sp, std::move(*ExpectedFile));
/// Constructs a parser from the raw buffer and an already-parsed minidump
/// file. Both arguments are moved into the corresponding members (m_data_sp
/// and m_file); the constructor body itself is empty.
///
/// \param data_sp  Shared buffer holding the raw minidump bytes.
/// \param file     Parsed view of that buffer; ownership is transferred.
37 MinidumpParser::MinidumpParser(lldb::DataBufferSP data_sp,
38 std::unique_ptr<llvm::object::MinidumpFile> file)
39 : m_data_sp(std::move(data_sp)), m_file(std::move(file)) {}
/// Returns a non-owning view of the whole buffer held by m_data_sp, built
/// from its GetBytes() pointer and GetByteSize() length.
41 llvm::ArrayRef<uint8_t> MinidumpParser::GetData() {
42 return llvm::ArrayRef<uint8_t>(m_data_sp->GetBytes(),
43 m_data_sp->GetByteSize());
/// Returns the raw bytes of the stream identified by \p stream_type, as
/// reported by m_file->getRawStream. When that call yields no value, an empty
/// ArrayRef is returned instead.
46 llvm::ArrayRef<uint8_t> MinidumpParser::GetStream(StreamType stream_type) {
47 return m_file->getRawStream(stream_type)
48 .getValueOr(llvm::ArrayRef<uint8_t>());
/// Derives a UUID for \p module from its CodeView (CV) record.
///
/// The record is located in the minidump buffer through module->CvRecord
/// (RVA and DataSize). A 32-bit little-endian signature is consumed first and
/// selects how the rest is interpreted:
///  - CvSignature::Pdb70: a CvRecordPdb70 is consumed. For targets whose
///    binary format is not ELF, Uuid.Data1/Data2/Data3 and Age are
///    byte-swapped on a local copy first. The UUID is built from the whole
///    record when Age is non-zero, otherwise from the Uuid field alone.
///  - CvSignature::ElfBuildId: the UUID is built from cv_record as it stands
///    after the signature was consumed.
///
/// \param module  Module whose CvRecord is read; dereferenced without a null
///                check.
/// NOTE(review): the returns taken when consumeObject fails, and for any other
/// signature, were not visible during review - presumably an empty UUID;
/// confirm.
51 UUID MinidumpParser::GetModuleUUID(const minidump::Module *module) {
// NOTE(review): no range check on RVA/DataSize is visible before this slice -
// confirm the descriptor is validated before it gets here.
53 GetData().slice(module->CvRecord.RVA, module->CvRecord.DataSize);
55 // Read the CV record signature
56 const llvm::support::ulittle32_t *signature = nullptr;
57 Status error = consumeObject(cv_record, signature);
// Convert the little-endian on-disk value to a host integer, then to the enum.
61 const CvSignature cv_signature =
62 static_cast<CvSignature>(static_cast<uint32_t>(*signature));
64 if (cv_signature == CvSignature::Pdb70) {
65 const CvRecordPdb70 *pdb70_uuid = nullptr;
66 Status error = consumeObject(cv_record, pdb70_uuid);
// Scratch copy: the record in the buffer is const, so swapping happens here
// and pdb70_uuid is re-pointed at the copy.
70 CvRecordPdb70 swapped;
71 if (!GetArchitecture().GetTriple().isOSBinFormatELF()) {
72 // LLDB's UUID class treats the data as a sequence of bytes, but breakpad
73 // interprets it as a sequence of little-endian fields, which it converts
74 // to big-endian when converting to text. Swap the bytes to big endian so
75 // that the string representation comes out right.
76 swapped = *pdb70_uuid;
77 llvm::sys::swapByteOrder(swapped.Uuid.Data1);
78 llvm::sys::swapByteOrder(swapped.Uuid.Data2);
79 llvm::sys::swapByteOrder(swapped.Uuid.Data3);
80 llvm::sys::swapByteOrder(swapped.Age);
81 pdb70_uuid = &swapped;
// A non-zero Age is part of the identifier; a zero Age is left out so the
// UUID is just the Uuid field.
83 if (pdb70_uuid->Age != 0)
84 return UUID::fromOptionalData(pdb70_uuid, sizeof(*pdb70_uuid));
85 return UUID::fromOptionalData(&pdb70_uuid->Uuid, sizeof(pdb70_uuid->Uuid));
86 } else if (cv_signature == CvSignature::ElfBuildId)
87 return UUID::fromOptionalData(cv_record);
/// Returns the thread list reported by GetMinidumpFile().getThreadList().
///
/// When that call fails, the error is consumed by logging it to the
/// LIBLLDB_LOG_THREAD category.
/// NOTE(review): the value returned after logging was not visible during
/// review - presumably an empty ArrayRef; confirm.
92 llvm::ArrayRef<minidump::Thread> MinidumpParser::GetThreads() {
93 auto ExpectedThreads = GetMinidumpFile().getThreadList();
// Success path: hand out the list stored in the Expected.
95 return *ExpectedThreads;
// Failure path: takeError() must be called so the Expected is not destroyed
// with an unchecked error.
97 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_THREAD),
98 ExpectedThreads.takeError(),
99 "Failed to read thread list: {0}");
103 llvm::ArrayRef<uint8_t>
104 MinidumpParser::GetThreadContext(const LocationDescriptor &location) {
105 if (location.RVA + location.DataSize > GetData().size())
107 return GetData().slice(location.RVA, location.DataSize);
/// Convenience overload: returns the context bytes for thread \p td by
/// forwarding td.Context to the LocationDescriptor overload.
110 llvm::ArrayRef<uint8_t>
111 MinidumpParser::GetThreadContext(const minidump::Thread &td) {
112 return GetThreadContext(td.Context);
/// Retrieves the 32-bit x86 CONTEXT of a thread running under WOW64.
///
/// Reads sizeof(TEB64) bytes of captured memory at td.EnvironmentBlock,
/// interprets them as a TEB64, then reads sizeof(MinidumpContext_x86_32) bytes
/// starting 4 bytes past the address stored in tls_slots[1] of that TEB.
///
/// \param td  Thread whose EnvironmentBlock address locates the 64-bit TEB.
/// NOTE(review): the early returns and the final return were not visible
/// during review - presumably an empty ArrayRef when any step fails and the
/// context bytes otherwise; confirm.
115 llvm::ArrayRef<uint8_t>
116 MinidumpParser::GetThreadContextWow64(const minidump::Thread &td) {
117 // On Windows, a 32-bit process can run on a 64-bit machine under WOW64. If
118 // the minidump was captured with a 64-bit debugger, then the CONTEXT we just
119 // grabbed from the mini_dump_thread is the one for the 64-bit "native"
120 // process rather than the 32-bit "guest" process we care about. In this
121 // case, we can get the 32-bit CONTEXT from the TEB (Thread Environment
122 // Block) of the 64-bit process.
123 auto teb_mem = GetMemory(td.EnvironmentBlock, sizeof(TEB64));
127 const TEB64 *wow64teb;
128 Status error = consumeObject(teb_mem, wow64teb);
132 // Slot 1 of the thread-local storage in the 64-bit TEB points to a structure
133 // that includes the 32-bit CONTEXT (after a ULONG). See:
134 // https://msdn.microsoft.com/en-us/library/ms681670.aspx
// The "+ 4" skips the leading ULONG mentioned above.
136 GetMemory(wow64teb->tls_slots[1] + 4, sizeof(MinidumpContext_x86_32));
// GetMemory may hand back fewer bytes than requested; a short read cannot
// hold a full context.
137 if (context.size() < sizeof(MinidumpContext_x86_32))
141 // NOTE: We don't currently use the TEB for anything else. If we
142 // need it in the future, the 32-bit TEB is located according to the address
143 // stored in the first slot of the 64-bit TEB (wow64teb.Reserved1[0]).
/// Returns the ArchSpec describing the process captured in this minidump.
///
/// m_arch.IsValid() is checked up front (presumably to return an already
/// computed value - confirm). Otherwise a triple is assembled from the
/// SystemInfo stream and stored into m_arch:
///  - ProcessorArch selects the architecture (x86, x86_64, arm, aarch64, or
///    UnknownArch);
///  - PlatformId selects OS, vendor and environment. Windows variants map to
///    Win32/PC, MacOSX and IOS get the Apple vendor, Android is Linux with the
///    Android environment.
///  - Where the OS is set to UnknownOS, the CSD version string is read and the
///    OS is switched to Linux when that string contains "Linux".
///
/// A failure to read the SystemInfo stream or the CSD string is logged to the
/// LIBLLDB_LOG_PROCESS category.
146 ArchSpec MinidumpParser::GetArchitecture() {
147 if (m_arch.IsValid())
150 // Set the architecture in m_arch
151 llvm::Expected<const SystemInfo &> system_info = m_file->getSystemInfo();
154 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS),
155 system_info.takeError(),
156 "Failed to read SystemInfo stream: {0}");
160 // TODO what to do about big endian flavors of arm ?
161 // TODO set the arm subarch stuff if the minidump has info about it
// Vendor starts out unknown; the platform switch below overrides it for
// Windows and Apple targets.
164 triple.setVendor(llvm::Triple::VendorType::UnknownVendor);
166 switch (system_info->ProcessorArch) {
167 case ProcessorArchitecture::X86:
168 triple.setArch(llvm::Triple::ArchType::x86);
170 case ProcessorArchitecture::AMD64:
171 triple.setArch(llvm::Triple::ArchType::x86_64);
173 case ProcessorArchitecture::ARM:
174 triple.setArch(llvm::Triple::ArchType::arm);
// Both ARM64 identifiers are treated the same way.
176 case ProcessorArchitecture::ARM64:
177 case ProcessorArchitecture::BP_ARM64:
178 triple.setArch(llvm::Triple::ArchType::aarch64);
181 triple.setArch(llvm::Triple::ArchType::UnknownArch);
185 // TODO add all of the OSes that Minidump/breakpad distinguishes?
186 switch (system_info->PlatformId) {
187 case OSPlatform::Win32S:
188 case OSPlatform::Win32Windows:
189 case OSPlatform::Win32NT:
190 case OSPlatform::Win32CE:
191 triple.setOS(llvm::Triple::OSType::Win32);
192 triple.setVendor(llvm::Triple::VendorType::PC);
194 case OSPlatform::Linux:
195 triple.setOS(llvm::Triple::OSType::Linux);
197 case OSPlatform::MacOSX:
198 triple.setOS(llvm::Triple::OSType::MacOSX);
199 triple.setVendor(llvm::Triple::Apple);
201 case OSPlatform::IOS:
202 triple.setOS(llvm::Triple::OSType::IOS);
203 triple.setVendor(llvm::Triple::Apple);
205 case OSPlatform::Android:
206 triple.setOS(llvm::Triple::OSType::Linux);
207 triple.setEnvironment(llvm::Triple::EnvironmentType::Android);
// Fallback (presumably the default case - confirm): start from UnknownOS and
// use the CSD version string as a hint for Linux dumps.
210 triple.setOS(llvm::Triple::OSType::UnknownOS);
211 auto ExpectedCSD = m_file->getString(system_info->CSDVersionRVA);
// NOTE(review): the message below reads "Failed to CSD Version string" - it
// looks like the word "read" is missing.
213 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS),
214 ExpectedCSD.takeError(),
215 "Failed to CSD Version string: {0}");
217 if (ExpectedCSD->find("Linux") != std::string::npos)
218 triple.setOS(llvm::Triple::OSType::Linux);
// Store the result in the member.
223 m_arch.SetTriple(triple);
/// Parses the MiscInfo stream.
///
/// \return The result of MinidumpMiscInfo::Parse applied to the stream bytes.
/// NOTE(review): the value returned when the stream is empty was not visible
/// during review - presumably nullptr (GetPid() tests for it); confirm.
227 const MinidumpMiscInfo *MinidumpParser::GetMiscInfo() {
228 llvm::ArrayRef<uint8_t> data = GetStream(StreamType::MiscInfo);
// GetStream yields an empty ArrayRef when the stream is absent.
230 if (data.size() == 0)
233 return MinidumpMiscInfo::Parse(data);
/// Parses the LinuxProcStatus stream.
///
/// \return The result of LinuxProcStatus::Parse applied to the stream bytes.
/// NOTE(review): the value returned when the stream is empty was not visible
/// during review - presumably llvm::None; confirm.
236 llvm::Optional<LinuxProcStatus> MinidumpParser::GetLinuxProcStatus() {
237 llvm::ArrayRef<uint8_t> data = GetStream(StreamType::LinuxProcStatus);
// GetStream yields an empty ArrayRef when the stream is absent.
239 if (data.size() == 0)
242 return LinuxProcStatus::Parse(data);
/// Determines the process id recorded in the dump.
///
/// The MiscInfo stream is consulted first; only when GetMiscInfo() returns
/// nullptr is the LinuxProcStatus stream tried.
/// NOTE(review): the result when neither source is available was not visible
/// during review - presumably llvm::None; confirm.
245 llvm::Optional<lldb::pid_t> MinidumpParser::GetPid() {
246 const MinidumpMiscInfo *misc_info = GetMiscInfo();
247 if (misc_info != nullptr) {
248 return misc_info->GetPid();
// Second source: the pid parsed from the LinuxProcStatus stream.
251 llvm::Optional<LinuxProcStatus> proc_status = GetLinuxProcStatus();
252 if (proc_status.hasValue()) {
253 return proc_status->GetPid();
/// Returns the module list reported by GetMinidumpFile().getModuleList(),
/// unfiltered (duplicates included).
///
/// When that call fails, the error is consumed by logging it to the
/// LIBLLDB_LOG_MODULES category.
/// NOTE(review): the value returned after logging was not visible during
/// review - presumably an empty ArrayRef; confirm.
259 llvm::ArrayRef<minidump::Module> MinidumpParser::GetModuleList() {
260 auto ExpectedModules = GetMinidumpFile().getModuleList();
// Success path: hand out the list stored in the Expected.
262 return *ExpectedModules;
264 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES),
265 ExpectedModules.takeError(),
266 "Failed to read module list: {0}");
/// Returns the module list with at most one entry per module name.
///
/// Each module's name is read from ModuleNameRVA. The first module seen with
/// a given name claims a slot in the result; a later module with the same
/// name replaces the slot's pointer only when its BaseOfImage is lower. The
/// order of the result therefore follows the first appearance of each name.
///
/// The returned pointers refer to elements of the list obtained from
/// getModuleList(). Read failures (module list or a module name) are logged
/// to the LIBLLDB_LOG_MODULES category.
270 std::vector<const minidump::Module *> MinidumpParser::GetFilteredModuleList() {
271 Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES);
272 auto ExpectedModules = GetMinidumpFile().getModuleList();
273 if (!ExpectedModules) {
274 LLDB_LOG_ERROR(log, ExpectedModules.takeError(),
275 "Failed to read module list: {0}");
279 // map module_name -> filtered_modules index
280 typedef llvm::StringMap<size_t> MapType;
281 MapType module_name_to_filtered_index;
283 std::vector<const minidump::Module *> filtered_modules;
285 for (const auto &module : *ExpectedModules) {
286 auto ExpectedName = m_file->getString(module.ModuleNameRVA);
288 LLDB_LOG_ERROR(log, ExpectedName.takeError(),
289 "Failed to get module name: {0}");
293 MapType::iterator iter;
295 // See if we have inserted this module already into filtered_modules. If we
296 // haven't insert an entry into module_name_to_filtered_index with the
297 // index where we will insert it if it isn't in the vector already.
298 std::tie(iter, inserted) = module_name_to_filtered_index.try_emplace(
299 *ExpectedName, filtered_modules.size());
302 // This module has not been seen yet, insert it into filtered_modules at
303 // the index that was inserted into module_name_to_filtered_index using
304 // "filtered_modules.size()" above.
305 filtered_modules.push_back(&module);
307 // This module has been seen. Modules are sometimes mentioned multiple
308 // times when they are mapped discontiguously, so find the module with
309 // the lowest "base_of_image" and use that as the filtered module.
310 auto dup_module = filtered_modules[iter->second];
311 if (module.BaseOfImage < dup_module->BaseOfImage)
312 filtered_modules[iter->second] = &module;
315 return filtered_modules;
/// Returns a pointer to the exception stream reported by
/// GetMinidumpFile().getExceptionStream().
///
/// When that call fails, the error is consumed by logging it to the
/// LIBLLDB_LOG_PROCESS category.
/// NOTE(review): the value returned after logging was not visible during
/// review - presumably nullptr; confirm.
318 const minidump::ExceptionStream *MinidumpParser::GetExceptionStream() {
319 auto ExpectedStream = GetMinidumpFile().getExceptionStream();
// Success path: address of the stream object the Expected refers to.
321 return &*ExpectedStream;
323 LLDB_LOG_ERROR(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS),
324 ExpectedStream.takeError(),
325 "Failed to read minidump exception stream: {0}");
/// Finds the captured memory range that contains \p addr.
///
/// Two sources are searched:
///  1. The MemoryList stream. A descriptor matches when addr lies in
///     [StartOfMemoryRange, StartOfMemoryRange + DataSize); its bytes are
///     fetched with getRawData.
///  2. The Memory64List stream. Descriptors carry only a start address and a
///     size; their data is located by a running offset (base_rva) that is
///     advanced by each descriptor's size.
///
/// Failures to read the memory list or a slice are logged to the
/// LIBLLDB_LOG_MODULES category.
///
/// \param addr  Address in the dumped process to look up.
/// \return A Range holding the range's start address and its bytes.
/// NOTE(review): the early-out and fallthrough returns were not visible during
/// review - presumably llvm::None; confirm.
329 llvm::Optional<minidump::Range>
330 MinidumpParser::FindMemoryRange(lldb::addr_t addr) {
331 llvm::ArrayRef<uint8_t> data64 = GetStream(StreamType::Memory64List);
332 Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES);
334 auto ExpectedMemory = GetMinidumpFile().getMemoryList();
335 if (!ExpectedMemory) {
336 LLDB_LOG_ERROR(log, ExpectedMemory.takeError(),
337 "Failed to read memory list: {0}");
339 for (const auto &memory_desc : *ExpectedMemory) {
340 const LocationDescriptor &loc_desc = memory_desc.Memory;
341 const lldb::addr_t range_start = memory_desc.StartOfMemoryRange;
342 const size_t range_size = loc_desc.DataSize;
// Reject descriptors whose data would extend past the end of the file.
// NOTE(review): if RVA and DataSize are 32-bit fields this sum is evaluated
// in 32 bits and can wrap for a corrupt descriptor - TODO confirm the field
// widths and widen before adding.
344 if (loc_desc.RVA + loc_desc.DataSize > GetData().size())
347 if (range_start <= addr && addr < range_start + range_size) {
348 auto ExpectedSlice = GetMinidumpFile().getRawData(loc_desc);
349 if (!ExpectedSlice) {
350 LLDB_LOG_ERROR(log, ExpectedSlice.takeError(),
351 "Failed to get memory slice: {0}");
354 return minidump::Range(range_start, *ExpectedSlice);
359 // Some Minidumps have a Memory64ListStream that captures all the heap memory
360 // (full-memory Minidumps). We can't exactly use the same loop as above,
361 // because the Minidump uses slightly different data structures to describe
// those ranges.
364 if (!data64.empty()) {
365 llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
// ParseMemory64List yields the descriptor array together with the file
// offset at which the first descriptor's data begins.
367 std::tie(memory64_list, base_rva) =
368 MinidumpMemoryDescriptor64::ParseMemory64List(data64);
370 if (memory64_list.empty())
373 for (const auto &memory_desc64 : memory64_list) {
374 const lldb::addr_t range_start = memory_desc64.start_of_memory_range;
375 const size_t range_size = memory_desc64.data_size;
// NOTE(review): base_rva and range_size both come from file data; the sum
// could wrap for a corrupt descriptor - consider checking with a subtraction
// against GetData().size() instead.
377 if (base_rva + range_size > GetData().size())
380 if (range_start <= addr && addr < range_start + range_size) {
381 return minidump::Range(range_start,
382 GetData().slice(base_rva, range_size));
// No match: the next descriptor's data starts right after this one's.
384 base_rva += range_size;
/// Returns captured memory starting at \p addr.
///
/// The range containing addr is looked up with FindMemoryRange. The result
/// starts at addr's offset inside that range and is at most `size` bytes
/// long; it is clipped to the end of the range, so it can be shorter than
/// requested.
///
/// \param addr  Address in the dumped process to read from.
/// NOTE(review): the returns taken when no range is found or the offset check
/// fails were not visible during review - presumably an empty ArrayRef;
/// confirm.
391 llvm::ArrayRef<uint8_t> MinidumpParser::GetMemory(lldb::addr_t addr,
393 // I don't have a sense of how frequently this is called or how many memory
394 // ranges a Minidump typically has, so I'm not sure if searching for the
395 // appropriate range linearly each time is stupid. Perhaps we should build
396 // an index for faster lookups.
397 llvm::Optional<minidump::Range> range = FindMemoryRange(addr);
401 // There's at least some overlap between the beginning of the desired range
402 // (addr) and the current range. Figure out where the overlap begins and how
403 // much overlap there is.
// Computed before the sanity check below; the subtraction is only meaningful
// once addr >= range->start has been established.
405 const size_t offset = addr - range->start;
407 if (addr < range->start || offset >= range->range_ref.size())
// Clip the request to the bytes actually available after the offset.
410 const size_t overlap = std::min(size, range->range_ref.size() - offset);
411 return range->range_ref.slice(offset, overlap);
// Appends memory regions described by the LinuxMaps stream to `regions`.
//
// The stream bytes are passed as text to ParseLinuxMapRegions; every region
// the parser reports with a successful status is appended. Returns true when
// `regions` is non-empty afterwards.
//
// NOTE(review): the reference parameters below are spelled `®ions` and
// `®ion`, while the body uses `regions` and `region` - this looks like a
// mis-encoded `&regions` / `&region` and should be repaired.
415 CreateRegionsCacheFromLinuxMaps(MinidumpParser &parser,
416 std::vector<MemoryRegionInfo> ®ions) {
417 auto data = parser.GetStream(StreamType::LinuxMaps);
420 ParseLinuxMapRegions(llvm::toStringRef(data),
421 [&](const lldb_private::MemoryRegionInfo ®ion,
422 const lldb_private::Status &status) -> bool {
// Entries reported with a failing status are skipped.
423 if (status.Success())
424 regions.push_back(region);
427 return !regions.empty();
// Appends one MemoryRegionInfo per entry of the MemoryInfoList stream to
// `regions`.
//
// For each entry the range is [BaseAddress, BaseAddress + RegionSize) and the
// flags are derived from the entry:
//  - readable:   no when Protect has the NoAccess bit, yes otherwise;
//  - a flag from any of ReadWrite, WriteCopy, ExecuteReadWrite,
//    ExeciteWriteCopy (presumably writable - the setter call was not visible
//    during review; confirm);
//  - executable: from any of Execute, ExecuteRead, ExecuteReadWrite,
//    ExeciteWriteCopy;
//  - mapped:     yes unless State is MemoryState::Free.
//
// A failure to read the list is logged to the LIBLLDB_LOG_MODULES category.
// Returns true when `regions` is non-empty afterwards.
//
// NOTE(review): the reference parameter below is spelled `®ions`, while the
// body uses `regions` - this looks like a mis-encoded `&regions`.
431 CreateRegionsCacheFromMemoryInfoList(MinidumpParser &parser,
432 std::vector<MemoryRegionInfo> ®ions) {
433 Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES);
434 auto ExpectedInfo = parser.GetMinidumpFile().getMemoryInfoList();
436 LLDB_LOG_ERROR(log, ExpectedInfo.takeError(),
437 "Failed to read memory info list: {0}");
// Short aliases for the two answers used below.
440 constexpr auto yes = MemoryRegionInfo::eYes;
441 constexpr auto no = MemoryRegionInfo::eNo;
442 for (const MemoryInfo &entry : *ExpectedInfo) {
443 MemoryRegionInfo region;
444 region.GetRange().SetRangeBase(entry.BaseAddress);
445 region.GetRange().SetByteSize(entry.RegionSize);
447 MemoryProtection prot = entry.Protect;
448 region.SetReadable(bool(prot & MemoryProtection::NoAccess) ? no : yes);
450 bool(prot & (MemoryProtection::ReadWrite | MemoryProtection::WriteCopy |
451 MemoryProtection::ExecuteReadWrite |
452 MemoryProtection::ExeciteWriteCopy))
455 region.SetExecutable(
456 bool(prot & (MemoryProtection::Execute | MemoryProtection::ExecuteRead |
457 MemoryProtection::ExecuteReadWrite |
458 MemoryProtection::ExeciteWriteCopy))
461 region.SetMapped(entry.State != MemoryState::Free ? yes : no);
462 regions.push_back(region);
464 return !regions.empty();
// Appends one MemoryRegionInfo per descriptor of the MemoryList stream to
// `regions`.
//
// Each region spans [StartOfMemoryRange, StartOfMemoryRange + DataSize) and
// is marked readable and mapped; no other flags are set here. Descriptors
// with a DataSize of zero get special handling (presumably skipped - the
// guarded statement was not visible during review; confirm).
//
// A failure to read the list is logged to the LIBLLDB_LOG_MODULES category.
// Returns true when `regions` is non-empty afterwards.
//
// NOTE(review): the reference parameter below is spelled `®ions`, while the
// body uses `regions` - this looks like a mis-encoded `&regions`.
468 CreateRegionsCacheFromMemoryList(MinidumpParser &parser,
469 std::vector<MemoryRegionInfo> ®ions) {
470 Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_MODULES);
471 auto ExpectedMemory = parser.GetMinidumpFile().getMemoryList();
472 if (!ExpectedMemory) {
473 LLDB_LOG_ERROR(log, ExpectedMemory.takeError(),
474 "Failed to read memory list: {0}");
// Reserve for the upper bound (one region per descriptor); the surplus is
// released by shrink_to_fit() below.
477 regions.reserve(ExpectedMemory->size());
478 for (const MemoryDescriptor &memory_desc : *ExpectedMemory) {
479 if (memory_desc.Memory.DataSize == 0)
481 MemoryRegionInfo region;
482 region.GetRange().SetRangeBase(memory_desc.StartOfMemoryRange);
483 region.GetRange().SetByteSize(memory_desc.Memory.DataSize);
484 region.SetReadable(MemoryRegionInfo::eYes);
485 region.SetMapped(MemoryRegionInfo::eYes);
486 regions.push_back(region);
488 regions.shrink_to_fit();
489 return !regions.empty();
// Appends one MemoryRegionInfo per descriptor of the Memory64List stream to
// `regions`.
//
// Each region spans [start_of_memory_range, start_of_memory_range +
// data_size) and is marked readable and mapped; no other flags are set here.
// Descriptors with a data_size of zero get special handling (presumably
// skipped - the guarded statement was not visible during review; confirm).
// The base_rva produced by ParseMemory64List is not used by the visible code.
//
// Returns true when `regions` is non-empty afterwards.
//
// NOTE(review): the reference parameter below is spelled `®ions`, while the
// body uses `regions` - this looks like a mis-encoded `&regions`.
493 CreateRegionsCacheFromMemory64List(MinidumpParser &parser,
494 std::vector<MemoryRegionInfo> ®ions) {
495 llvm::ArrayRef<uint8_t> data =
496 parser.GetStream(StreamType::Memory64List);
499 llvm::ArrayRef<MinidumpMemoryDescriptor64> memory64_list;
501 std::tie(memory64_list, base_rva) =
502 MinidumpMemoryDescriptor64::ParseMemory64List(data);
504 if (memory64_list.empty())
// Reserve for the upper bound (one region per descriptor); the surplus is
// released by shrink_to_fit() below.
507 regions.reserve(memory64_list.size());
508 for (const auto &memory_desc : memory64_list) {
509 if (memory_desc.data_size == 0)
511 MemoryRegionInfo region;
512 region.GetRange().SetRangeBase(memory_desc.start_of_memory_range);
513 region.GetRange().SetByteSize(memory_desc.data_size);
514 region.SetReadable(MemoryRegionInfo::eYes);
515 region.SetMapped(MemoryRegionInfo::eYes);
516 regions.push_back(region);
518 regions.shrink_to_fit();
519 return !regions.empty();
/// Builds the list of memory regions for the dumped process from the first
/// source that yields any regions.
///
/// Sources are tried in this order: LinuxMaps, MemoryInfoList, MemoryList,
/// Memory64List.
///
/// \return The regions paired with a flag that is true when they came from
///         LinuxMaps or MemoryInfoList and false when they came from
///         MemoryList or Memory64List (including when none produced
///         anything).
522 std::pair<MemoryRegionInfos, bool> MinidumpParser::BuildMemoryRegions() {
523 // We create the region cache using the best source. We start with
524 // the linux maps since they are the most complete and have names for the
525 // regions. Next we try the MemoryInfoList since it has
526 // read/write/execute/map data, and then fall back to the MemoryList and
527 // Memory64List to just get a list of the memory that is mapped in this
// process.
529 MemoryRegionInfos result;
// Helper shared by every exit: moves `result` into the returned pair.
// NOTE(review): the name suggests it sorts `result` first, but the sorting
// statement was not visible during review - confirm.
530 const auto &return_sorted = [&](bool is_complete) {
532 return std::make_pair(std::move(result), is_complete);
534 if (CreateRegionsCacheFromLinuxMaps(*this, result))
535 return return_sorted(true);
536 if (CreateRegionsCacheFromMemoryInfoList(*this, result))
537 return return_sorted(true);
538 if (CreateRegionsCacheFromMemoryList(*this, result))
539 return return_sorted(false);
// Last resort: whatever this produces (possibly nothing) is returned.
540 CreateRegionsCacheFromMemory64List(*this, result);
541 return return_sorted(false);
// ENUM_TO_CSTR(ST) expands to a `case StreamType::ST:` label for the switch in
// GetStreamTypeAsString below.
// NOTE(review): the remainder of the macro body was not visible during review
// - presumably it returns the enumerator's name as a string; confirm.
//
// GetStreamTypeAsString maps a StreamType value to text: there is one
// ENUM_TO_CSTR case per known stream type, and "unknown stream type" is
// returned for any value without a case.
544 #define ENUM_TO_CSTR(ST) \
545 case StreamType::ST: \
549 MinidumpParser::GetStreamTypeAsString(StreamType stream_type) {
550 switch (stream_type) {
551 ENUM_TO_CSTR(Unused);
552 ENUM_TO_CSTR(ThreadList);
553 ENUM_TO_CSTR(ModuleList);
554 ENUM_TO_CSTR(MemoryList);
555 ENUM_TO_CSTR(Exception);
556 ENUM_TO_CSTR(SystemInfo);
557 ENUM_TO_CSTR(ThreadExList);
558 ENUM_TO_CSTR(Memory64List);
559 ENUM_TO_CSTR(CommentA);
560 ENUM_TO_CSTR(CommentW);
561 ENUM_TO_CSTR(HandleData);
562 ENUM_TO_CSTR(FunctionTable);
563 ENUM_TO_CSTR(UnloadedModuleList);
564 ENUM_TO_CSTR(MiscInfo);
565 ENUM_TO_CSTR(MemoryInfoList);
566 ENUM_TO_CSTR(ThreadInfoList);
567 ENUM_TO_CSTR(HandleOperationList);
569 ENUM_TO_CSTR(JavascriptData);
570 ENUM_TO_CSTR(SystemMemoryInfo);
571 ENUM_TO_CSTR(ProcessVMCounters);
572 ENUM_TO_CSTR(LastReserved);
573 ENUM_TO_CSTR(BreakpadInfo);
574 ENUM_TO_CSTR(AssertionInfo);
575 ENUM_TO_CSTR(LinuxCPUInfo);
576 ENUM_TO_CSTR(LinuxProcStatus);
577 ENUM_TO_CSTR(LinuxLSBRelease);
578 ENUM_TO_CSTR(LinuxCMDLine);
579 ENUM_TO_CSTR(LinuxEnviron);
580 ENUM_TO_CSTR(LinuxAuxv);
581 ENUM_TO_CSTR(LinuxMaps);
582 ENUM_TO_CSTR(LinuxDSODebug);
583 ENUM_TO_CSTR(LinuxProcStat);
584 ENUM_TO_CSTR(LinuxProcUptime);
585 ENUM_TO_CSTR(LinuxProcFD);
586 ENUM_TO_CSTR(FacebookAppCustomData);
587 ENUM_TO_CSTR(FacebookBuildID);
588 ENUM_TO_CSTR(FacebookAppVersionName);
589 ENUM_TO_CSTR(FacebookJavaStack);
590 ENUM_TO_CSTR(FacebookDalvikInfo);
591 ENUM_TO_CSTR(FacebookUnwindSymbols);
592 ENUM_TO_CSTR(FacebookDumpErrorLog);
593 ENUM_TO_CSTR(FacebookAppStateLog);
594 ENUM_TO_CSTR(FacebookAbortReason);
595 ENUM_TO_CSTR(FacebookThreadName);
596 ENUM_TO_CSTR(FacebookLogcat);
// Values without a dedicated case end up here.
598 return "unknown stream type";