contrib/llvm/tools/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp

   1 //===-- ObjectFileBreakpad.cpp -------------------------------- -*- C++ -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 #include "Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h"
  11 #include "lldb/Core/ModuleSpec.h"
  12 #include "lldb/Core/PluginManager.h"
  13 #include "lldb/Core/Section.h"
  14 #include "lldb/Utility/DataBuffer.h"
  15 #include "llvm/ADT/StringExtras.h"
  16
  17 using namespace lldb;
  18 using namespace lldb_private;
  19 using namespace lldb_private::breakpad;
  20
  21 namespace {
  22 struct Header {
  23   ArchSpec arch;
  24   UUID uuid;
  25   static llvm::Optional<Header> parse(llvm::StringRef text);
  26 };
  27
  28 enum class Token { Unknown, Module, Info, File, Func, Public, Stack };
  29 } // namespace
  30
  31 static Token toToken(llvm::StringRef str) {
  32   return llvm::StringSwitch<Token>(str)
  33       .Case("MODULE", Token::Module)
  34       .Case("INFO", Token::Info)
  35       .Case("FILE", Token::File)
  36       .Case("FUNC", Token::Func)
  37       .Case("PUBLIC", Token::Public)
  38       .Case("STACK", Token::Stack)
  39       .Default(Token::Unknown);
  40 }
  41
  42 static llvm::StringRef toString(Token t) {
  43   switch (t) {
  44   case Token::Unknown:
  45     return "";
  46   case Token::Module:
  47     return "MODULE";
  48   case Token::Info:
  49     return "INFO";
  50   case Token::File:
  51     return "FILE";
  52   case Token::Func:
  53     return "FUNC";
  54   case Token::Public:
  55     return "PUBLIC";
  56   case Token::Stack:
  57     return "STACK";
  58   }
  59   llvm_unreachable("Unknown token!");
  60 }
  61
  62 static llvm::Triple::OSType toOS(llvm::StringRef str) {
  63   using llvm::Triple;
  64   return llvm::StringSwitch<Triple::OSType>(str)
  65       .Case("Linux", Triple::Linux)
  66       .Case("mac", Triple::MacOSX)
  67       .Case("windows", Triple::Win32)
  68       .Default(Triple::UnknownOS);
  69 }
  70
  71 static llvm::Triple::ArchType toArch(llvm::StringRef str) {
  72   using llvm::Triple;
  73   return llvm::StringSwitch<Triple::ArchType>(str)
  74       .Case("arm", Triple::arm)
  75       .Case("arm64", Triple::aarch64)
  76       .Case("mips", Triple::mips)
  77       .Case("ppc", Triple::ppc)
  78       .Case("ppc64", Triple::ppc64)
  79       .Case("s390", Triple::systemz)
  80       .Case("sparc", Triple::sparc)
  81       .Case("sparcv9", Triple::sparcv9)
  82       .Case("x86", Triple::x86)
  83       .Case("x86_64", Triple::x86_64)
  84       .Default(Triple::UnknownArch);
  85 }
  86
  87 static llvm::StringRef consume_front(llvm::StringRef &str, size_t n) {
  88   llvm::StringRef result = str.take_front(n);
  89   str = str.drop_front(n);
  90   return result;
  91 }
  92
  93 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
  94   struct uuid_data {
  95     llvm::support::ulittle32_t uuid1;
  96     llvm::support::ulittle16_t uuid2[2];
  97     uint8_t uuid3[8];
  98     llvm::support::ulittle32_t age;
  99   } data;
 100   static_assert(sizeof(data) == 20, "");
 101   // The textual module id encoding should be between 33 and 40 bytes long,
 102   // depending on the size of the age field, which is of variable length.
 103   // The first three chunks of the id are encoded in big endian, so we need to
 104   // byte-swap those.
 105   if (str.size() < 33 || str.size() > 40)
 106     return UUID();
 107   uint32_t t;
 108   if (to_integer(consume_front(str, 8), t, 16))
 109     data.uuid1 = t;
 110   else
 111     return UUID();
 112   for (int i = 0; i < 2; ++i) {
 113     if (to_integer(consume_front(str, 4), t, 16))
 114       data.uuid2[i] = t;
 115     else
 116       return UUID();
 117   }
 118   for (int i = 0; i < 8; ++i) {
 119     if (!to_integer(consume_front(str, 2), data.uuid3[i], 16))
 120       return UUID();
 121   }
 122   if (to_integer(str, t, 16))
 123     data.age = t;
 124   else
 125     return UUID();
 126
 127   // On non-windows, the age field should always be zero, so we don't include to
 128   // match the native uuid format of these platforms.
 129   return UUID::fromData(&data, os == llvm::Triple::Win32 ? 20 : 16);
 130 }
 131
 132 llvm::Optional<Header> Header::parse(llvm::StringRef text) {
 133   // A valid module should start with something like:
 134   // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
 135   // optionally followed by
 136   // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
 137   llvm::StringRef token, line;
 138   std::tie(line, text) = text.split('\n');
 139   std::tie(token, line) = getToken(line);
 140   if (toToken(token) != Token::Module)
 141     return llvm::None;
 142
 143   std::tie(token, line) = getToken(line);
 144   llvm::Triple triple;
 145   triple.setOS(toOS(token));
 146   if (triple.getOS() == llvm::Triple::UnknownOS)
 147     return llvm::None;
 148
 149   std::tie(token, line) = getToken(line);
 150   triple.setArch(toArch(token));
 151   if (triple.getArch() == llvm::Triple::UnknownArch)
 152     return llvm::None;
 153
 154   llvm::StringRef module_id;
 155   std::tie(module_id, line) = getToken(line);
 156
 157   std::tie(line, text) = text.split('\n');
 158   std::tie(token, line) = getToken(line);
 159   if (token == "INFO") {
 160     std::tie(token, line) = getToken(line);
 161     if (token != "CODE_ID")
 162       return llvm::None;
 163
 164     std::tie(token, line) = getToken(line);
 165     // If we don't have any text following the code id (e.g. on linux), we
 166     // should use the module id as UUID. Otherwise, we revert back to the module
 167     // id.
 168     if (line.trim().empty()) {
 169       UUID uuid;
 170       if (uuid.SetFromStringRef(token, token.size() / 2) != token.size())
 171         return llvm::None;
 172
 173       return Header{ArchSpec(triple), uuid};
 174     }
 175   }
 176
 177   // We reach here if we don't have a INFO CODE_ID section, or we chose not to
 178   // use it. In either case, we need to properly decode the module id, whose
 179   // fields are encoded in big-endian.
 180   UUID uuid = parseModuleId(triple.getOS(), module_id);
 181   if (!uuid)
 182     return llvm::None;
 183
 184   return Header{ArchSpec(triple), uuid};
 185 }
 186
 187 void ObjectFileBreakpad::Initialize() {
 188   PluginManager::RegisterPlugin(GetPluginNameStatic(),
 189                                 GetPluginDescriptionStatic(), CreateInstance,
 190                                 CreateMemoryInstance, GetModuleSpecifications);
 191 }
 192
 193 void ObjectFileBreakpad::Terminate() {
 194   PluginManager::UnregisterPlugin(CreateInstance);
 195 }
 196
 197 ConstString ObjectFileBreakpad::GetPluginNameStatic() {
 198   static ConstString g_name("breakpad");
 199   return g_name;
 200 }
 201
 202 ObjectFile *ObjectFileBreakpad::CreateInstance(
 203     const ModuleSP &module_sp, DataBufferSP &data_sp, offset_t data_offset,
 204     const FileSpec *file, offset_t file_offset, offset_t length) {
 205   if (!data_sp) {
 206     data_sp = MapFileData(*file, length, file_offset);
 207     if (!data_sp)
 208       return nullptr;
 209     data_offset = 0;
 210   }
 211   auto text = toStringRef(data_sp->GetData());
 212   llvm::Optional<Header> header = Header::parse(text);
 213   if (!header)
 214     return nullptr;
 215
 216   // Update the data to contain the entire file if it doesn't already
 217   if (data_sp->GetByteSize() < length) {
 218     data_sp = MapFileData(*file, length, file_offset);
 219     if (!data_sp)
 220       return nullptr;
 221     data_offset = 0;
 222   }
 223
 224   return new ObjectFileBreakpad(module_sp, data_sp, data_offset, file,
 225                                 file_offset, length, std::move(header->arch),
 226                                 std::move(header->uuid));
 227 }
 228
 229 ObjectFile *ObjectFileBreakpad::CreateMemoryInstance(
 230     const ModuleSP &module_sp, DataBufferSP &data_sp,
 231     const ProcessSP &process_sp, addr_t header_addr) {
 232   return nullptr;
 233 }
 234
 235 size_t ObjectFileBreakpad::GetModuleSpecifications(
 236     const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
 237     offset_t file_offset, offset_t length, ModuleSpecList &specs) {
 238   auto text = toStringRef(data_sp->GetData());
 239   llvm::Optional<Header> header = Header::parse(text);
 240   if (!header)
 241     return 0;
 242   ModuleSpec spec(file, std::move(header->arch));
 243   spec.GetUUID() = std::move(header->uuid);
 244   specs.Append(spec);
 245   return 1;
 246 }
 247
 248 ObjectFileBreakpad::ObjectFileBreakpad(const ModuleSP &module_sp,
 249                                        DataBufferSP &data_sp,
 250                                        offset_t data_offset,
 251                                        const FileSpec *file, offset_t offset,
 252                                        offset_t length, ArchSpec arch,
 253                                        UUID uuid)
 254     : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
 255       m_arch(std::move(arch)), m_uuid(std::move(uuid)) {}
 256
 257 bool ObjectFileBreakpad::ParseHeader() {
 258   // We already parsed the header during initialization.
 259   return true;
 260 }
 261
 262 Symtab *ObjectFileBreakpad::GetSymtab() {
 263   // TODO
 264   return nullptr;
 265 }
 266
 267 bool ObjectFileBreakpad::GetUUID(UUID *uuid) {
 268   *uuid = m_uuid;
 269   return true;
 270 }
 271
 272 void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
 273   if (m_sections_ap)
 274     return;
 275   m_sections_ap = llvm::make_unique<SectionList>();
 276
 277   Token current_section = Token::Unknown;
 278   offset_t section_start;
 279   llvm::StringRef text = toStringRef(m_data.GetData());
 280   uint32_t next_section_id = 1;
 281   auto maybe_add_section = [&](const uint8_t *end_ptr) {
 282     if (current_section == Token::Unknown)
 283       return; // We have been called before parsing the first line.
 284
 285     offset_t end_offset = end_ptr - m_data.GetDataStart();
 286     auto section_sp = std::make_shared<Section>(
 287         GetModule(), this, next_section_id++,
 288         ConstString(toString(current_section)), eSectionTypeOther,
 289         /*file_vm_addr*/ 0, /*vm_size*/ 0, section_start,
 290         end_offset - section_start, /*log2align*/ 0, /*flags*/ 0);
 291     m_sections_ap->AddSection(section_sp);
 292     unified_section_list.AddSection(section_sp);
 293   };
 294   while (!text.empty()) {
 295     llvm::StringRef line;
 296     std::tie(line, text) = text.split('\n');
 297
 298     Token token = toToken(getToken(line).first);
 299     if (token == Token::Unknown) {
 300       // We assume this is a line record, which logically belongs to the Func
 301       // section. Errors will be handled when parsing the Func section.
 302       token = Token::Func;
 303     }
 304     if (token == current_section)
 305       continue;
 306
 307     // Changing sections, finish off the previous one, if there was any.
 308     maybe_add_section(line.bytes_begin());
 309     // And start a new one.
 310     current_section = token;
 311     section_start = line.bytes_begin() - m_data.GetDataStart();
 312   }
 313   // Finally, add the last section.
 314   maybe_add_section(m_data.GetDataEnd());
 315 }