contrib/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp

   1 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
   2 //
   3 //                             The LLVM Linker
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9
  10 ///
  11 /// \file Converts from in-memory normalized mach-o to in-memory Atoms.
  12 ///
  13 ///                  +------------+
  14 ///                  | normalized |
  15 ///                  +------------+
  16 ///                        |
  17 ///                        |
  18 ///                        v
  19 ///                    +-------+
  20 ///                    | Atoms |
  21 ///                    +-------+
  22
  23 #include "ArchHandler.h"
  24 #include "Atoms.h"
  25 #include "File.h"
  26 #include "MachONormalizedFile.h"
  27 #include "MachONormalizedFileBinaryUtils.h"
  28 #include "lld/Common/LLVM.h"
  29 #include "lld/Core/Error.h"
  30 #include "llvm/BinaryFormat/Dwarf.h"
  31 #include "llvm/BinaryFormat/MachO.h"
  32 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
  33 #include "llvm/Support/DataExtractor.h"
  34 #include "llvm/Support/Debug.h"
  35 #include "llvm/Support/Error.h"
  36 #include "llvm/Support/Format.h"
  37 #include "llvm/Support/LEB128.h"
  38 #include "llvm/Support/raw_ostream.h"
  39
  40 using namespace llvm::MachO;
  41 using namespace lld::mach_o::normalized;
  42
  43 #define DEBUG_TYPE "normalized-file-to-atoms"
  44
  45 namespace lld {
  46 namespace mach_o {
  47
  48
  49 namespace { // anonymous
  50
  51
  52 #define ENTRY(seg, sect, type, atomType) \
  53   {seg, sect, type, DefinedAtom::atomType }
  54
  55 struct MachORelocatableSectionToAtomType {
  56   StringRef                 segmentName;
  57   StringRef                 sectionName;
  58   SectionType               sectionType;
  59   DefinedAtom::ContentType  atomType;
  60 };
  61
  62 const MachORelocatableSectionToAtomType sectsToAtomType[] = {
  63   ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
  64   ENTRY("__TEXT", "__text",           S_REGULAR,          typeResolver),
  65   ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
  66   ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
  67   ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
  68   ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
  69   ENTRY("__TEXT", "__const_coal",     S_COALESCED,        typeConstant),
  70   ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
  71   ENTRY("__TEXT", "__eh_frame",       S_REGULAR,          typeCFI),
  72   ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
  73   ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
  74   ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
  75   ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
  76   ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
  77   ENTRY("__DATA", "__datacoal_nt",    S_COALESCED,        typeData),
  78   ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
  79   ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
  80   ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
  81                                                           typeInitializerPtr),
  82   ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
  83                                                           typeTerminatorPtr),
  84   ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
  85                                                           typeGOT),
  86   ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
  87   ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
  88                                                           typeGOT),
  89   ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
  90   ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
  91                                                           typeThunkTLV),
  92   ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
  93   ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
  94                                                         typeTLVInitialZeroFill),
  95   ENTRY("__DATA", "__objc_imageinfo", S_REGULAR,          typeObjCImageInfo),
  96   ENTRY("__DATA", "__objc_catlist",   S_REGULAR,          typeObjC2CategoryList),
  97   ENTRY("",       "",                 S_INTERPOSING,      typeInterposingTuples),
  98   ENTRY("__LD",   "__compact_unwind", S_REGULAR,
  99                                                          typeCompactUnwindInfo),
 100   ENTRY("",       "",                 S_REGULAR,          typeUnknown)
 101 };
 102 #undef ENTRY
 103
 104
 105 /// Figures out ContentType of a mach-o section.
 106 DefinedAtom::ContentType atomTypeFromSection(const Section &section,
 107                                              bool &customSectionName) {
 108   // First look for match of name and type. Empty names in table are wildcards.
 109   customSectionName = false;
 110   for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
 111                                  p->atomType != DefinedAtom::typeUnknown; ++p) {
 112     if (p->sectionType != section.type)
 113       continue;
 114     if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
 115       continue;
 116     if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
 117       continue;
 118     customSectionName = p->segmentName.empty() && p->sectionName.empty();
 119     return p->atomType;
 120   }
 121   // Look for code denoted by section attributes
 122   if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
 123     return DefinedAtom::typeCode;
 124
 125   return DefinedAtom::typeUnknown;
 126 }
 127
 128 enum AtomizeModel {
 129   atomizeAtSymbols,
 130   atomizeFixedSize,
 131   atomizePointerSize,
 132   atomizeUTF8,
 133   atomizeUTF16,
 134   atomizeCFI,
 135   atomizeCU,
 136   atomizeCFString
 137 };
 138
 139 /// Returns info on how to atomize a section of the specified ContentType.
 140 void sectionParseInfo(DefinedAtom::ContentType atomType,
 141                       unsigned int &sizeMultiple,
 142                       DefinedAtom::Scope &scope,
 143                       DefinedAtom::Merge &merge,
 144                       AtomizeModel &atomizeModel) {
 145   struct ParseInfo {
 146     DefinedAtom::ContentType  atomType;
 147     unsigned int              sizeMultiple;
 148     DefinedAtom::Scope        scope;
 149     DefinedAtom::Merge        merge;
 150     AtomizeModel              atomizeModel;
 151   };
 152
 153   #define ENTRY(type, size, scope, merge, model) \
 154     {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
 155
 156   static const ParseInfo parseInfo[] = {
 157     ENTRY(typeCode,              1, scopeGlobal,          mergeNo,
 158                                                             atomizeAtSymbols),
 159     ENTRY(typeData,              1, scopeGlobal,          mergeNo,
 160                                                             atomizeAtSymbols),
 161     ENTRY(typeConstData,         1, scopeGlobal,          mergeNo,
 162                                                             atomizeAtSymbols),
 163     ENTRY(typeZeroFill,          1, scopeGlobal,          mergeNo,
 164                                                             atomizeAtSymbols),
 165     ENTRY(typeConstant,          1, scopeGlobal,          mergeNo,
 166                                                             atomizeAtSymbols),
 167     ENTRY(typeCString,           1, scopeLinkageUnit,     mergeByContent,
 168                                                             atomizeUTF8),
 169     ENTRY(typeUTF16String,       1, scopeLinkageUnit,     mergeByContent,
 170                                                             atomizeUTF16),
 171     ENTRY(typeCFI,               4, scopeTranslationUnit, mergeNo,
 172                                                             atomizeCFI),
 173     ENTRY(typeLiteral4,          4, scopeLinkageUnit,     mergeByContent,
 174                                                             atomizeFixedSize),
 175     ENTRY(typeLiteral8,          8, scopeLinkageUnit,     mergeByContent,
 176                                                             atomizeFixedSize),
 177     ENTRY(typeLiteral16,        16, scopeLinkageUnit,     mergeByContent,
 178                                                             atomizeFixedSize),
 179     ENTRY(typeCFString,          4, scopeLinkageUnit,     mergeByContent,
 180                                                             atomizeCFString),
 181     ENTRY(typeInitializerPtr,    4, scopeTranslationUnit, mergeNo,
 182                                                             atomizePointerSize),
 183     ENTRY(typeTerminatorPtr,     4, scopeTranslationUnit, mergeNo,
 184                                                             atomizePointerSize),
 185     ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
 186                                                             atomizeCU),
 187     ENTRY(typeGOT,               4, scopeLinkageUnit,     mergeByContent,
 188                                                             atomizePointerSize),
 189     ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
 190                                                             atomizePointerSize),
 191     ENTRY(typeUnknown,           1, scopeGlobal,          mergeNo,
 192                                                             atomizeAtSymbols)
 193   };
 194   #undef ENTRY
 195   const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
 196   for (int i=0; i < tableLen; ++i) {
 197     if (parseInfo[i].atomType == atomType) {
 198       sizeMultiple = parseInfo[i].sizeMultiple;
 199       scope        = parseInfo[i].scope;
 200       merge        = parseInfo[i].merge;
 201       atomizeModel = parseInfo[i].atomizeModel;
 202       return;
 203     }
 204   }
 205
 206   // Unknown type is atomized by symbols.
 207   sizeMultiple = 1;
 208   scope = DefinedAtom::scopeGlobal;
 209   merge = DefinedAtom::mergeNo;
 210   atomizeModel = atomizeAtSymbols;
 211 }
 212
 213
 214 Atom::Scope atomScope(uint8_t scope) {
 215   switch (scope) {
 216   case N_EXT:
 217     return Atom::scopeGlobal;
 218   case N_PEXT:
 219   case N_PEXT | N_EXT:
 220     return Atom::scopeLinkageUnit;
 221   case 0:
 222     return Atom::scopeTranslationUnit;
 223   }
 224   llvm_unreachable("unknown scope value!");
 225 }
 226
 227 void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
 228                             uint32_t sectionIndex,
 229                             SmallVector<const Symbol *, 64> &outSyms) {
 230   for (const Symbol &sym : inSymbols) {
 231     // Only look at definition symbols.
 232     if ((sym.type & N_TYPE) != N_SECT)
 233       continue;
 234     if (sym.sect != sectionIndex)
 235       continue;
 236     outSyms.push_back(&sym);
 237   }
 238 }
 239
 240 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
 241                     MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
 242                     uint16_t symbolDescFlags, Atom::Scope symbolScope,
 243                     uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
 244   // Mach-O symbol table does have size in it. Instead the size is the
 245   // difference between this and the next symbol.
 246   uint64_t size = nextSymbolAddr - symbolAddr;
 247   uint64_t offset = symbolAddr - section.address;
 248   bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
 249   if (isZeroFillSection(section.type)) {
 250     file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
 251                                 noDeadStrip, copyRefs, &section);
 252   } else {
 253     DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
 254                               ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
 255     bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
 256     if (atomType == DefinedAtom::typeUnknown) {
 257       // Mach-O needs a segment and section name.  Concatentate those two
 258       // with a / separator (e.g. "seg/sect") to fit into the lld model
 259       // of just a section name.
 260       std::string segSectName = section.segmentName.str()
 261                                 + "/" + section.sectionName.str();
 262       file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
 263                                          merge, thumb, noDeadStrip, offset,
 264                                          size, segSectName, true, &section);
 265     } else {
 266       if ((atomType == lld::DefinedAtom::typeCode) &&
 267           (symbolDescFlags & N_SYMBOL_RESOLVER)) {
 268         atomType = lld::DefinedAtom::typeResolver;
 269       }
 270       file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
 271                           offset, size, thumb, noDeadStrip, copyRefs, &section);
 272     }
 273   }
 274 }
 275
 276 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
 277                                    const Section &section,
 278                                    const NormalizedFile &normalizedFile,
 279                                    MachOFile &file, bool scatterable,
 280                                    bool copyRefs) {
 281   // Find section's index.
 282   uint32_t sectIndex = 1;
 283   for (auto &sect : normalizedFile.sections) {
 284     if (&sect == &section)
 285       break;
 286     ++sectIndex;
 287   }
 288
 289   // Find all symbols in this section.
 290   SmallVector<const Symbol *, 64> symbols;
 291   appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
 292   appendSymbolsInSection(normalizedFile.localSymbols,  sectIndex, symbols);
 293
 294   // Sort symbols.
 295   std::sort(symbols.begin(), symbols.end(),
 296             [](const Symbol *lhs, const Symbol *rhs) -> bool {
 297               if (lhs == rhs)
 298                 return false;
 299               // First by address.
 300               uint64_t lhsAddr = lhs->value;
 301               uint64_t rhsAddr = rhs->value;
 302               if (lhsAddr != rhsAddr)
 303                 return lhsAddr < rhsAddr;
 304                // If same address, one is an alias so sort by scope.
 305               Atom::Scope lScope = atomScope(lhs->scope);
 306               Atom::Scope rScope = atomScope(rhs->scope);
 307               if (lScope != rScope)
 308                 return lScope < rScope;
 309               // If same address and scope, see if one might be better as
 310               // the alias.
 311               bool lPrivate = (lhs->name.front() == 'l');
 312               bool rPrivate = (rhs->name.front() == 'l');
 313               if (lPrivate != rPrivate)
 314                 return lPrivate;
 315               // If same address and scope, sort by name.
 316               return lhs->name < rhs->name;
 317             });
 318
 319   // Debug logging of symbols.
 320   //for (const Symbol *sym : symbols)
 321   //  llvm::errs() << "  sym: "
 322   //    << llvm::format("0x%08llx ", (uint64_t)sym->value)
 323   //    << ", " << sym->name << "\n";
 324
 325   // If section has no symbols and no content, there are no atoms.
 326   if (symbols.empty() && section.content.empty())
 327     return llvm::Error::success();
 328
 329   if (symbols.empty()) {
 330     // Section has no symbols, put all content in one anoymous atom.
 331     atomFromSymbol(atomType, section, file, section.address, StringRef(),
 332                   0, Atom::scopeTranslationUnit,
 333                   section.address + section.content.size(),
 334                   scatterable, copyRefs);
 335   }
 336   else if (symbols.front()->value != section.address) {
 337     // Section has anonymous content before first symbol.
 338     atomFromSymbol(atomType, section, file, section.address, StringRef(),
 339                    0, Atom::scopeTranslationUnit, symbols.front()->value,
 340                    scatterable, copyRefs);
 341   }
 342
 343   const Symbol *lastSym = nullptr;
 344   for (const Symbol *sym : symbols) {
 345     if (lastSym != nullptr) {
 346       // Ignore any assembler added "ltmpNNN" symbol at start of section
 347       // if there is another symbol at the start.
 348       if ((lastSym->value != sym->value)
 349           || lastSym->value != section.address
 350           || !lastSym->name.startswith("ltmp")) {
 351         atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
 352                        lastSym->desc, atomScope(lastSym->scope), sym->value,
 353                        scatterable, copyRefs);
 354       }
 355     }
 356     lastSym = sym;
 357   }
 358   if (lastSym != nullptr) {
 359     atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
 360                    lastSym->desc, atomScope(lastSym->scope),
 361                    section.address + section.content.size(),
 362                    scatterable, copyRefs);
 363   }
 364
 365   // If object built without .subsections_via_symbols, add reference chain.
 366   if (!scatterable) {
 367     MachODefinedAtom *prevAtom = nullptr;
 368     file.eachAtomInSection(section,
 369                            [&](MachODefinedAtom *atom, uint64_t offset)->void {
 370       if (prevAtom)
 371         prevAtom->addReference(Reference::KindNamespace::all,
 372                                Reference::KindArch::all,
 373                                Reference::kindLayoutAfter, 0, atom, 0);
 374       prevAtom = atom;
 375     });
 376   }
 377
 378   return llvm::Error::success();
 379 }
 380
 381 llvm::Error processSection(DefinedAtom::ContentType atomType,
 382                            const Section &section,
 383                            bool customSectionName,
 384                            const NormalizedFile &normalizedFile,
 385                            MachOFile &file, bool scatterable,
 386                            bool copyRefs) {
 387   const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
 388   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
 389
 390   // Get info on how to atomize section.
 391   unsigned int       sizeMultiple;
 392   DefinedAtom::Scope scope;
 393   DefinedAtom::Merge merge;
 394   AtomizeModel       atomizeModel;
 395   sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
 396
 397   // Validate section size.
 398   if ((section.content.size() % sizeMultiple) != 0)
 399     return llvm::make_error<GenericError>(Twine("Section ")
 400                                           + section.segmentName
 401                                           + "/" + section.sectionName
 402                                           + " has size ("
 403                                           + Twine(section.content.size())
 404                                           + ") which is not a multiple of "
 405                                           + Twine(sizeMultiple));
 406
 407   if (atomizeModel == atomizeAtSymbols) {
 408     // Break section up into atoms each with a fixed size.
 409     return processSymboledSection(atomType, section, normalizedFile, file,
 410                                   scatterable, copyRefs);
 411   } else {
 412     unsigned int size;
 413     for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
 414       switch (atomizeModel) {
 415       case atomizeFixedSize:
 416         // Break section up into atoms each with a fixed size.
 417         size = sizeMultiple;
 418         break;
 419       case atomizePointerSize:
 420         // Break section up into atoms each the size of a pointer.
 421         size = is64 ? 8 : 4;
 422         break;
 423       case atomizeUTF8:
 424         // Break section up into zero terminated c-strings.
 425         size = 0;
 426         for (unsigned int i = offset; i < e; ++i) {
 427           if (section.content[i] == 0) {
 428             size = i + 1 - offset;
 429             break;
 430           }
 431         }
 432         break;
 433       case atomizeUTF16:
 434         // Break section up into zero terminated UTF16 strings.
 435         size = 0;
 436         for (unsigned int i = offset; i < e; i += 2) {
 437           if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
 438             size = i + 2 - offset;
 439             break;
 440           }
 441         }
 442         break;
 443       case atomizeCFI:
 444         // Break section up into dwarf unwind CFIs (FDE or CIE).
 445         size = read32(&section.content[offset], isBig) + 4;
 446         if (offset+size > section.content.size()) {
 447           return llvm::make_error<GenericError>(Twine("Section ")
 448                                                 + section.segmentName
 449                                                 + "/" + section.sectionName
 450                                                 + " is malformed.  Size of CFI "
 451                                                 "starting at offset ("
 452                                                 + Twine(offset)
 453                                                 + ") is past end of section.");
 454         }
 455         break;
 456       case atomizeCU:
 457         // Break section up into compact unwind entries.
 458         size = is64 ? 32 : 20;
 459         break;
 460       case atomizeCFString:
 461         // Break section up into NS/CFString objects.
 462         size = is64 ? 32 : 16;
 463         break;
 464       case atomizeAtSymbols:
 465         break;
 466       }
 467       if (size == 0) {
 468         return llvm::make_error<GenericError>(Twine("Section ")
 469                                               + section.segmentName
 470                                               + "/" + section.sectionName
 471                                               + " is malformed.  The last atom "
 472                                               "is not zero terminated.");
 473       }
 474       if (customSectionName) {
 475         // Mach-O needs a segment and section name.  Concatentate those two
 476         // with a / separator (e.g. "seg/sect") to fit into the lld model
 477         // of just a section name.
 478         std::string segSectName = section.segmentName.str()
 479                                   + "/" + section.sectionName.str();
 480         file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
 481                                            merge, false, false, offset,
 482                                            size, segSectName, true, &section);
 483       } else {
 484         file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
 485                             false, false, copyRefs, &section);
 486       }
 487       offset += size;
 488     }
 489   }
 490   return llvm::Error::success();
 491 }
 492
 493 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
 494                                           uint64_t address) {
 495   for (const Section &s : normalizedFile.sections) {
 496     uint64_t sAddr = s.address;
 497     if ((sAddr <= address) && (address < sAddr+s.content.size())) {
 498       return &s;
 499     }
 500   }
 501   return nullptr;
 502 }
 503
 504 const MachODefinedAtom *
 505 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
 506                         uint64_t addr, Reference::Addend &addend) {
 507   const Section *sect = nullptr;
 508   sect = findSectionCoveringAddress(normalizedFile, addr);
 509   if (!sect)
 510     return nullptr;
 511
 512   uint32_t offsetInTarget;
 513   uint64_t offsetInSect = addr - sect->address;
 514   auto atom =
 515       file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
 516   addend = offsetInTarget;
 517   return atom;
 518 }
 519
 520 // Walks all relocations for a section in a normalized .o file and
 521 // creates corresponding lld::Reference objects.
 522 llvm::Error convertRelocs(const Section &section,
 523                           const NormalizedFile &normalizedFile,
 524                           bool scatterable,
 525                           MachOFile &file,
 526                           ArchHandler &handler) {
 527   // Utility function for ArchHandler to find atom by its address.
 528   auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
 529                          const lld::Atom **atom, Reference::Addend *addend)
 530                          -> llvm::Error {
 531     if (sectIndex > normalizedFile.sections.size())
 532       return llvm::make_error<GenericError>(Twine("out of range section "
 533                                      "index (") + Twine(sectIndex) + ")");
 534     const Section *sect = nullptr;
 535     if (sectIndex == 0) {
 536       sect = findSectionCoveringAddress(normalizedFile, addr);
 537       if (!sect)
 538         return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
 539                                        + ") is not in any section"));
 540     } else {
 541       sect = &normalizedFile.sections[sectIndex-1];
 542     }
 543     uint32_t offsetInTarget;
 544     uint64_t offsetInSect = addr - sect->address;
 545     *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
 546     *addend = offsetInTarget;
 547     return llvm::Error::success();
 548   };
 549
 550   // Utility function for ArchHandler to find atom by its symbol index.
 551   auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
 552                            -> llvm::Error {
 553     // Find symbol from index.
 554     const Symbol *sym = nullptr;
 555     uint32_t numStabs  = normalizedFile.stabsSymbols.size();
 556     uint32_t numLocal  = normalizedFile.localSymbols.size();
 557     uint32_t numGlobal = normalizedFile.globalSymbols.size();
 558     uint32_t numUndef  = normalizedFile.undefinedSymbols.size();
 559     assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
 560     if (symbolIndex < numStabs+numLocal) {
 561       sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
 562     } else if (symbolIndex < numStabs+numLocal+numGlobal) {
 563       sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
 564     } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
 565       sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
 566                                              numGlobal];
 567     } else {
 568       return llvm::make_error<GenericError>(Twine("symbol index (")
 569                                      + Twine(symbolIndex) + ") out of range");
 570     }
 571
 572     // Find atom from symbol.
 573     if ((sym->type & N_TYPE) == N_SECT) {
 574       if (sym->sect > normalizedFile.sections.size())
 575         return llvm::make_error<GenericError>(Twine("symbol section index (")
 576                                         + Twine(sym->sect) + ") out of range ");
 577       const Section &symSection = normalizedFile.sections[sym->sect-1];
 578       uint64_t targetOffsetInSect = sym->value - symSection.address;
 579       MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
 580                                                             targetOffsetInSect);
 581       if (target) {
 582         *result = target;
 583         return llvm::Error::success();
 584       }
 585       return llvm::make_error<GenericError>("no atom found for defined symbol");
 586     } else if ((sym->type & N_TYPE) == N_UNDF) {
 587       const lld::Atom *target = file.findUndefAtom(sym->name);
 588       if (target) {
 589         *result = target;
 590         return llvm::Error::success();
 591       }
 592       return llvm::make_error<GenericError>("no undefined atom found for sym");
 593     } else {
 594       // Search undefs
 595       return llvm::make_error<GenericError>("no atom found for symbol");
 596     }
 597   };
 598
 599   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
 600   // Use old-school iterator so that paired relocations can be grouped.
 601   for (auto it=section.relocations.begin(), e=section.relocations.end();
 602                                                                 it != e; ++it) {
 603     const Relocation &reloc = *it;
 604     // Find atom this relocation is in.
 605     if (reloc.offset > section.content.size())
 606       return llvm::make_error<GenericError>(
 607                                     Twine("r_address (") + Twine(reloc.offset)
 608                                     + ") is larger than section size ("
 609                                     + Twine(section.content.size()) + ")");
 610     uint32_t offsetInAtom;
 611     MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
 612                                                             reloc.offset,
 613                                                             &offsetInAtom);
 614     assert(inAtom && "r_address in range, should have found atom");
 615     uint64_t fixupAddress = section.address + reloc.offset;
 616
 617     const lld::Atom *target = nullptr;
 618     Reference::Addend addend = 0;
 619     Reference::KindValue kind;
 620     if (handler.isPairedReloc(reloc)) {
 621       // Handle paired relocations together.
 622       const Relocation &reloc2 = *++it;
 623       auto relocErr = handler.getPairReferenceInfo(
 624           reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
 625           atomByAddr, atomBySymbol, &kind, &target, &addend);
 626       if (relocErr) {
 627         return handleErrors(std::move(relocErr),
 628                             [&](std::unique_ptr<GenericError> GE) {
 629           return llvm::make_error<GenericError>(
 630             Twine("bad relocation (") + GE->getMessage()
 631              + ") in section "
 632              + section.segmentName + "/" + section.sectionName
 633              + " (r1_address=" + Twine::utohexstr(reloc.offset)
 634              + ", r1_type=" + Twine(reloc.type)
 635              + ", r1_extern=" + Twine(reloc.isExtern)
 636              + ", r1_length=" + Twine((int)reloc.length)
 637              + ", r1_pcrel=" + Twine(reloc.pcRel)
 638              + (!reloc.scattered ? (Twine(", r1_symbolnum=")
 639                                     + Twine(reloc.symbol))
 640                                  : (Twine(", r1_scattered=1, r1_value=")
 641                                     + Twine(reloc.value)))
 642              + ")"
 643              + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
 644              + ", r2_type=" + Twine(reloc2.type)
 645              + ", r2_extern=" + Twine(reloc2.isExtern)
 646              + ", r2_length=" + Twine((int)reloc2.length)
 647              + ", r2_pcrel=" + Twine(reloc2.pcRel)
 648              + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
 649                                      + Twine(reloc2.symbol))
 650                                   : (Twine(", r2_scattered=1, r2_value=")
 651                                      + Twine(reloc2.value)))
 652              + ")" );
 653           });
 654       }
 655     }
 656     else {
 657       // Use ArchHandler to convert relocation record into information
 658       // needed to instantiate an lld::Reference object.
 659       auto relocErr = handler.getReferenceInfo(
 660           reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
 661           atomBySymbol, &kind, &target, &addend);
 662       if (relocErr) {
 663         return handleErrors(std::move(relocErr),
 664                             [&](std::unique_ptr<GenericError> GE) {
 665           return llvm::make_error<GenericError>(
 666             Twine("bad relocation (") + GE->getMessage()
 667              + ") in section "
 668              + section.segmentName + "/" + section.sectionName
 669              + " (r_address=" + Twine::utohexstr(reloc.offset)
 670              + ", r_type=" + Twine(reloc.type)
 671              + ", r_extern=" + Twine(reloc.isExtern)
 672              + ", r_length=" + Twine((int)reloc.length)
 673              + ", r_pcrel=" + Twine(reloc.pcRel)
 674              + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
 675                                  : (Twine(", r_scattered=1, r_value=")
 676                                     + Twine(reloc.value)))
 677              + ")" );
 678           });
 679       }
 680     }
 681     // Instantiate an lld::Reference object and add to its atom.
 682     inAtom->addReference(Reference::KindNamespace::mach_o,
 683                          handler.kindArch(),
 684                          kind, offsetInAtom, target, addend);
 685   }
 686
 687   return llvm::Error::success();
 688 }
 689
 690 bool isDebugInfoSection(const Section &section) {
 691   if ((section.attributes & S_ATTR_DEBUG) == 0)
 692     return false;
 693   return section.segmentName.equals("__DWARF");
 694 }
 695
 696 static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
 697   std::string strName = name.str();
 698   for (auto *atom : file.defined())
 699     if (atom->name() == strName)
 700       return atom;
 701   return nullptr;
 702 }
 703
 704 static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
 705   char *strCopy = alloc.Allocate<char>(str.size() + 1);
 706   memcpy(strCopy, str.data(), str.size());
 707   strCopy[str.size()] = '\0';
 708   return strCopy;
 709 }
 710
 711 llvm::Error parseStabs(MachOFile &file,
 712                        const NormalizedFile &normalizedFile,
 713                        bool copyRefs) {
 714
 715   if (normalizedFile.stabsSymbols.empty())
 716     return llvm::Error::success();
 717
 718   // FIXME: Kill this off when we can move to sane yaml parsing.
 719   std::unique_ptr<BumpPtrAllocator> allocator;
 720   if (copyRefs)
 721     allocator = llvm::make_unique<BumpPtrAllocator>();
 722
 723   enum { start, inBeginEnd } state = start;
 724
 725   const Atom *currentAtom = nullptr;
 726   uint64_t currentAtomAddress = 0;
 727   StabsDebugInfo::StabsList stabsList;
 728   for (const auto &stabSym : normalizedFile.stabsSymbols) {
 729     Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
 730               stabSym.value, stabSym.name);
 731     switch (state) {
 732     case start:
 733       switch (static_cast<StabType>(stabSym.type)) {
 734       case N_BNSYM:
 735         state = inBeginEnd;
 736         currentAtomAddress = stabSym.value;
 737         Reference::Addend addend;
 738         currentAtom = findAtomCoveringAddress(normalizedFile, file,
 739                                               currentAtomAddress, addend);
 740         if (addend != 0)
 741           return llvm::make_error<GenericError>(
 742                    "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
 743                    file.path());
 744         if (currentAtom)
 745           stab.atom = currentAtom;
 746         else {
 747           // FIXME: ld64 just issues a warning here - should we match that?
 748           return llvm::make_error<GenericError>(
 749                    "can't find atom for stabs BNSYM at " +
 750                    Twine::utohexstr(stabSym.value) + " in " + file.path());
 751         }
 752         break;
 753       case N_SO:
 754       case N_OSO:
 755         // Not associated with an atom, just copy.
 756         if (copyRefs)
 757           stab.str = copyDebugString(stabSym.name, *allocator);
 758         else
 759           stab.str = stabSym.name;
 760         break;
 761       case N_GSYM: {
 762         auto colonIdx = stabSym.name.find(':');
 763         if (colonIdx != StringRef::npos) {
 764           StringRef name = stabSym.name.substr(0, colonIdx);
 765           currentAtom = findDefinedAtomByName(file, "_" + name);
 766           stab.atom = currentAtom;
 767           if (copyRefs)
 768             stab.str = copyDebugString(stabSym.name, *allocator);
 769           else
 770             stab.str = stabSym.name;
 771         } else {
 772           currentAtom = findDefinedAtomByName(file, stabSym.name);
 773           stab.atom = currentAtom;
 774           if (copyRefs)
 775             stab.str = copyDebugString(stabSym.name, *allocator);
 776           else
 777             stab.str = stabSym.name;
 778         }
 779         if (stab.atom == nullptr)
 780           return llvm::make_error<GenericError>(
 781                    "can't find atom for N_GSYM stabs" + stabSym.name +
 782                    " in " + file.path());
 783         break;
 784       }
 785       case N_FUN:
 786         return llvm::make_error<GenericError>(
 787                  "old-style N_FUN stab '" + stabSym.name + "' unsupported");
 788       default:
 789         return llvm::make_error<GenericError>(
 790                  "unrecognized stab symbol '" + stabSym.name + "'");
 791       }
 792       break;
 793     case inBeginEnd:
 794       stab.atom = currentAtom;
 795       switch (static_cast<StabType>(stabSym.type)) {
 796       case N_ENSYM:
 797         state = start;
 798         currentAtom = nullptr;
 799         break;
 800       case N_FUN:
 801         // Just copy the string.
 802         if (copyRefs)
 803           stab.str = copyDebugString(stabSym.name, *allocator);
 804         else
 805           stab.str = stabSym.name;
 806         break;
 807       default:
 808         return llvm::make_error<GenericError>(
 809                  "unrecognized stab symbol '" + stabSym.name + "'");
 810       }
 811     }
 812     llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
 813     stabsList.push_back(stab);
 814   }
 815
 816   file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList)));
 817
 818   // FIXME: Kill this off when we fix YAML memory ownership.
 819   file.debugInfo()->setAllocator(std::move(allocator));
 820
 821   return llvm::Error::success();
 822 }
 823
 824 static llvm::DataExtractor
 825 dataExtractorFromSection(const NormalizedFile &normalizedFile,
 826                          const Section &S) {
 827   const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
 828   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
 829   StringRef SecData(reinterpret_cast<const char*>(S.content.data()),
 830                     S.content.size());
 831   return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4);
 832 }
 833
 834 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 835 //        inspection" code if possible.
 836 static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
 837                                   uint64_t abbrCode) {
 838   uint64_t curCode;
 839   uint32_t offset = 0;
 840   while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) {
 841     // Tag
 842     abbrevData.getULEB128(&offset);
 843     // DW_CHILDREN
 844     abbrevData.getU8(&offset);
 845     // Attributes
 846     while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
 847       ;
 848   }
 849   return offset;
 850 }
 851
 852 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 853 //        inspection" code if possible.
 854 static Expected<const char *>
 855 getIndexedString(const NormalizedFile &normalizedFile,
 856                  llvm::dwarf::Form form, llvm::DataExtractor infoData,
 857                  uint32_t &infoOffset, const Section &stringsSection) {
 858   if (form == llvm::dwarf::DW_FORM_string)
 859    return infoData.getCStr(&infoOffset);
 860   if (form != llvm::dwarf::DW_FORM_strp)
 861     return llvm::make_error<GenericError>(
 862         "string field encoded without DW_FORM_strp");
 863   uint32_t stringOffset = infoData.getU32(&infoOffset);
 864   llvm::DataExtractor stringsData =
 865     dataExtractorFromSection(normalizedFile, stringsSection);
 866   return stringsData.getCStr(&stringOffset);
 867 }
 868
 869 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 870 //        inspection" code if possible.
 871 static llvm::Expected<TranslationUnitSource>
 872 readCompUnit(const NormalizedFile &normalizedFile,
 873              const Section &info,
 874              const Section &abbrev,
 875              const Section &strings,
 876              StringRef path) {
 877   // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 878   //        inspection" code if possible.
 879   uint32_t offset = 0;
 880   llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
 881   auto infoData = dataExtractorFromSection(normalizedFile, info);
 882   uint32_t length = infoData.getU32(&offset);
 883   if (length == 0xffffffff) {
 884     Format = llvm::dwarf::DwarfFormat::DWARF64;
 885     infoData.getU64(&offset);
 886   }
 887   else if (length > 0xffffff00)
 888     return llvm::make_error<GenericError>("Malformed DWARF in " + path);
 889
 890   uint16_t version = infoData.getU16(&offset);
 891
 892   if (version < 2 || version > 4)
 893     return llvm::make_error<GenericError>("Unsupported DWARF version in " +
 894                                           path);
 895
 896   infoData.getU32(&offset); // Abbrev offset (should be zero)
 897   uint8_t addrSize = infoData.getU8(&offset);
 898
 899   uint32_t abbrCode = infoData.getULEB128(&offset);
 900   auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
 901   uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
 902   uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
 903   if (tag != llvm::dwarf::DW_TAG_compile_unit)
 904     return llvm::make_error<GenericError>("top level DIE is not a compile unit");
 905   // DW_CHILDREN
 906   abbrevData.getU8(&abbrevOffset);
 907   uint32_t name;
 908   llvm::dwarf::Form form;
 909   llvm::dwarf::FormParams formParams = {version, addrSize, Format};
 910   TranslationUnitSource tu;
 911   while ((name = abbrevData.getULEB128(&abbrevOffset)) |
 912          (form = static_cast<llvm::dwarf::Form>(
 913              abbrevData.getULEB128(&abbrevOffset))) &&
 914          (name != 0 || form != 0)) {
 915     switch (name) {
 916     case llvm::dwarf::DW_AT_name: {
 917       if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
 918                                         strings))
 919           tu.name = *eName;
 920       else
 921         return eName.takeError();
 922       break;
 923     }
 924     case llvm::dwarf::DW_AT_comp_dir: {
 925       if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
 926                                         strings))
 927         tu.path = *eName;
 928       else
 929         return eName.takeError();
 930       break;
 931     }
 932     default:
 933       llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
 934     }
 935   }
 936   return tu;
 937 }
 938
 939 llvm::Error parseDebugInfo(MachOFile &file,
 940                            const NormalizedFile &normalizedFile, bool copyRefs) {
 941
 942   // Find the interesting debug info sections.
 943   const Section *debugInfo = nullptr;
 944   const Section *debugAbbrev = nullptr;
 945   const Section *debugStrings = nullptr;
 946
 947   for (auto &s : normalizedFile.sections) {
 948     if (s.segmentName == "__DWARF") {
 949       if (s.sectionName == "__debug_info")
 950         debugInfo = &s;
 951       else if (s.sectionName == "__debug_abbrev")
 952         debugAbbrev = &s;
 953       else if (s.sectionName == "__debug_str")
 954         debugStrings = &s;
 955     }
 956   }
 957
 958   if (!debugInfo)
 959     return parseStabs(file, normalizedFile, copyRefs);
 960
 961   if (debugInfo->content.size() == 0)
 962     return llvm::Error::success();
 963
 964   if (debugInfo->content.size() < 12)
 965     return llvm::make_error<GenericError>("Malformed __debug_info section in " +
 966                                           file.path() + ": too small");
 967
 968   if (!debugAbbrev)
 969     return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " +
 970                                           file.path());
 971
 972   if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
 973                                   *debugStrings, file.path())) {
 974     // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML
 975     //        memory ownership.
 976     std::unique_ptr<BumpPtrAllocator> allocator;
 977     if (copyRefs) {
 978       allocator = llvm::make_unique<BumpPtrAllocator>();
 979       tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
 980       tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
 981     }
 982     file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
 983     if (copyRefs)
 984       file.debugInfo()->setAllocator(std::move(allocator));
 985   } else
 986     return tuOrErr.takeError();
 987
 988   return llvm::Error::success();
 989 }
 990
 991 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
 992   if (is64)
 993     return read64(addr, isBig);
 994
 995   int32_t res = read32(addr, isBig);
 996   return res;
 997 }
 998
 999 /// --- Augmentation String Processing ---
1000
/// Facts gathered from a CIE's augmentation string.  Every offset/length
/// field defaults to ~0U, which the code in this file treats as "not present".
struct CIEInfo {
  // Set when the augmentation string starts with 'z'.
  bool _augmentationDataPresent{false};
  // Set when the augmentation string contains "eh".
  bool _mayHaveEH{false};
  // Offsets of the 'L', 'P' and 'R' entries within the augmentation data.
  uint32_t _offsetOfLSDA{~0U};
  uint32_t _offsetOfPersonality{~0U};
  uint32_t _offsetOfFDEPointerEncoding{~0U};
  // Total augmentation data size implied by the augmentation string.
  uint32_t _augmentationDataLength{~0U};
};
1009
1010 typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
1011
1012 static llvm::Error processAugmentationString(const uint8_t *augStr,
1013                                              CIEInfo &cieInfo,
1014                                              unsigned &len) {
1015
1016   if (augStr[0] == '\0') {
1017     len = 1;
1018     return llvm::Error::success();
1019   }
1020
1021   if (augStr[0] != 'z')
1022     return llvm::make_error<GenericError>("expected 'z' at start of "
1023                                           "augmentation string");
1024
1025   cieInfo._augmentationDataPresent = true;
1026   uint64_t idx = 1;
1027
1028   uint32_t offsetInAugmentationData = 0;
1029   while (augStr[idx] != '\0') {
1030     if (augStr[idx] == 'L') {
1031       cieInfo._offsetOfLSDA = offsetInAugmentationData;
1032       // This adds a single byte to the augmentation data.
1033       ++offsetInAugmentationData;
1034       ++idx;
1035       continue;
1036     }
1037     if (augStr[idx] == 'P') {
1038       cieInfo._offsetOfPersonality = offsetInAugmentationData;
1039       // This adds a single byte to the augmentation data for the encoding,
1040       // then a number of bytes for the pointer data.
1041       // FIXME: We are assuming 4 is correct here for the pointer size as we
1042       // always currently use delta32ToGOT.
1043       offsetInAugmentationData += 5;
1044       ++idx;
1045       continue;
1046     }
1047     if (augStr[idx] == 'R') {
1048       cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
1049       // This adds a single byte to the augmentation data.
1050       ++offsetInAugmentationData;
1051       ++idx;
1052       continue;
1053     }
1054     if (augStr[idx] == 'e') {
1055       if (augStr[idx + 1] != 'h')
1056         return llvm::make_error<GenericError>("expected 'eh' in "
1057                                               "augmentation string");
1058       cieInfo._mayHaveEH = true;
1059       idx += 2;
1060       continue;
1061     }
1062     ++idx;
1063   }
1064
1065   cieInfo._augmentationDataLength = offsetInAugmentationData;
1066
1067   len = idx + 1;
1068   return llvm::Error::success();
1069 }
1070
1071 static llvm::Error processCIE(const NormalizedFile &normalizedFile,
1072                               MachOFile &file,
1073                               mach_o::ArchHandler &handler,
1074                               const Section *ehFrameSection,
1075                               MachODefinedAtom *atom,
1076                               uint64_t offset,
1077                               CIEInfoMap &cieInfos) {
1078   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1079   const uint8_t *frameData = atom->rawContent().data();
1080
1081   CIEInfo cieInfo;
1082
1083   uint32_t size = read32(frameData, isBig);
1084   uint64_t cieIDField = size == 0xffffffffU
1085                           ? sizeof(uint32_t) + sizeof(uint64_t)
1086                           : sizeof(uint32_t);
1087   uint64_t versionField = cieIDField + sizeof(uint32_t);
1088   uint64_t augmentationStringField = versionField + sizeof(uint8_t);
1089
1090   unsigned augmentationStringLength = 0;
1091   if (auto err = processAugmentationString(frameData + augmentationStringField,
1092                                            cieInfo, augmentationStringLength))
1093     return err;
1094
1095   if (cieInfo._offsetOfPersonality != ~0U) {
1096     // If we have augmentation data for the personality function, then we may
1097     // need to implicitly generate its relocation.
1098
1099     // Parse the EH Data field which is pointer sized.
1100     uint64_t EHDataField = augmentationStringField + augmentationStringLength;
1101     const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1102     unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
1103
1104     // Parse Code Align Factor which is a ULEB128.
1105     uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
1106     unsigned lengthFieldSize = 0;
1107     llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
1108
1109     // Parse Data Align Factor which is a SLEB128.
1110     uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
1111     llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
1112
1113     // Parse Return Address Register which is a byte.
1114     uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
1115
1116     // Parse the augmentation length which is a ULEB128.
1117     uint64_t AugmentationLengthField = ReturnAddressField + 1;
1118     uint64_t AugmentationLength =
1119       llvm::decodeULEB128(frameData + AugmentationLengthField,
1120                           &lengthFieldSize);
1121
1122     if (AugmentationLength != cieInfo._augmentationDataLength)
1123       return llvm::make_error<GenericError>("CIE augmentation data length "
1124                                             "mismatch");
1125
1126     // Get the start address of the augmentation data.
1127     uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
1128
1129     // Parse the personality function from the augmentation data.
1130     uint64_t PersonalityField =
1131       AugmentationDataField + cieInfo._offsetOfPersonality;
1132
1133     // Parse the personality encoding.
1134     // FIXME: Verify that this is a 32-bit pcrel offset.
1135     uint64_t PersonalityFunctionField = PersonalityField + 1;
1136
1137     if (atom->begin() != atom->end()) {
1138       // If we have an explicit relocation, then make sure it matches this
1139       // offset as this is where we'd expect it to be applied to.
1140       DefinedAtom::reference_iterator CurrentRef = atom->begin();
1141       if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
1142         return llvm::make_error<GenericError>("CIE personality reloc at "
1143                                               "wrong offset");
1144
1145       if (++CurrentRef != atom->end())
1146         return llvm::make_error<GenericError>("CIE contains too many relocs");
1147     } else {
1148       // Implicitly generate the personality function reloc.  It's assumed to
1149       // be a delta32 offset to a GOT entry.
1150       // FIXME: Parse the encoding and check this.
1151       int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
1152       uint64_t funcAddress = ehFrameSection->address + offset +
1153                              PersonalityFunctionField;
1154       funcAddress += funcDelta;
1155
1156       const MachODefinedAtom *func = nullptr;
1157       Reference::Addend addend;
1158       func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
1159                                      addend);
1160       atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1161                          handler.unwindRefToPersonalityFunctionKind(),
1162                          PersonalityFunctionField, func, addend);
1163     }
1164   } else if (atom->begin() != atom->end()) {
1165     // Otherwise, we expect there to be no relocations in this atom as the only
1166     // relocation would have been to the personality function.
1167     return llvm::make_error<GenericError>("unexpected relocation in CIE");
1168   }
1169
1170
1171   cieInfos[atom] = std::move(cieInfo);
1172
1173   return llvm::Error::success();
1174 }
1175
1176 static llvm::Error processFDE(const NormalizedFile &normalizedFile,
1177                               MachOFile &file,
1178                               mach_o::ArchHandler &handler,
1179                               const Section *ehFrameSection,
1180                               MachODefinedAtom *atom,
1181                               uint64_t offset,
1182                               const CIEInfoMap &cieInfos) {
1183
1184   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1185   const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1186
1187   // Compiler wasn't lazy and actually told us what it meant.
1188   // Unfortunately, the compiler may not have generated references for all of
1189   // [cie, func, lsda] and so we still need to parse the FDE and add references
1190   // for any the compiler didn't generate.
1191   if (atom->begin() != atom->end())
1192     atom->sortReferences();
1193
1194   DefinedAtom::reference_iterator CurrentRef = atom->begin();
1195
1196   // This helper returns the reference (if one exists) at the offset we are
1197   // currently processing.  It automatically increments the ref iterator if we
1198   // do return a ref, and throws an error if we pass over a ref without
1199   // comsuming it.
1200   auto currentRefGetter = [&CurrentRef,
1201                            &atom](uint64_t Offset)->const Reference* {
1202     // If there are no more refs found, then we are done.
1203     if (CurrentRef == atom->end())
1204       return nullptr;
1205
1206     const Reference *Ref = *CurrentRef;
1207
1208     // If we haven't reached the offset for this reference, then return that
1209     // we don't yet have a reference to process.
1210     if (Offset < Ref->offsetInAtom())
1211       return nullptr;
1212
1213     // If the offset is equal, then we want to process this ref.
1214     if (Offset == Ref->offsetInAtom()) {
1215       ++CurrentRef;
1216       return Ref;
1217     }
1218
1219     // The current ref is at an offset which is earlier than the current
1220     // offset, then we failed to consume it when we should have.  In this case
1221     // throw an error.
1222     llvm::report_fatal_error("Skipped reference when processing FDE");
1223   };
1224
1225   // Helper to either get the reference at this current location, and verify
1226   // that it is of the expected type, or add a reference of that type.
1227   // Returns the reference target.
1228   auto verifyOrAddReference = [&](uint64_t targetAddress,
1229                                   Reference::KindValue refKind,
1230                                   uint64_t refAddress,
1231                                   bool allowsAddend)->const Atom* {
1232     if (auto *ref = currentRefGetter(refAddress)) {
1233       // The compiler already emitted a relocation for the CIE ref.  This should
1234       // have been converted to the correct type of reference in
1235       // get[Pair]ReferenceInfo().
1236       assert(ref->kindValue() == refKind &&
1237              "Incorrect EHFrame reference kind");
1238       return ref->target();
1239     }
1240     Reference::Addend addend;
1241     auto *target = findAtomCoveringAddress(normalizedFile, file,
1242                                            targetAddress, addend);
1243     atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1244                        refKind, refAddress, target, addend);
1245
1246     if (!allowsAddend)
1247       assert(!addend && "EHFrame reference cannot have addend");
1248     return target;
1249   };
1250
1251   const uint8_t *startFrameData = atom->rawContent().data();
1252   const uint8_t *frameData = startFrameData;
1253
1254   uint32_t size = read32(frameData, isBig);
1255   uint64_t cieFieldInFDE = size == 0xffffffffU
1256     ? sizeof(uint32_t) + sizeof(uint64_t)
1257     : sizeof(uint32_t);
1258
1259   // Linker needs to fixup a reference from the FDE to its parent CIE (a
1260   // 32-bit byte offset backwards in the __eh_frame section).
1261   uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
1262   uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
1263   cieAddress -= cieDelta;
1264
1265   auto *cieRefTarget = verifyOrAddReference(cieAddress,
1266                                             handler.unwindRefToCIEKind(),
1267                                             cieFieldInFDE, false);
1268   const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
1269   assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
1270          "FDE's CIE field does not point at the start of a CIE.");
1271
1272   const CIEInfo &cieInfo = cieInfos.find(cie)->second;
1273
1274   // Linker needs to fixup reference from the FDE to the function it's
1275   // describing. FIXME: there are actually different ways to do this, and the
1276   // particular method used is specified in the CIE's augmentation fields
1277   // (hopefully)
1278   uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
1279
1280   int64_t functionFromFDE = readSPtr(is64, isBig,
1281                                      frameData + rangeFieldInFDE);
1282   uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
1283   rangeStart += functionFromFDE;
1284
1285   verifyOrAddReference(rangeStart,
1286                        handler.unwindRefToFunctionKind(),
1287                        rangeFieldInFDE, true);
1288
1289   // Handle the augmentation data if there is any.
1290   if (cieInfo._augmentationDataPresent) {
1291     // First process the augmentation data length field.
1292     uint64_t augmentationDataLengthFieldInFDE =
1293       rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
1294     unsigned lengthFieldSize = 0;
1295     uint64_t augmentationDataLength =
1296       llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
1297                           &lengthFieldSize);
1298
1299     if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
1300
1301       // Look at the augmentation data field.
1302       uint64_t augmentationDataFieldInFDE =
1303         augmentationDataLengthFieldInFDE + lengthFieldSize;
1304
1305       int64_t lsdaFromFDE = readSPtr(is64, isBig,
1306                                      frameData + augmentationDataFieldInFDE);
1307       uint64_t lsdaStart =
1308         ehFrameSection->address + offset + augmentationDataFieldInFDE +
1309         lsdaFromFDE;
1310
1311       verifyOrAddReference(lsdaStart,
1312                            handler.unwindRefToFunctionKind(),
1313                            augmentationDataFieldInFDE, true);
1314     }
1315   }
1316
1317   return llvm::Error::success();
1318 }
1319
1320 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
1321                                  MachOFile &file,
1322                                  mach_o::ArchHandler &handler) {
1323
1324   const Section *ehFrameSection = nullptr;
1325   for (auto &section : normalizedFile.sections)
1326     if (section.segmentName == "__TEXT" &&
1327         section.sectionName == "__eh_frame") {
1328       ehFrameSection = &section;
1329       break;
1330     }
1331
1332   // No __eh_frame so nothing to do.
1333   if (!ehFrameSection)
1334     return llvm::Error::success();
1335
1336   llvm::Error ehFrameErr = llvm::Error::success();
1337   CIEInfoMap cieInfos;
1338
1339   file.eachAtomInSection(*ehFrameSection,
1340                          [&](MachODefinedAtom *atom, uint64_t offset) -> void {
1341     assert(atom->contentType() == DefinedAtom::typeCFI);
1342
1343     // Bail out if we've encountered an error.
1344     if (ehFrameErr)
1345       return;
1346
1347     const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1348     if (ArchHandler::isDwarfCIE(isBig, atom))
1349       ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
1350                               atom, offset, cieInfos);
1351     else
1352       ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
1353                               atom, offset, cieInfos);
1354   });
1355
1356   return ehFrameErr;
1357 }
1358
1359 llvm::Error parseObjCImageInfo(const Section &sect,
1360                                const NormalizedFile &normalizedFile,
1361                                MachOFile &file) {
1362
1363   //    struct objc_image_info  {
1364   //            uint32_t        version;        // initially 0
1365   //            uint32_t        flags;
1366   //    };
1367
1368   ArrayRef<uint8_t> content = sect.content;
1369   if (content.size() != 8)
1370     return llvm::make_error<GenericError>(sect.segmentName + "/" +
1371                                           sect.sectionName +
1372                                           " in file " + file.path() +
1373                                           " should be 8 bytes in size");
1374
1375   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1376   uint32_t version = read32(content.data(), isBig);
1377   if (version)
1378     return llvm::make_error<GenericError>(sect.segmentName + "/" +
1379                                           sect.sectionName +
1380                                           " in file " + file.path() +
1381                                           " should have version=0");
1382
1383   uint32_t flags = read32(content.data() + 4, isBig);
1384   if (flags & (MachOLinkingContext::objc_supports_gc |
1385                MachOLinkingContext::objc_gc_only))
1386     return llvm::make_error<GenericError>(sect.segmentName + "/" +
1387                                           sect.sectionName +
1388                                           " in file " + file.path() +
1389                                           " uses GC.  This is not supported");
1390
1391   if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
1392     file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
1393   else
1394     file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
1395
1396   file.setSwiftVersion((flags >> 8) & 0xFF);
1397
1398   return llvm::Error::success();
1399 }
1400
1401 /// Converts normalized mach-o file into an lld::File and lld::Atoms.
1402 llvm::Expected<std::unique_ptr<lld::File>>
1403 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1404               bool copyRefs) {
1405   std::unique_ptr<MachOFile> file(new MachOFile(path));
1406   if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
1407     return std::move(ec);
1408   return std::unique_ptr<File>(std::move(file));
1409 }
1410
1411 llvm::Expected<std::unique_ptr<lld::File>>
1412 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1413              bool copyRefs) {
1414   // Instantiate SharedLibraryFile object.
1415   std::unique_ptr<MachODylibFile> file(new MachODylibFile(path));
1416   if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
1417     return std::move(ec);
1418   return std::unique_ptr<File>(std::move(file));
1419 }
1420
1421 } // anonymous namespace
1422
1423 namespace normalized {
1424
1425 static bool isObjCImageInfo(const Section &sect) {
1426   return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
1427     (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo");
1428 }
1429
1430 llvm::Error
1431 normalizedObjectToAtoms(MachOFile *file,
1432                         const NormalizedFile &normalizedFile,
1433                         bool copyRefs) {
1434   LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
1435                           << file->path() << "\n");
1436   bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
1437
1438   // Create atoms from each section.
1439   for (auto &sect : normalizedFile.sections) {
1440
1441     // If this is a debug-info section parse it specially.
1442     if (isDebugInfoSection(sect))
1443       continue;
1444
1445     // If the file contains an objc_image_info struct, then we should parse the
1446     // ObjC flags and Swift version.
1447     if (isObjCImageInfo(sect)) {
1448       if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
1449         return ec;
1450       // We then skip adding atoms for this section as we use the ObjCPass to
1451       // re-emit this data after it has been aggregated for all files.
1452       continue;
1453     }
1454
1455     bool customSectionName;
1456     DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
1457                                                             customSectionName);
1458     if (auto ec =  processSection(atomType, sect, customSectionName,
1459                                   normalizedFile, *file, scatterable, copyRefs))
1460       return ec;
1461   }
1462   // Create atoms from undefined symbols.
1463   for (auto &sym : normalizedFile.undefinedSymbols) {
1464     // Undefinded symbols with n_value != 0 are actually tentative definitions.
1465     if (sym.value == Hex64(0)) {
1466       file->addUndefinedAtom(sym.name, copyRefs);
1467     } else {
1468       file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
1469                                 DefinedAtom::Alignment(1 << (sym.desc >> 8)),
1470                                 copyRefs);
1471     }
1472   }
1473
1474   // Convert mach-o relocations to References
1475   std::unique_ptr<mach_o::ArchHandler> handler
1476                                      = ArchHandler::create(normalizedFile.arch);
1477   for (auto &sect : normalizedFile.sections) {
1478     if (isDebugInfoSection(sect))
1479       continue;
1480     if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
1481                                        *file, *handler))
1482       return ec;
1483   }
1484
1485   // Add additional arch-specific References
1486   file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
1487     handler->addAdditionalReferences(*atom);
1488   });
1489
1490   // Each __eh_frame section needs references to both __text (the function we're
1491   // providing unwind info for) and itself (FDE -> CIE). These aren't
1492   // represented in the relocations on some architectures, so we have to add
1493   // them back in manually there.
1494   if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
1495     return ec;
1496
1497   // Process mach-o data-in-code regions array. That information is encoded in
1498   // atoms as References at each transition point.
1499   unsigned nextIndex = 0;
1500   for (const DataInCode &entry : normalizedFile.dataInCode) {
1501     ++nextIndex;
1502     const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
1503     if (!s) {
1504       return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
1505                                                   + Twine(entry.offset)
1506                                                   + ") is not in any section"));
1507     }
1508     uint64_t offsetInSect = entry.offset - s->address;
1509     uint32_t offsetInAtom;
1510     MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
1511                                                            &offsetInAtom);
1512     if (offsetInAtom + entry.length > atom->size()) {
1513       return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
1514                                                   "(offset="
1515                                                   + Twine(entry.offset)
1516                                                   + ", length="
1517                                                   + Twine(entry.length)
1518                                                   + ") crosses atom boundary."));
1519     }
1520     // Add reference that marks start of data-in-code.
1521     atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1522                        handler->dataInCodeTransitionStart(*atom),
1523                        offsetInAtom, atom, entry.kind);
1524
1525     // Peek at next entry, if it starts where this one ends, skip ending ref.
1526     if (nextIndex < normalizedFile.dataInCode.size()) {
1527       const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
1528       if (nextEntry.offset == (entry.offset + entry.length))
1529         continue;
1530     }
1531
1532     // If data goes to end of function, skip ending ref.
1533     if ((offsetInAtom + entry.length) == atom->size())
1534       continue;
1535
1536     // Add reference that marks end of data-in-code.
1537     atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1538                        handler->dataInCodeTransitionEnd(*atom),
1539                        offsetInAtom+entry.length, atom, 0);
1540   }
1541
1542   // Cache some attributes on the file for use later.
1543   file->setFlags(normalizedFile.flags);
1544   file->setArch(normalizedFile.arch);
1545   file->setOS(normalizedFile.os);
1546   file->setMinVersion(normalizedFile.minOSverson);
1547   file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
1548
1549   // Sort references in each atom to their canonical order.
1550   for (const DefinedAtom* defAtom : file->defined()) {
1551     reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
1552   }
1553
1554   if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
1555     return err;
1556
1557   return llvm::Error::success();
1558 }
1559
1560 llvm::Error
1561 normalizedDylibToAtoms(MachODylibFile *file,
1562                        const NormalizedFile &normalizedFile,
1563                        bool copyRefs) {
1564   file->setInstallName(normalizedFile.installName);
1565   file->setCompatVersion(normalizedFile.compatVersion);
1566   file->setCurrentVersion(normalizedFile.currentVersion);
1567
1568   // Tell MachODylibFile object about all symbols it exports.
1569   if (!normalizedFile.exportInfo.empty()) {
1570     // If exports trie exists, use it instead of traditional symbol table.
1571     for (const Export &exp : normalizedFile.exportInfo) {
1572       bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1573       // StringRefs from export iterator are ephemeral, so force copy.
1574       file->addExportedSymbol(exp.name, weakDef, true);
1575     }
1576   } else {
1577     for (auto &sym : normalizedFile.globalSymbols) {
1578       assert((sym.scope & N_EXT) && "only expect external symbols here");
1579       bool weakDef = (sym.desc & N_WEAK_DEF);
1580       file->addExportedSymbol(sym.name, weakDef, copyRefs);
1581     }
1582   }
1583   // Tell MachODylibFile object about all dylibs it re-exports.
1584   for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
1585     if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
1586       file->addReExportedDylib(dep.path);
1587   }
1588   return llvm::Error::success();
1589 }
1590
1591 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1592                                           StringRef &segmentName,
1593                                           StringRef &sectionName,
1594                                           SectionType &sectionType,
1595                                           SectionAttr &sectionAttrs,
1596                                           bool &relocsToDefinedCanBeImplicit) {
1597
1598   for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1599                                  p->atomType != DefinedAtom::typeUnknown; ++p) {
1600     if (p->atomType != atomType)
1601       continue;
1602     // Wild carded entries are ignored for reverse lookups.
1603     if (p->segmentName.empty() || p->sectionName.empty())
1604       continue;
1605     segmentName = p->segmentName;
1606     sectionName = p->sectionName;
1607     sectionType = p->sectionType;
1608     sectionAttrs = 0;
1609     relocsToDefinedCanBeImplicit = false;
1610     if (atomType == DefinedAtom::typeCode)
1611       sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1612     if (atomType == DefinedAtom::typeCFI)
1613       relocsToDefinedCanBeImplicit = true;
1614     return;
1615   }
1616   llvm_unreachable("content type not yet supported");
1617 }
1618
1619 llvm::Expected<std::unique_ptr<lld::File>>
1620 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1621                   bool copyRefs) {
1622   switch (normalizedFile.fileType) {
1623   case MH_DYLIB:
1624   case MH_DYLIB_STUB:
1625     return dylibToAtoms(normalizedFile, path, copyRefs);
1626   case MH_OBJECT:
1627     return objectToAtoms(normalizedFile, path, copyRefs);
1628   default:
1629     llvm_unreachable("unhandled MachO file type!");
1630   }
1631 }
1632
1633 } // namespace normalized
1634 } // namespace mach_o
1635 } // namespace lld