contrib/llvm-project/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp

   1 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 ///
  10 /// \file Converts from in-memory normalized mach-o to in-memory Atoms.
  11 ///
  12 ///                  +------------+
  13 ///                  | normalized |
  14 ///                  +------------+
  15 ///                        |
  16 ///                        |
  17 ///                        v
  18 ///                    +-------+
  19 ///                    | Atoms |
  20 ///                    +-------+
  21
  22 #include "ArchHandler.h"
  23 #include "Atoms.h"
  24 #include "File.h"
  25 #include "MachONormalizedFile.h"
  26 #include "MachONormalizedFileBinaryUtils.h"
  27 #include "lld/Common/LLVM.h"
  28 #include "lld/Core/Error.h"
  29 #include "llvm/BinaryFormat/Dwarf.h"
  30 #include "llvm/BinaryFormat/MachO.h"
  31 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
  32 #include "llvm/Support/DataExtractor.h"
  33 #include "llvm/Support/Debug.h"
  34 #include "llvm/Support/Error.h"
  35 #include "llvm/Support/Format.h"
  36 #include "llvm/Support/LEB128.h"
  37 #include "llvm/Support/raw_ostream.h"
  38
  39 using namespace llvm::MachO;
  40 using namespace lld::mach_o::normalized;
  41
  42 #define DEBUG_TYPE "normalized-file-to-atoms"
  43
  44 namespace lld {
  45 namespace mach_o {
  46
  47
  48 namespace { // anonymous
  49
  50
// Builds one MachORelocatableSectionToAtomType initializer; the last argument
// is a DefinedAtom enumerator name written without its DefinedAtom:: prefix.
#define ENTRY(seg, sect, type, atomType) \
  {seg, sect, type, DefinedAtom::atomType }

/// One row of the section -> atom content type mapping table.
struct MachORelocatableSectionToAtomType {
  StringRef                 segmentName;  // Empty string acts as a wildcard.
  StringRef                 sectionName;  // Empty string acts as a wildcard.
  SectionType               sectionType;
  DefinedAtom::ContentType  atomType;
};

// Table consulted by atomTypeFromSection().  Rows are tried in order and the
// first row whose section type and (non-wildcard) names match is used, so
// specific rows must precede the wildcard rows of the same section type.
// The final typeUnknown row is the sentinel that terminates the scan.
//
// NOTE(review): the second "__TEXT"/"__text"/S_REGULAR row (typeResolver) has
// the same key as the row before it, so a first-match scan never reaches it.
const MachORelocatableSectionToAtomType sectsToAtomType[] = {
  ENTRY("__TEXT", "__text",           S_REGULAR,          typeCode),
  ENTRY("__TEXT", "__text",           S_REGULAR,          typeResolver),
  ENTRY("__TEXT", "__cstring",        S_CSTRING_LITERALS, typeCString),
  ENTRY("",       "",                 S_CSTRING_LITERALS, typeCString),
  ENTRY("__TEXT", "__ustring",        S_REGULAR,          typeUTF16String),
  ENTRY("__TEXT", "__const",          S_REGULAR,          typeConstant),
  ENTRY("__TEXT", "__const_coal",     S_COALESCED,        typeConstant),
  ENTRY("__TEXT", "__eh_frame",       S_COALESCED,        typeCFI),
  ENTRY("__TEXT", "__eh_frame",       S_REGULAR,          typeCFI),
  ENTRY("__TEXT", "__literal4",       S_4BYTE_LITERALS,   typeLiteral4),
  ENTRY("__TEXT", "__literal8",       S_8BYTE_LITERALS,   typeLiteral8),
  ENTRY("__TEXT", "__literal16",      S_16BYTE_LITERALS,  typeLiteral16),
  ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR,          typeLSDA),
  ENTRY("__DATA", "__data",           S_REGULAR,          typeData),
  ENTRY("__DATA", "__datacoal_nt",    S_COALESCED,        typeData),
  ENTRY("__DATA", "__const",          S_REGULAR,          typeConstData),
  ENTRY("__DATA", "__cfstring",       S_REGULAR,          typeCFString),
  ENTRY("__DATA", "__mod_init_func",  S_MOD_INIT_FUNC_POINTERS,
                                                          typeInitializerPtr),
  ENTRY("__DATA", "__mod_term_func",  S_MOD_TERM_FUNC_POINTERS,
                                                          typeTerminatorPtr),
  ENTRY("__DATA", "__got",            S_NON_LAZY_SYMBOL_POINTERS,
                                                          typeGOT),
  ENTRY("__DATA", "__bss",            S_ZEROFILL,         typeZeroFill),
  ENTRY("",       "",                 S_NON_LAZY_SYMBOL_POINTERS,
                                                          typeGOT),
  ENTRY("__DATA", "__interposing",    S_INTERPOSING,      typeInterposingTuples),
  ENTRY("__DATA", "__thread_vars",    S_THREAD_LOCAL_VARIABLES,
                                                          typeThunkTLV),
  ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
  ENTRY("__DATA", "__thread_bss",     S_THREAD_LOCAL_ZEROFILL,
                                                        typeTLVInitialZeroFill),
  ENTRY("__DATA", "__objc_imageinfo", S_REGULAR,          typeObjCImageInfo),
  ENTRY("__DATA", "__objc_catlist",   S_REGULAR,          typeObjC2CategoryList),
  ENTRY("",       "",                 S_INTERPOSING,      typeInterposingTuples),
  ENTRY("__LD",   "__compact_unwind", S_REGULAR,
                                                         typeCompactUnwindInfo),
  ENTRY("",       "",                 S_REGULAR,          typeUnknown)
};
#undef ENTRY
 102
 103
 104 /// Figures out ContentType of a mach-o section.
 105 DefinedAtom::ContentType atomTypeFromSection(const Section &section,
 106                                              bool &customSectionName) {
 107   // First look for match of name and type. Empty names in table are wildcards.
 108   customSectionName = false;
 109   for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
 110                                  p->atomType != DefinedAtom::typeUnknown; ++p) {
 111     if (p->sectionType != section.type)
 112       continue;
 113     if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
 114       continue;
 115     if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
 116       continue;
 117     customSectionName = p->segmentName.empty() && p->sectionName.empty();
 118     return p->atomType;
 119   }
 120   // Look for code denoted by section attributes
 121   if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
 122     return DefinedAtom::typeCode;
 123
 124   return DefinedAtom::typeUnknown;
 125 }
 126
/// Strategy processSection() uses to split a section's content into atoms.
enum AtomizeModel {
  atomizeAtSymbols,    // Atom boundaries come from the section's symbols.
  atomizeFixedSize,    // Every atom is sizeMultiple bytes long.
  atomizePointerSize,  // Every atom is one pointer (8 bytes if 64-bit, else 4).
  atomizeUTF8,         // One atom per zero terminated c-string.
  atomizeUTF16,        // One atom per zero terminated UTF16 string.
  atomizeCFI,          // One atom per dwarf unwind CFI (FDE or CIE).
  atomizeCU,           // One atom per compact unwind entry.
  atomizeCFString      // One atom per NS/CFString object.
};
 137
 138 /// Returns info on how to atomize a section of the specified ContentType.
 139 void sectionParseInfo(DefinedAtom::ContentType atomType,
 140                       unsigned int &sizeMultiple,
 141                       DefinedAtom::Scope &scope,
 142                       DefinedAtom::Merge &merge,
 143                       AtomizeModel &atomizeModel) {
 144   struct ParseInfo {
 145     DefinedAtom::ContentType  atomType;
 146     unsigned int              sizeMultiple;
 147     DefinedAtom::Scope        scope;
 148     DefinedAtom::Merge        merge;
 149     AtomizeModel              atomizeModel;
 150   };
 151
 152   #define ENTRY(type, size, scope, merge, model) \
 153     {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
 154
 155   static const ParseInfo parseInfo[] = {
 156     ENTRY(typeCode,              1, scopeGlobal,          mergeNo,
 157                                                             atomizeAtSymbols),
 158     ENTRY(typeData,              1, scopeGlobal,          mergeNo,
 159                                                             atomizeAtSymbols),
 160     ENTRY(typeConstData,         1, scopeGlobal,          mergeNo,
 161                                                             atomizeAtSymbols),
 162     ENTRY(typeZeroFill,          1, scopeGlobal,          mergeNo,
 163                                                             atomizeAtSymbols),
 164     ENTRY(typeConstant,          1, scopeGlobal,          mergeNo,
 165                                                             atomizeAtSymbols),
 166     ENTRY(typeCString,           1, scopeLinkageUnit,     mergeByContent,
 167                                                             atomizeUTF8),
 168     ENTRY(typeUTF16String,       1, scopeLinkageUnit,     mergeByContent,
 169                                                             atomizeUTF16),
 170     ENTRY(typeCFI,               4, scopeTranslationUnit, mergeNo,
 171                                                             atomizeCFI),
 172     ENTRY(typeLiteral4,          4, scopeLinkageUnit,     mergeByContent,
 173                                                             atomizeFixedSize),
 174     ENTRY(typeLiteral8,          8, scopeLinkageUnit,     mergeByContent,
 175                                                             atomizeFixedSize),
 176     ENTRY(typeLiteral16,        16, scopeLinkageUnit,     mergeByContent,
 177                                                             atomizeFixedSize),
 178     ENTRY(typeCFString,          4, scopeLinkageUnit,     mergeByContent,
 179                                                             atomizeCFString),
 180     ENTRY(typeInitializerPtr,    4, scopeTranslationUnit, mergeNo,
 181                                                             atomizePointerSize),
 182     ENTRY(typeTerminatorPtr,     4, scopeTranslationUnit, mergeNo,
 183                                                             atomizePointerSize),
 184     ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
 185                                                             atomizeCU),
 186     ENTRY(typeGOT,               4, scopeLinkageUnit,     mergeByContent,
 187                                                             atomizePointerSize),
 188     ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
 189                                                             atomizePointerSize),
 190     ENTRY(typeUnknown,           1, scopeGlobal,          mergeNo,
 191                                                             atomizeAtSymbols)
 192   };
 193   #undef ENTRY
 194   const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
 195   for (int i=0; i < tableLen; ++i) {
 196     if (parseInfo[i].atomType == atomType) {
 197       sizeMultiple = parseInfo[i].sizeMultiple;
 198       scope        = parseInfo[i].scope;
 199       merge        = parseInfo[i].merge;
 200       atomizeModel = parseInfo[i].atomizeModel;
 201       return;
 202     }
 203   }
 204
 205   // Unknown type is atomized by symbols.
 206   sizeMultiple = 1;
 207   scope = DefinedAtom::scopeGlobal;
 208   merge = DefinedAtom::mergeNo;
 209   atomizeModel = atomizeAtSymbols;
 210 }
 211
 212
 213 Atom::Scope atomScope(uint8_t scope) {
 214   switch (scope) {
 215   case N_EXT:
 216     return Atom::scopeGlobal;
 217   case N_PEXT:
 218   case N_PEXT | N_EXT:
 219     return Atom::scopeLinkageUnit;
 220   case 0:
 221     return Atom::scopeTranslationUnit;
 222   }
 223   llvm_unreachable("unknown scope value!");
 224 }
 225
 226 void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
 227                             uint32_t sectionIndex,
 228                             SmallVector<const Symbol *, 64> &outSyms) {
 229   for (const Symbol &sym : inSymbols) {
 230     // Only look at definition symbols.
 231     if ((sym.type & N_TYPE) != N_SECT)
 232       continue;
 233     if (sym.sect != sectionIndex)
 234       continue;
 235     outSyms.push_back(&sym);
 236   }
 237 }
 238
 239 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section,
 240                     MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
 241                     uint16_t symbolDescFlags, Atom::Scope symbolScope,
 242                     uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
 243   // Mach-O symbol table does have size in it. Instead the size is the
 244   // difference between this and the next symbol.
 245   uint64_t size = nextSymbolAddr - symbolAddr;
 246   uint64_t offset = symbolAddr - section.address;
 247   bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
 248   if (isZeroFillSection(section.type)) {
 249     file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
 250                                 noDeadStrip, copyRefs, &section);
 251   } else {
 252     DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
 253                               ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
 254     bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
 255     if (atomType == DefinedAtom::typeUnknown) {
 256       // Mach-O needs a segment and section name.  Concatentate those two
 257       // with a / separator (e.g. "seg/sect") to fit into the lld model
 258       // of just a section name.
 259       std::string segSectName = section.segmentName.str()
 260                                 + "/" + section.sectionName.str();
 261       file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
 262                                          merge, thumb, noDeadStrip, offset,
 263                                          size, segSectName, true, &section);
 264     } else {
 265       if ((atomType == lld::DefinedAtom::typeCode) &&
 266           (symbolDescFlags & N_SYMBOL_RESOLVER)) {
 267         atomType = lld::DefinedAtom::typeResolver;
 268       }
 269       file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
 270                           offset, size, thumb, noDeadStrip, copyRefs, &section);
 271     }
 272   }
 273 }
 274
 275 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
 276                                    const Section &section,
 277                                    const NormalizedFile &normalizedFile,
 278                                    MachOFile &file, bool scatterable,
 279                                    bool copyRefs) {
 280   // Find section's index.
 281   uint32_t sectIndex = 1;
 282   for (auto &sect : normalizedFile.sections) {
 283     if (&sect == &section)
 284       break;
 285     ++sectIndex;
 286   }
 287
 288   // Find all symbols in this section.
 289   SmallVector<const Symbol *, 64> symbols;
 290   appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
 291   appendSymbolsInSection(normalizedFile.localSymbols,  sectIndex, symbols);
 292
 293   // Sort symbols.
 294   std::sort(symbols.begin(), symbols.end(),
 295             [](const Symbol *lhs, const Symbol *rhs) -> bool {
 296               if (lhs == rhs)
 297                 return false;
 298               // First by address.
 299               uint64_t lhsAddr = lhs->value;
 300               uint64_t rhsAddr = rhs->value;
 301               if (lhsAddr != rhsAddr)
 302                 return lhsAddr < rhsAddr;
 303                // If same address, one is an alias so sort by scope.
 304               Atom::Scope lScope = atomScope(lhs->scope);
 305               Atom::Scope rScope = atomScope(rhs->scope);
 306               if (lScope != rScope)
 307                 return lScope < rScope;
 308               // If same address and scope, see if one might be better as
 309               // the alias.
 310               bool lPrivate = (lhs->name.front() == 'l');
 311               bool rPrivate = (rhs->name.front() == 'l');
 312               if (lPrivate != rPrivate)
 313                 return lPrivate;
 314               // If same address and scope, sort by name.
 315               return lhs->name < rhs->name;
 316             });
 317
 318   // Debug logging of symbols.
 319   //for (const Symbol *sym : symbols)
 320   //  llvm::errs() << "  sym: "
 321   //    << llvm::format("0x%08llx ", (uint64_t)sym->value)
 322   //    << ", " << sym->name << "\n";
 323
 324   // If section has no symbols and no content, there are no atoms.
 325   if (symbols.empty() && section.content.empty())
 326     return llvm::Error::success();
 327
 328   if (symbols.empty()) {
 329     // Section has no symbols, put all content in one anoymous atom.
 330     atomFromSymbol(atomType, section, file, section.address, StringRef(),
 331                   0, Atom::scopeTranslationUnit,
 332                   section.address + section.content.size(),
 333                   scatterable, copyRefs);
 334   }
 335   else if (symbols.front()->value != section.address) {
 336     // Section has anonymous content before first symbol.
 337     atomFromSymbol(atomType, section, file, section.address, StringRef(),
 338                    0, Atom::scopeTranslationUnit, symbols.front()->value,
 339                    scatterable, copyRefs);
 340   }
 341
 342   const Symbol *lastSym = nullptr;
 343   for (const Symbol *sym : symbols) {
 344     if (lastSym != nullptr) {
 345       // Ignore any assembler added "ltmpNNN" symbol at start of section
 346       // if there is another symbol at the start.
 347       if ((lastSym->value != sym->value)
 348           || lastSym->value != section.address
 349           || !lastSym->name.startswith("ltmp")) {
 350         atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
 351                        lastSym->desc, atomScope(lastSym->scope), sym->value,
 352                        scatterable, copyRefs);
 353       }
 354     }
 355     lastSym = sym;
 356   }
 357   if (lastSym != nullptr) {
 358     atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
 359                    lastSym->desc, atomScope(lastSym->scope),
 360                    section.address + section.content.size(),
 361                    scatterable, copyRefs);
 362   }
 363
 364   // If object built without .subsections_via_symbols, add reference chain.
 365   if (!scatterable) {
 366     MachODefinedAtom *prevAtom = nullptr;
 367     file.eachAtomInSection(section,
 368                            [&](MachODefinedAtom *atom, uint64_t offset)->void {
 369       if (prevAtom)
 370         prevAtom->addReference(Reference::KindNamespace::all,
 371                                Reference::KindArch::all,
 372                                Reference::kindLayoutAfter, 0, atom, 0);
 373       prevAtom = atom;
 374     });
 375   }
 376
 377   return llvm::Error::success();
 378 }
 379
 380 llvm::Error processSection(DefinedAtom::ContentType atomType,
 381                            const Section &section,
 382                            bool customSectionName,
 383                            const NormalizedFile &normalizedFile,
 384                            MachOFile &file, bool scatterable,
 385                            bool copyRefs) {
 386   const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
 387   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
 388
 389   // Get info on how to atomize section.
 390   unsigned int       sizeMultiple;
 391   DefinedAtom::Scope scope;
 392   DefinedAtom::Merge merge;
 393   AtomizeModel       atomizeModel;
 394   sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
 395
 396   // Validate section size.
 397   if ((section.content.size() % sizeMultiple) != 0)
 398     return llvm::make_error<GenericError>(Twine("Section ")
 399                                           + section.segmentName
 400                                           + "/" + section.sectionName
 401                                           + " has size ("
 402                                           + Twine(section.content.size())
 403                                           + ") which is not a multiple of "
 404                                           + Twine(sizeMultiple));
 405
 406   if (atomizeModel == atomizeAtSymbols) {
 407     // Break section up into atoms each with a fixed size.
 408     return processSymboledSection(atomType, section, normalizedFile, file,
 409                                   scatterable, copyRefs);
 410   } else {
 411     unsigned int size;
 412     for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
 413       switch (atomizeModel) {
 414       case atomizeFixedSize:
 415         // Break section up into atoms each with a fixed size.
 416         size = sizeMultiple;
 417         break;
 418       case atomizePointerSize:
 419         // Break section up into atoms each the size of a pointer.
 420         size = is64 ? 8 : 4;
 421         break;
 422       case atomizeUTF8:
 423         // Break section up into zero terminated c-strings.
 424         size = 0;
 425         for (unsigned int i = offset; i < e; ++i) {
 426           if (section.content[i] == 0) {
 427             size = i + 1 - offset;
 428             break;
 429           }
 430         }
 431         break;
 432       case atomizeUTF16:
 433         // Break section up into zero terminated UTF16 strings.
 434         size = 0;
 435         for (unsigned int i = offset; i < e; i += 2) {
 436           if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
 437             size = i + 2 - offset;
 438             break;
 439           }
 440         }
 441         break;
 442       case atomizeCFI:
 443         // Break section up into dwarf unwind CFIs (FDE or CIE).
 444         size = read32(&section.content[offset], isBig) + 4;
 445         if (offset+size > section.content.size()) {
 446           return llvm::make_error<GenericError>(Twine("Section ")
 447                                                 + section.segmentName
 448                                                 + "/" + section.sectionName
 449                                                 + " is malformed.  Size of CFI "
 450                                                 "starting at offset ("
 451                                                 + Twine(offset)
 452                                                 + ") is past end of section.");
 453         }
 454         break;
 455       case atomizeCU:
 456         // Break section up into compact unwind entries.
 457         size = is64 ? 32 : 20;
 458         break;
 459       case atomizeCFString:
 460         // Break section up into NS/CFString objects.
 461         size = is64 ? 32 : 16;
 462         break;
 463       case atomizeAtSymbols:
 464         break;
 465       }
 466       if (size == 0) {
 467         return llvm::make_error<GenericError>(Twine("Section ")
 468                                               + section.segmentName
 469                                               + "/" + section.sectionName
 470                                               + " is malformed.  The last atom "
 471                                               "is not zero terminated.");
 472       }
 473       if (customSectionName) {
 474         // Mach-O needs a segment and section name.  Concatentate those two
 475         // with a / separator (e.g. "seg/sect") to fit into the lld model
 476         // of just a section name.
 477         std::string segSectName = section.segmentName.str()
 478                                   + "/" + section.sectionName.str();
 479         file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
 480                                            merge, false, false, offset,
 481                                            size, segSectName, true, &section);
 482       } else {
 483         file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
 484                             false, false, copyRefs, &section);
 485       }
 486       offset += size;
 487     }
 488   }
 489   return llvm::Error::success();
 490 }
 491
 492 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
 493                                           uint64_t address) {
 494   for (const Section &s : normalizedFile.sections) {
 495     uint64_t sAddr = s.address;
 496     if ((sAddr <= address) && (address < sAddr+s.content.size())) {
 497       return &s;
 498     }
 499   }
 500   return nullptr;
 501 }
 502
 503 const MachODefinedAtom *
 504 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
 505                         uint64_t addr, Reference::Addend &addend) {
 506   const Section *sect = nullptr;
 507   sect = findSectionCoveringAddress(normalizedFile, addr);
 508   if (!sect)
 509     return nullptr;
 510
 511   uint32_t offsetInTarget;
 512   uint64_t offsetInSect = addr - sect->address;
 513   auto atom =
 514       file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
 515   addend = offsetInTarget;
 516   return atom;
 517 }
 518
 519 // Walks all relocations for a section in a normalized .o file and
 520 // creates corresponding lld::Reference objects.
 521 llvm::Error convertRelocs(const Section &section,
 522                           const NormalizedFile &normalizedFile,
 523                           bool scatterable,
 524                           MachOFile &file,
 525                           ArchHandler &handler) {
 526   // Utility function for ArchHandler to find atom by its address.
 527   auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
 528                          const lld::Atom **atom, Reference::Addend *addend)
 529                          -> llvm::Error {
 530     if (sectIndex > normalizedFile.sections.size())
 531       return llvm::make_error<GenericError>(Twine("out of range section "
 532                                      "index (") + Twine(sectIndex) + ")");
 533     const Section *sect = nullptr;
 534     if (sectIndex == 0) {
 535       sect = findSectionCoveringAddress(normalizedFile, addr);
 536       if (!sect)
 537         return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
 538                                        + ") is not in any section"));
 539     } else {
 540       sect = &normalizedFile.sections[sectIndex-1];
 541     }
 542     uint32_t offsetInTarget;
 543     uint64_t offsetInSect = addr - sect->address;
 544     *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
 545     *addend = offsetInTarget;
 546     return llvm::Error::success();
 547   };
 548
 549   // Utility function for ArchHandler to find atom by its symbol index.
 550   auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
 551                            -> llvm::Error {
 552     // Find symbol from index.
 553     const Symbol *sym = nullptr;
 554     uint32_t numStabs  = normalizedFile.stabsSymbols.size();
 555     uint32_t numLocal  = normalizedFile.localSymbols.size();
 556     uint32_t numGlobal = normalizedFile.globalSymbols.size();
 557     uint32_t numUndef  = normalizedFile.undefinedSymbols.size();
 558     assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
 559     if (symbolIndex < numStabs+numLocal) {
 560       sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
 561     } else if (symbolIndex < numStabs+numLocal+numGlobal) {
 562       sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
 563     } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
 564       sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
 565                                              numGlobal];
 566     } else {
 567       return llvm::make_error<GenericError>(Twine("symbol index (")
 568                                      + Twine(symbolIndex) + ") out of range");
 569     }
 570
 571     // Find atom from symbol.
 572     if ((sym->type & N_TYPE) == N_SECT) {
 573       if (sym->sect > normalizedFile.sections.size())
 574         return llvm::make_error<GenericError>(Twine("symbol section index (")
 575                                         + Twine(sym->sect) + ") out of range ");
 576       const Section &symSection = normalizedFile.sections[sym->sect-1];
 577       uint64_t targetOffsetInSect = sym->value - symSection.address;
 578       MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
 579                                                             targetOffsetInSect);
 580       if (target) {
 581         *result = target;
 582         return llvm::Error::success();
 583       }
 584       return llvm::make_error<GenericError>("no atom found for defined symbol");
 585     } else if ((sym->type & N_TYPE) == N_UNDF) {
 586       const lld::Atom *target = file.findUndefAtom(sym->name);
 587       if (target) {
 588         *result = target;
 589         return llvm::Error::success();
 590       }
 591       return llvm::make_error<GenericError>("no undefined atom found for sym");
 592     } else {
 593       // Search undefs
 594       return llvm::make_error<GenericError>("no atom found for symbol");
 595     }
 596   };
 597
 598   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
 599   // Use old-school iterator so that paired relocations can be grouped.
 600   for (auto it=section.relocations.begin(), e=section.relocations.end();
 601                                                                 it != e; ++it) {
 602     const Relocation &reloc = *it;
 603     // Find atom this relocation is in.
 604     if (reloc.offset > section.content.size())
 605       return llvm::make_error<GenericError>(
 606                                     Twine("r_address (") + Twine(reloc.offset)
 607                                     + ") is larger than section size ("
 608                                     + Twine(section.content.size()) + ")");
 609     uint32_t offsetInAtom;
 610     MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
 611                                                             reloc.offset,
 612                                                             &offsetInAtom);
 613     assert(inAtom && "r_address in range, should have found atom");
 614     uint64_t fixupAddress = section.address + reloc.offset;
 615
 616     const lld::Atom *target = nullptr;
 617     Reference::Addend addend = 0;
 618     Reference::KindValue kind;
 619     if (handler.isPairedReloc(reloc)) {
 620       // Handle paired relocations together.
 621       const Relocation &reloc2 = *++it;
 622       auto relocErr = handler.getPairReferenceInfo(
 623           reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
 624           atomByAddr, atomBySymbol, &kind, &target, &addend);
 625       if (relocErr) {
 626         return handleErrors(std::move(relocErr),
 627                             [&](std::unique_ptr<GenericError> GE) {
 628           return llvm::make_error<GenericError>(
 629             Twine("bad relocation (") + GE->getMessage()
 630              + ") in section "
 631              + section.segmentName + "/" + section.sectionName
 632              + " (r1_address=" + Twine::utohexstr(reloc.offset)
 633              + ", r1_type=" + Twine(reloc.type)
 634              + ", r1_extern=" + Twine(reloc.isExtern)
 635              + ", r1_length=" + Twine((int)reloc.length)
 636              + ", r1_pcrel=" + Twine(reloc.pcRel)
 637              + (!reloc.scattered ? (Twine(", r1_symbolnum=")
 638                                     + Twine(reloc.symbol))
 639                                  : (Twine(", r1_scattered=1, r1_value=")
 640                                     + Twine(reloc.value)))
 641              + ")"
 642              + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
 643              + ", r2_type=" + Twine(reloc2.type)
 644              + ", r2_extern=" + Twine(reloc2.isExtern)
 645              + ", r2_length=" + Twine((int)reloc2.length)
 646              + ", r2_pcrel=" + Twine(reloc2.pcRel)
 647              + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
 648                                      + Twine(reloc2.symbol))
 649                                   : (Twine(", r2_scattered=1, r2_value=")
 650                                      + Twine(reloc2.value)))
 651              + ")" );
 652           });
 653       }
 654     }
 655     else {
 656       // Use ArchHandler to convert relocation record into information
 657       // needed to instantiate an lld::Reference object.
 658       auto relocErr = handler.getReferenceInfo(
 659           reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
 660           atomBySymbol, &kind, &target, &addend);
 661       if (relocErr) {
 662         return handleErrors(std::move(relocErr),
 663                             [&](std::unique_ptr<GenericError> GE) {
 664           return llvm::make_error<GenericError>(
 665             Twine("bad relocation (") + GE->getMessage()
 666              + ") in section "
 667              + section.segmentName + "/" + section.sectionName
 668              + " (r_address=" + Twine::utohexstr(reloc.offset)
 669              + ", r_type=" + Twine(reloc.type)
 670              + ", r_extern=" + Twine(reloc.isExtern)
 671              + ", r_length=" + Twine((int)reloc.length)
 672              + ", r_pcrel=" + Twine(reloc.pcRel)
 673              + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
 674                                  : (Twine(", r_scattered=1, r_value=")
 675                                     + Twine(reloc.value)))
 676              + ")" );
 677           });
 678       }
 679     }
 680     // Instantiate an lld::Reference object and add to its atom.
 681     inAtom->addReference(Reference::KindNamespace::mach_o,
 682                          handler.kindArch(),
 683                          kind, offsetInAtom, target, addend);
 684   }
 685
 686   return llvm::Error::success();
 687 }
 688
 689 bool isDebugInfoSection(const Section &section) {
 690   if ((section.attributes & S_ATTR_DEBUG) == 0)
 691     return false;
 692   return section.segmentName.equals("__DWARF");
 693 }
 694
 695 static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
 696   std::string strName = name.str();
 697   for (auto *atom : file.defined())
 698     if (atom->name() == strName)
 699       return atom;
 700   return nullptr;
 701 }
 702
 703 static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
 704   char *strCopy = alloc.Allocate<char>(str.size() + 1);
 705   memcpy(strCopy, str.data(), str.size());
 706   strCopy[str.size()] = '\0';
 707   return strCopy;
 708 }
 709
 710 llvm::Error parseStabs(MachOFile &file,
 711                        const NormalizedFile &normalizedFile,
 712                        bool copyRefs) {
 713
 714   if (normalizedFile.stabsSymbols.empty())
 715     return llvm::Error::success();
 716
 717   // FIXME: Kill this off when we can move to sane yaml parsing.
 718   std::unique_ptr<BumpPtrAllocator> allocator;
 719   if (copyRefs)
 720     allocator = llvm::make_unique<BumpPtrAllocator>();
 721
 722   enum { start, inBeginEnd } state = start;
 723
 724   const Atom *currentAtom = nullptr;
 725   uint64_t currentAtomAddress = 0;
 726   StabsDebugInfo::StabsList stabsList;
 727   for (const auto &stabSym : normalizedFile.stabsSymbols) {
 728     Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
 729               stabSym.value, stabSym.name);
 730     switch (state) {
 731     case start:
 732       switch (static_cast<StabType>(stabSym.type)) {
 733       case N_BNSYM:
 734         state = inBeginEnd;
 735         currentAtomAddress = stabSym.value;
 736         Reference::Addend addend;
 737         currentAtom = findAtomCoveringAddress(normalizedFile, file,
 738                                               currentAtomAddress, addend);
 739         if (addend != 0)
 740           return llvm::make_error<GenericError>(
 741                    "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
 742                    file.path());
 743         if (currentAtom)
 744           stab.atom = currentAtom;
 745         else {
 746           // FIXME: ld64 just issues a warning here - should we match that?
 747           return llvm::make_error<GenericError>(
 748                    "can't find atom for stabs BNSYM at " +
 749                    Twine::utohexstr(stabSym.value) + " in " + file.path());
 750         }
 751         break;
 752       case N_SO:
 753       case N_OSO:
 754         // Not associated with an atom, just copy.
 755         if (copyRefs)
 756           stab.str = copyDebugString(stabSym.name, *allocator);
 757         else
 758           stab.str = stabSym.name;
 759         break;
 760       case N_GSYM: {
 761         auto colonIdx = stabSym.name.find(':');
 762         if (colonIdx != StringRef::npos) {
 763           StringRef name = stabSym.name.substr(0, colonIdx);
 764           currentAtom = findDefinedAtomByName(file, "_" + name);
 765           stab.atom = currentAtom;
 766           if (copyRefs)
 767             stab.str = copyDebugString(stabSym.name, *allocator);
 768           else
 769             stab.str = stabSym.name;
 770         } else {
 771           currentAtom = findDefinedAtomByName(file, stabSym.name);
 772           stab.atom = currentAtom;
 773           if (copyRefs)
 774             stab.str = copyDebugString(stabSym.name, *allocator);
 775           else
 776             stab.str = stabSym.name;
 777         }
 778         if (stab.atom == nullptr)
 779           return llvm::make_error<GenericError>(
 780                    "can't find atom for N_GSYM stabs" + stabSym.name +
 781                    " in " + file.path());
 782         break;
 783       }
 784       case N_FUN:
 785         return llvm::make_error<GenericError>(
 786                  "old-style N_FUN stab '" + stabSym.name + "' unsupported");
 787       default:
 788         return llvm::make_error<GenericError>(
 789                  "unrecognized stab symbol '" + stabSym.name + "'");
 790       }
 791       break;
 792     case inBeginEnd:
 793       stab.atom = currentAtom;
 794       switch (static_cast<StabType>(stabSym.type)) {
 795       case N_ENSYM:
 796         state = start;
 797         currentAtom = nullptr;
 798         break;
 799       case N_FUN:
 800         // Just copy the string.
 801         if (copyRefs)
 802           stab.str = copyDebugString(stabSym.name, *allocator);
 803         else
 804           stab.str = stabSym.name;
 805         break;
 806       default:
 807         return llvm::make_error<GenericError>(
 808                  "unrecognized stab symbol '" + stabSym.name + "'");
 809       }
 810     }
 811     llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
 812     stabsList.push_back(stab);
 813   }
 814
 815   file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList)));
 816
 817   // FIXME: Kill this off when we fix YAML memory ownership.
 818   file.debugInfo()->setAllocator(std::move(allocator));
 819
 820   return llvm::Error::success();
 821 }
 822
 823 static llvm::DataExtractor
 824 dataExtractorFromSection(const NormalizedFile &normalizedFile,
 825                          const Section &S) {
 826   const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
 827   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
 828   StringRef SecData(reinterpret_cast<const char*>(S.content.data()),
 829                     S.content.size());
 830   return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4);
 831 }
 832
 833 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 834 //        inspection" code if possible.
 835 static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
 836                                   uint64_t abbrCode) {
 837   uint64_t curCode;
 838   uint32_t offset = 0;
 839   while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) {
 840     // Tag
 841     abbrevData.getULEB128(&offset);
 842     // DW_CHILDREN
 843     abbrevData.getU8(&offset);
 844     // Attributes
 845     while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
 846       ;
 847   }
 848   return offset;
 849 }
 850
 851 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 852 //        inspection" code if possible.
 853 static Expected<const char *>
 854 getIndexedString(const NormalizedFile &normalizedFile,
 855                  llvm::dwarf::Form form, llvm::DataExtractor infoData,
 856                  uint32_t &infoOffset, const Section &stringsSection) {
 857   if (form == llvm::dwarf::DW_FORM_string)
 858    return infoData.getCStr(&infoOffset);
 859   if (form != llvm::dwarf::DW_FORM_strp)
 860     return llvm::make_error<GenericError>(
 861         "string field encoded without DW_FORM_strp");
 862   uint32_t stringOffset = infoData.getU32(&infoOffset);
 863   llvm::DataExtractor stringsData =
 864     dataExtractorFromSection(normalizedFile, stringsSection);
 865   return stringsData.getCStr(&stringOffset);
 866 }
 867
 868 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 869 //        inspection" code if possible.
 870 static llvm::Expected<TranslationUnitSource>
 871 readCompUnit(const NormalizedFile &normalizedFile,
 872              const Section &info,
 873              const Section &abbrev,
 874              const Section &strings,
 875              StringRef path) {
 876   // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
 877   //        inspection" code if possible.
 878   uint32_t offset = 0;
 879   llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
 880   auto infoData = dataExtractorFromSection(normalizedFile, info);
 881   uint32_t length = infoData.getU32(&offset);
 882   if (length == 0xffffffff) {
 883     Format = llvm::dwarf::DwarfFormat::DWARF64;
 884     infoData.getU64(&offset);
 885   }
 886   else if (length > 0xffffff00)
 887     return llvm::make_error<GenericError>("Malformed DWARF in " + path);
 888
 889   uint16_t version = infoData.getU16(&offset);
 890
 891   if (version < 2 || version > 4)
 892     return llvm::make_error<GenericError>("Unsupported DWARF version in " +
 893                                           path);
 894
 895   infoData.getU32(&offset); // Abbrev offset (should be zero)
 896   uint8_t addrSize = infoData.getU8(&offset);
 897
 898   uint32_t abbrCode = infoData.getULEB128(&offset);
 899   auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
 900   uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
 901   uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
 902   if (tag != llvm::dwarf::DW_TAG_compile_unit)
 903     return llvm::make_error<GenericError>("top level DIE is not a compile unit");
 904   // DW_CHILDREN
 905   abbrevData.getU8(&abbrevOffset);
 906   uint32_t name;
 907   llvm::dwarf::Form form;
 908   llvm::dwarf::FormParams formParams = {version, addrSize, Format};
 909   TranslationUnitSource tu;
 910   while ((name = abbrevData.getULEB128(&abbrevOffset)) |
 911          (form = static_cast<llvm::dwarf::Form>(
 912              abbrevData.getULEB128(&abbrevOffset))) &&
 913          (name != 0 || form != 0)) {
 914     switch (name) {
 915     case llvm::dwarf::DW_AT_name: {
 916       if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
 917                                         strings))
 918           tu.name = *eName;
 919       else
 920         return eName.takeError();
 921       break;
 922     }
 923     case llvm::dwarf::DW_AT_comp_dir: {
 924       if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
 925                                         strings))
 926         tu.path = *eName;
 927       else
 928         return eName.takeError();
 929       break;
 930     }
 931     default:
 932       llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
 933     }
 934   }
 935   return tu;
 936 }
 937
 938 llvm::Error parseDebugInfo(MachOFile &file,
 939                            const NormalizedFile &normalizedFile, bool copyRefs) {
 940
 941   // Find the interesting debug info sections.
 942   const Section *debugInfo = nullptr;
 943   const Section *debugAbbrev = nullptr;
 944   const Section *debugStrings = nullptr;
 945
 946   for (auto &s : normalizedFile.sections) {
 947     if (s.segmentName == "__DWARF") {
 948       if (s.sectionName == "__debug_info")
 949         debugInfo = &s;
 950       else if (s.sectionName == "__debug_abbrev")
 951         debugAbbrev = &s;
 952       else if (s.sectionName == "__debug_str")
 953         debugStrings = &s;
 954     }
 955   }
 956
 957   if (!debugInfo)
 958     return parseStabs(file, normalizedFile, copyRefs);
 959
 960   if (debugInfo->content.size() == 0)
 961     return llvm::Error::success();
 962
 963   if (debugInfo->content.size() < 12)
 964     return llvm::make_error<GenericError>("Malformed __debug_info section in " +
 965                                           file.path() + ": too small");
 966
 967   if (!debugAbbrev)
 968     return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " +
 969                                           file.path());
 970
 971   if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
 972                                   *debugStrings, file.path())) {
 973     // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML
 974     //        memory ownership.
 975     std::unique_ptr<BumpPtrAllocator> allocator;
 976     if (copyRefs) {
 977       allocator = llvm::make_unique<BumpPtrAllocator>();
 978       tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
 979       tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
 980     }
 981     file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
 982     if (copyRefs)
 983       file.debugInfo()->setAllocator(std::move(allocator));
 984   } else
 985     return tuOrErr.takeError();
 986
 987   return llvm::Error::success();
 988 }
 989
 990 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
 991   if (is64)
 992     return read64(addr, isBig);
 993
 994   int32_t res = read32(addr, isBig);
 995   return res;
 996 }
 997
 998 /// --- Augmentation String Processing ---
 999
/// What a CIE's augmentation string says about the CIE.  Offsets are relative
/// to the start of the augmentation data; ~0U means "not present".
struct CIEInfo {
  /// The augmentation string starts with 'z'.
  bool _augmentationDataPresent{false};
  /// The augmentation string contains "eh".
  bool _mayHaveEH{false};
  /// Offset recorded for the 'L' augmentation.
  uint32_t _offsetOfLSDA{~0U};
  /// Offset recorded for the 'P' augmentation.
  uint32_t _offsetOfPersonality{~0U};
  /// Offset recorded for the 'R' augmentation.
  uint32_t _offsetOfFDEPointerEncoding{~0U};
  /// Total augmentation data size implied by the augmentation string.
  uint32_t _augmentationDataLength{~0U};
};
1008
1009 typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
1010
1011 static llvm::Error processAugmentationString(const uint8_t *augStr,
1012                                              CIEInfo &cieInfo,
1013                                              unsigned &len) {
1014
1015   if (augStr[0] == '\0') {
1016     len = 1;
1017     return llvm::Error::success();
1018   }
1019
1020   if (augStr[0] != 'z')
1021     return llvm::make_error<GenericError>("expected 'z' at start of "
1022                                           "augmentation string");
1023
1024   cieInfo._augmentationDataPresent = true;
1025   uint64_t idx = 1;
1026
1027   uint32_t offsetInAugmentationData = 0;
1028   while (augStr[idx] != '\0') {
1029     if (augStr[idx] == 'L') {
1030       cieInfo._offsetOfLSDA = offsetInAugmentationData;
1031       // This adds a single byte to the augmentation data.
1032       ++offsetInAugmentationData;
1033       ++idx;
1034       continue;
1035     }
1036     if (augStr[idx] == 'P') {
1037       cieInfo._offsetOfPersonality = offsetInAugmentationData;
1038       // This adds a single byte to the augmentation data for the encoding,
1039       // then a number of bytes for the pointer data.
1040       // FIXME: We are assuming 4 is correct here for the pointer size as we
1041       // always currently use delta32ToGOT.
1042       offsetInAugmentationData += 5;
1043       ++idx;
1044       continue;
1045     }
1046     if (augStr[idx] == 'R') {
1047       cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
1048       // This adds a single byte to the augmentation data.
1049       ++offsetInAugmentationData;
1050       ++idx;
1051       continue;
1052     }
1053     if (augStr[idx] == 'e') {
1054       if (augStr[idx + 1] != 'h')
1055         return llvm::make_error<GenericError>("expected 'eh' in "
1056                                               "augmentation string");
1057       cieInfo._mayHaveEH = true;
1058       idx += 2;
1059       continue;
1060     }
1061     ++idx;
1062   }
1063
1064   cieInfo._augmentationDataLength = offsetInAugmentationData;
1065
1066   len = idx + 1;
1067   return llvm::Error::success();
1068 }
1069
1070 static llvm::Error processCIE(const NormalizedFile &normalizedFile,
1071                               MachOFile &file,
1072                               mach_o::ArchHandler &handler,
1073                               const Section *ehFrameSection,
1074                               MachODefinedAtom *atom,
1075                               uint64_t offset,
1076                               CIEInfoMap &cieInfos) {
1077   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1078   const uint8_t *frameData = atom->rawContent().data();
1079
1080   CIEInfo cieInfo;
1081
1082   uint32_t size = read32(frameData, isBig);
1083   uint64_t cieIDField = size == 0xffffffffU
1084                           ? sizeof(uint32_t) + sizeof(uint64_t)
1085                           : sizeof(uint32_t);
1086   uint64_t versionField = cieIDField + sizeof(uint32_t);
1087   uint64_t augmentationStringField = versionField + sizeof(uint8_t);
1088
1089   unsigned augmentationStringLength = 0;
1090   if (auto err = processAugmentationString(frameData + augmentationStringField,
1091                                            cieInfo, augmentationStringLength))
1092     return err;
1093
1094   if (cieInfo._offsetOfPersonality != ~0U) {
1095     // If we have augmentation data for the personality function, then we may
1096     // need to implicitly generate its relocation.
1097
1098     // Parse the EH Data field which is pointer sized.
1099     uint64_t EHDataField = augmentationStringField + augmentationStringLength;
1100     const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1101     unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
1102
1103     // Parse Code Align Factor which is a ULEB128.
1104     uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
1105     unsigned lengthFieldSize = 0;
1106     llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
1107
1108     // Parse Data Align Factor which is a SLEB128.
1109     uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
1110     llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
1111
1112     // Parse Return Address Register which is a byte.
1113     uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
1114
1115     // Parse the augmentation length which is a ULEB128.
1116     uint64_t AugmentationLengthField = ReturnAddressField + 1;
1117     uint64_t AugmentationLength =
1118       llvm::decodeULEB128(frameData + AugmentationLengthField,
1119                           &lengthFieldSize);
1120
1121     if (AugmentationLength != cieInfo._augmentationDataLength)
1122       return llvm::make_error<GenericError>("CIE augmentation data length "
1123                                             "mismatch");
1124
1125     // Get the start address of the augmentation data.
1126     uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
1127
1128     // Parse the personality function from the augmentation data.
1129     uint64_t PersonalityField =
1130       AugmentationDataField + cieInfo._offsetOfPersonality;
1131
1132     // Parse the personality encoding.
1133     // FIXME: Verify that this is a 32-bit pcrel offset.
1134     uint64_t PersonalityFunctionField = PersonalityField + 1;
1135
1136     if (atom->begin() != atom->end()) {
1137       // If we have an explicit relocation, then make sure it matches this
1138       // offset as this is where we'd expect it to be applied to.
1139       DefinedAtom::reference_iterator CurrentRef = atom->begin();
1140       if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
1141         return llvm::make_error<GenericError>("CIE personality reloc at "
1142                                               "wrong offset");
1143
1144       if (++CurrentRef != atom->end())
1145         return llvm::make_error<GenericError>("CIE contains too many relocs");
1146     } else {
1147       // Implicitly generate the personality function reloc.  It's assumed to
1148       // be a delta32 offset to a GOT entry.
1149       // FIXME: Parse the encoding and check this.
1150       int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
1151       uint64_t funcAddress = ehFrameSection->address + offset +
1152                              PersonalityFunctionField;
1153       funcAddress += funcDelta;
1154
1155       const MachODefinedAtom *func = nullptr;
1156       Reference::Addend addend;
1157       func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
1158                                      addend);
1159       atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1160                          handler.unwindRefToPersonalityFunctionKind(),
1161                          PersonalityFunctionField, func, addend);
1162     }
1163   } else if (atom->begin() != atom->end()) {
1164     // Otherwise, we expect there to be no relocations in this atom as the only
1165     // relocation would have been to the personality function.
1166     return llvm::make_error<GenericError>("unexpected relocation in CIE");
1167   }
1168
1169
1170   cieInfos[atom] = std::move(cieInfo);
1171
1172   return llvm::Error::success();
1173 }
1174
1175 static llvm::Error processFDE(const NormalizedFile &normalizedFile,
1176                               MachOFile &file,
1177                               mach_o::ArchHandler &handler,
1178                               const Section *ehFrameSection,
1179                               MachODefinedAtom *atom,
1180                               uint64_t offset,
1181                               const CIEInfoMap &cieInfos) {
1182
1183   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1184   const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1185
1186   // Compiler wasn't lazy and actually told us what it meant.
1187   // Unfortunately, the compiler may not have generated references for all of
1188   // [cie, func, lsda] and so we still need to parse the FDE and add references
1189   // for any the compiler didn't generate.
1190   if (atom->begin() != atom->end())
1191     atom->sortReferences();
1192
1193   DefinedAtom::reference_iterator CurrentRef = atom->begin();
1194
1195   // This helper returns the reference (if one exists) at the offset we are
1196   // currently processing.  It automatically increments the ref iterator if we
1197   // do return a ref, and throws an error if we pass over a ref without
1198   // comsuming it.
1199   auto currentRefGetter = [&CurrentRef,
1200                            &atom](uint64_t Offset)->const Reference* {
1201     // If there are no more refs found, then we are done.
1202     if (CurrentRef == atom->end())
1203       return nullptr;
1204
1205     const Reference *Ref = *CurrentRef;
1206
1207     // If we haven't reached the offset for this reference, then return that
1208     // we don't yet have a reference to process.
1209     if (Offset < Ref->offsetInAtom())
1210       return nullptr;
1211
1212     // If the offset is equal, then we want to process this ref.
1213     if (Offset == Ref->offsetInAtom()) {
1214       ++CurrentRef;
1215       return Ref;
1216     }
1217
1218     // The current ref is at an offset which is earlier than the current
1219     // offset, then we failed to consume it when we should have.  In this case
1220     // throw an error.
1221     llvm::report_fatal_error("Skipped reference when processing FDE");
1222   };
1223
1224   // Helper to either get the reference at this current location, and verify
1225   // that it is of the expected type, or add a reference of that type.
1226   // Returns the reference target.
1227   auto verifyOrAddReference = [&](uint64_t targetAddress,
1228                                   Reference::KindValue refKind,
1229                                   uint64_t refAddress,
1230                                   bool allowsAddend)->const Atom* {
1231     if (auto *ref = currentRefGetter(refAddress)) {
1232       // The compiler already emitted a relocation for the CIE ref.  This should
1233       // have been converted to the correct type of reference in
1234       // get[Pair]ReferenceInfo().
1235       assert(ref->kindValue() == refKind &&
1236              "Incorrect EHFrame reference kind");
1237       return ref->target();
1238     }
1239     Reference::Addend addend;
1240     auto *target = findAtomCoveringAddress(normalizedFile, file,
1241                                            targetAddress, addend);
1242     atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1243                        refKind, refAddress, target, addend);
1244
1245     if (!allowsAddend)
1246       assert(!addend && "EHFrame reference cannot have addend");
1247     return target;
1248   };
1249
1250   const uint8_t *startFrameData = atom->rawContent().data();
1251   const uint8_t *frameData = startFrameData;
1252
1253   uint32_t size = read32(frameData, isBig);
1254   uint64_t cieFieldInFDE = size == 0xffffffffU
1255     ? sizeof(uint32_t) + sizeof(uint64_t)
1256     : sizeof(uint32_t);
1257
1258   // Linker needs to fixup a reference from the FDE to its parent CIE (a
1259   // 32-bit byte offset backwards in the __eh_frame section).
1260   uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
1261   uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
1262   cieAddress -= cieDelta;
1263
1264   auto *cieRefTarget = verifyOrAddReference(cieAddress,
1265                                             handler.unwindRefToCIEKind(),
1266                                             cieFieldInFDE, false);
1267   const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
1268   assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
1269          "FDE's CIE field does not point at the start of a CIE.");
1270
1271   const CIEInfo &cieInfo = cieInfos.find(cie)->second;
1272
1273   // Linker needs to fixup reference from the FDE to the function it's
1274   // describing. FIXME: there are actually different ways to do this, and the
1275   // particular method used is specified in the CIE's augmentation fields
1276   // (hopefully)
1277   uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
1278
1279   int64_t functionFromFDE = readSPtr(is64, isBig,
1280                                      frameData + rangeFieldInFDE);
1281   uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
1282   rangeStart += functionFromFDE;
1283
1284   verifyOrAddReference(rangeStart,
1285                        handler.unwindRefToFunctionKind(),
1286                        rangeFieldInFDE, true);
1287
1288   // Handle the augmentation data if there is any.
1289   if (cieInfo._augmentationDataPresent) {
1290     // First process the augmentation data length field.
1291     uint64_t augmentationDataLengthFieldInFDE =
1292       rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
1293     unsigned lengthFieldSize = 0;
1294     uint64_t augmentationDataLength =
1295       llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
1296                           &lengthFieldSize);
1297
1298     if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
1299
1300       // Look at the augmentation data field.
1301       uint64_t augmentationDataFieldInFDE =
1302         augmentationDataLengthFieldInFDE + lengthFieldSize;
1303
1304       int64_t lsdaFromFDE = readSPtr(is64, isBig,
1305                                      frameData + augmentationDataFieldInFDE);
1306       uint64_t lsdaStart =
1307         ehFrameSection->address + offset + augmentationDataFieldInFDE +
1308         lsdaFromFDE;
1309
1310       verifyOrAddReference(lsdaStart,
1311                            handler.unwindRefToFunctionKind(),
1312                            augmentationDataFieldInFDE, true);
1313     }
1314   }
1315
1316   return llvm::Error::success();
1317 }
1318
1319 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
1320                                  MachOFile &file,
1321                                  mach_o::ArchHandler &handler) {
1322
1323   const Section *ehFrameSection = nullptr;
1324   for (auto &section : normalizedFile.sections)
1325     if (section.segmentName == "__TEXT" &&
1326         section.sectionName == "__eh_frame") {
1327       ehFrameSection = &section;
1328       break;
1329     }
1330
1331   // No __eh_frame so nothing to do.
1332   if (!ehFrameSection)
1333     return llvm::Error::success();
1334
1335   llvm::Error ehFrameErr = llvm::Error::success();
1336   CIEInfoMap cieInfos;
1337
1338   file.eachAtomInSection(*ehFrameSection,
1339                          [&](MachODefinedAtom *atom, uint64_t offset) -> void {
1340     assert(atom->contentType() == DefinedAtom::typeCFI);
1341
1342     // Bail out if we've encountered an error.
1343     if (ehFrameErr)
1344       return;
1345
1346     const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1347     if (ArchHandler::isDwarfCIE(isBig, atom))
1348       ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
1349                               atom, offset, cieInfos);
1350     else
1351       ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
1352                               atom, offset, cieInfos);
1353   });
1354
1355   return ehFrameErr;
1356 }
1357
1358 llvm::Error parseObjCImageInfo(const Section &sect,
1359                                const NormalizedFile &normalizedFile,
1360                                MachOFile &file) {
1361
1362   //    struct objc_image_info  {
1363   //            uint32_t        version;        // initially 0
1364   //            uint32_t        flags;
1365   //    };
1366
1367   ArrayRef<uint8_t> content = sect.content;
1368   if (content.size() != 8)
1369     return llvm::make_error<GenericError>(sect.segmentName + "/" +
1370                                           sect.sectionName +
1371                                           " in file " + file.path() +
1372                                           " should be 8 bytes in size");
1373
1374   const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1375   uint32_t version = read32(content.data(), isBig);
1376   if (version)
1377     return llvm::make_error<GenericError>(sect.segmentName + "/" +
1378                                           sect.sectionName +
1379                                           " in file " + file.path() +
1380                                           " should have version=0");
1381
1382   uint32_t flags = read32(content.data() + 4, isBig);
1383   if (flags & (MachOLinkingContext::objc_supports_gc |
1384                MachOLinkingContext::objc_gc_only))
1385     return llvm::make_error<GenericError>(sect.segmentName + "/" +
1386                                           sect.sectionName +
1387                                           " in file " + file.path() +
1388                                           " uses GC.  This is not supported");
1389
1390   if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
1391     file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
1392   else
1393     file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
1394
1395   file.setSwiftVersion((flags >> 8) & 0xFF);
1396
1397   return llvm::Error::success();
1398 }
1399
1400 /// Converts normalized mach-o file into an lld::File and lld::Atoms.
1401 llvm::Expected<std::unique_ptr<lld::File>>
1402 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1403               bool copyRefs) {
1404   std::unique_ptr<MachOFile> file(new MachOFile(path));
1405   if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
1406     return std::move(ec);
1407   return std::unique_ptr<File>(std::move(file));
1408 }
1409
1410 llvm::Expected<std::unique_ptr<lld::File>>
1411 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1412              bool copyRefs) {
1413   // Instantiate SharedLibraryFile object.
1414   std::unique_ptr<MachODylibFile> file(new MachODylibFile(path));
1415   if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
1416     return std::move(ec);
1417   return std::unique_ptr<File>(std::move(file));
1418 }
1419
1420 } // anonymous namespace
1421
1422 namespace normalized {
1423
1424 static bool isObjCImageInfo(const Section &sect) {
1425   return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
1426     (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo");
1427 }
1428
1429 llvm::Error
1430 normalizedObjectToAtoms(MachOFile *file,
1431                         const NormalizedFile &normalizedFile,
1432                         bool copyRefs) {
1433   LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
1434                           << file->path() << "\n");
1435   bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
1436
1437   // Create atoms from each section.
1438   for (auto &sect : normalizedFile.sections) {
1439
1440     // If this is a debug-info section parse it specially.
1441     if (isDebugInfoSection(sect))
1442       continue;
1443
1444     // If the file contains an objc_image_info struct, then we should parse the
1445     // ObjC flags and Swift version.
1446     if (isObjCImageInfo(sect)) {
1447       if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
1448         return ec;
1449       // We then skip adding atoms for this section as we use the ObjCPass to
1450       // re-emit this data after it has been aggregated for all files.
1451       continue;
1452     }
1453
1454     bool customSectionName;
1455     DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
1456                                                             customSectionName);
1457     if (auto ec =  processSection(atomType, sect, customSectionName,
1458                                   normalizedFile, *file, scatterable, copyRefs))
1459       return ec;
1460   }
1461   // Create atoms from undefined symbols.
1462   for (auto &sym : normalizedFile.undefinedSymbols) {
1463     // Undefinded symbols with n_value != 0 are actually tentative definitions.
1464     if (sym.value == Hex64(0)) {
1465       file->addUndefinedAtom(sym.name, copyRefs);
1466     } else {
1467       file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
1468                                 DefinedAtom::Alignment(1 << (sym.desc >> 8)),
1469                                 copyRefs);
1470     }
1471   }
1472
1473   // Convert mach-o relocations to References
1474   std::unique_ptr<mach_o::ArchHandler> handler
1475                                      = ArchHandler::create(normalizedFile.arch);
1476   for (auto &sect : normalizedFile.sections) {
1477     if (isDebugInfoSection(sect))
1478       continue;
1479     if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
1480                                        *file, *handler))
1481       return ec;
1482   }
1483
1484   // Add additional arch-specific References
1485   file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
1486     handler->addAdditionalReferences(*atom);
1487   });
1488
1489   // Each __eh_frame section needs references to both __text (the function we're
1490   // providing unwind info for) and itself (FDE -> CIE). These aren't
1491   // represented in the relocations on some architectures, so we have to add
1492   // them back in manually there.
1493   if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
1494     return ec;
1495
1496   // Process mach-o data-in-code regions array. That information is encoded in
1497   // atoms as References at each transition point.
1498   unsigned nextIndex = 0;
1499   for (const DataInCode &entry : normalizedFile.dataInCode) {
1500     ++nextIndex;
1501     const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
1502     if (!s) {
1503       return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
1504                                                   + Twine(entry.offset)
1505                                                   + ") is not in any section"));
1506     }
1507     uint64_t offsetInSect = entry.offset - s->address;
1508     uint32_t offsetInAtom;
1509     MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
1510                                                            &offsetInAtom);
1511     if (offsetInAtom + entry.length > atom->size()) {
1512       return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
1513                                                   "(offset="
1514                                                   + Twine(entry.offset)
1515                                                   + ", length="
1516                                                   + Twine(entry.length)
1517                                                   + ") crosses atom boundary."));
1518     }
1519     // Add reference that marks start of data-in-code.
1520     atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1521                        handler->dataInCodeTransitionStart(*atom),
1522                        offsetInAtom, atom, entry.kind);
1523
1524     // Peek at next entry, if it starts where this one ends, skip ending ref.
1525     if (nextIndex < normalizedFile.dataInCode.size()) {
1526       const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
1527       if (nextEntry.offset == (entry.offset + entry.length))
1528         continue;
1529     }
1530
1531     // If data goes to end of function, skip ending ref.
1532     if ((offsetInAtom + entry.length) == atom->size())
1533       continue;
1534
1535     // Add reference that marks end of data-in-code.
1536     atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1537                        handler->dataInCodeTransitionEnd(*atom),
1538                        offsetInAtom+entry.length, atom, 0);
1539   }
1540
1541   // Cache some attributes on the file for use later.
1542   file->setFlags(normalizedFile.flags);
1543   file->setArch(normalizedFile.arch);
1544   file->setOS(normalizedFile.os);
1545   file->setMinVersion(normalizedFile.minOSverson);
1546   file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
1547
1548   // Sort references in each atom to their canonical order.
1549   for (const DefinedAtom* defAtom : file->defined()) {
1550     reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
1551   }
1552
1553   if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
1554     return err;
1555
1556   return llvm::Error::success();
1557 }
1558
1559 llvm::Error
1560 normalizedDylibToAtoms(MachODylibFile *file,
1561                        const NormalizedFile &normalizedFile,
1562                        bool copyRefs) {
1563   file->setInstallName(normalizedFile.installName);
1564   file->setCompatVersion(normalizedFile.compatVersion);
1565   file->setCurrentVersion(normalizedFile.currentVersion);
1566
1567   // Tell MachODylibFile object about all symbols it exports.
1568   if (!normalizedFile.exportInfo.empty()) {
1569     // If exports trie exists, use it instead of traditional symbol table.
1570     for (const Export &exp : normalizedFile.exportInfo) {
1571       bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1572       // StringRefs from export iterator are ephemeral, so force copy.
1573       file->addExportedSymbol(exp.name, weakDef, true);
1574     }
1575   } else {
1576     for (auto &sym : normalizedFile.globalSymbols) {
1577       assert((sym.scope & N_EXT) && "only expect external symbols here");
1578       bool weakDef = (sym.desc & N_WEAK_DEF);
1579       file->addExportedSymbol(sym.name, weakDef, copyRefs);
1580     }
1581   }
1582   // Tell MachODylibFile object about all dylibs it re-exports.
1583   for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
1584     if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
1585       file->addReExportedDylib(dep.path);
1586   }
1587   return llvm::Error::success();
1588 }
1589
1590 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1591                                           StringRef &segmentName,
1592                                           StringRef &sectionName,
1593                                           SectionType &sectionType,
1594                                           SectionAttr &sectionAttrs,
1595                                           bool &relocsToDefinedCanBeImplicit) {
1596
1597   for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1598                                  p->atomType != DefinedAtom::typeUnknown; ++p) {
1599     if (p->atomType != atomType)
1600       continue;
1601     // Wild carded entries are ignored for reverse lookups.
1602     if (p->segmentName.empty() || p->sectionName.empty())
1603       continue;
1604     segmentName = p->segmentName;
1605     sectionName = p->sectionName;
1606     sectionType = p->sectionType;
1607     sectionAttrs = 0;
1608     relocsToDefinedCanBeImplicit = false;
1609     if (atomType == DefinedAtom::typeCode)
1610       sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1611     if (atomType == DefinedAtom::typeCFI)
1612       relocsToDefinedCanBeImplicit = true;
1613     return;
1614   }
1615   llvm_unreachable("content type not yet supported");
1616 }
1617
1618 llvm::Expected<std::unique_ptr<lld::File>>
1619 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1620                   bool copyRefs) {
1621   switch (normalizedFile.fileType) {
1622   case MH_DYLIB:
1623   case MH_DYLIB_STUB:
1624     return dylibToAtoms(normalizedFile, path, copyRefs);
1625   case MH_OBJECT:
1626     return objectToAtoms(normalizedFile, path, copyRefs);
1627   default:
1628     llvm_unreachable("unhandled MachO file type!");
1629   }
1630 }
1631
1632 } // namespace normalized
1633 } // namespace mach_o
1634 } // namespace lld