//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// // // The LLVM Linker // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file Converts from in-memory normalized mach-o to in-memory Atoms. /// /// +------------+ /// | normalized | /// +------------+ /// | /// | /// v /// +-------+ /// | Atoms | /// +-------+ #include "MachONormalizedFile.h" #include "ArchHandler.h" #include "Atoms.h" #include "File.h" #include "MachONormalizedFileBinaryUtils.h" #include "lld/Core/Error.h" #include "lld/Core/LLVM.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/MachO.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/raw_ostream.h" using namespace llvm::MachO; using namespace lld::mach_o::normalized; #define DEBUG_TYPE "normalized-file-to-atoms" namespace lld { namespace mach_o { namespace { // anonymous #define ENTRY(seg, sect, type, atomType) \ {seg, sect, type, DefinedAtom::atomType } struct MachORelocatableSectionToAtomType { StringRef segmentName; StringRef sectionName; SectionType sectionType; DefinedAtom::ContentType atomType; }; const MachORelocatableSectionToAtomType sectsToAtomType[] = { ENTRY("__TEXT", "__text", S_REGULAR, typeCode), ENTRY("__TEXT", "__text", S_REGULAR, typeResolver), ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), ENTRY("", "", S_CSTRING_LITERALS, typeCString), ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), ENTRY("__DATA", "__data", S_REGULAR, typeData), ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), ENTRY("__DATA", "__const", S_REGULAR, typeConstData), ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, typeInitializerPtr), ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, typeTerminatorPtr), ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, typeGOT), ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, typeGOT), ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, typeThunkTLV), ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, typeTLVInitialZeroFill), ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), ENTRY("", "", S_INTERPOSING, typeInterposingTuples), ENTRY("__LD", "__compact_unwind", S_REGULAR, typeCompactUnwindInfo), ENTRY("", "", S_REGULAR, typeUnknown) }; #undef ENTRY /// Figures out ContentType of a mach-o section. DefinedAtom::ContentType atomTypeFromSection(const Section §ion, bool &customSectionName) { // First look for match of name and type. Empty names in table are wildcards. customSectionName = false; for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; p->atomType != DefinedAtom::typeUnknown; ++p) { if (p->sectionType != section.type) continue; if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) continue; if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) continue; customSectionName = p->segmentName.empty() && p->sectionName.empty(); return p->atomType; } // Look for code denoted by section attributes if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) return DefinedAtom::typeCode; return DefinedAtom::typeUnknown; } enum AtomizeModel { atomizeAtSymbols, atomizeFixedSize, atomizePointerSize, atomizeUTF8, atomizeUTF16, atomizeCFI, atomizeCU, atomizeCFString }; /// Returns info on how to atomize a section of the specified ContentType. void sectionParseInfo(DefinedAtom::ContentType atomType, unsigned int &sizeMultiple, DefinedAtom::Scope &scope, DefinedAtom::Merge &merge, AtomizeModel &atomizeModel) { struct ParseInfo { DefinedAtom::ContentType atomType; unsigned int sizeMultiple; DefinedAtom::Scope scope; DefinedAtom::Merge merge; AtomizeModel atomizeModel; }; #define ENTRY(type, size, scope, merge, model) \ {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } static const ParseInfo parseInfo[] = { ENTRY(typeCode, 1, scopeGlobal, mergeNo, atomizeAtSymbols), ENTRY(typeData, 1, scopeGlobal, mergeNo, atomizeAtSymbols), ENTRY(typeConstData, 1, scopeGlobal, mergeNo, atomizeAtSymbols), ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, atomizeAtSymbols), ENTRY(typeConstant, 1, scopeGlobal, mergeNo, atomizeAtSymbols), ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, atomizeUTF8), ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, atomizeUTF16), ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, atomizeCFI), ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, atomizeFixedSize), ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, atomizeFixedSize), ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, atomizeFixedSize), ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, atomizeCFString), ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, atomizePointerSize), ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, atomizePointerSize), ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, atomizeCU), ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, atomizePointerSize), ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, atomizePointerSize), ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, atomizeAtSymbols) }; #undef ENTRY const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); for (int i=0; i < tableLen; ++i) { if (parseInfo[i].atomType == atomType) { sizeMultiple = parseInfo[i].sizeMultiple; scope = parseInfo[i].scope; merge = parseInfo[i].merge; atomizeModel = parseInfo[i].atomizeModel; return; } } // Unknown type is atomized by symbols. sizeMultiple = 1; scope = DefinedAtom::scopeGlobal; merge = DefinedAtom::mergeNo; atomizeModel = atomizeAtSymbols; } Atom::Scope atomScope(uint8_t scope) { switch (scope) { case N_EXT: return Atom::scopeGlobal; case N_PEXT: case N_PEXT | N_EXT: return Atom::scopeLinkageUnit; case 0: return Atom::scopeTranslationUnit; } llvm_unreachable("unknown scope value!"); } void appendSymbolsInSection(const std::vector &inSymbols, uint32_t sectionIndex, SmallVector &outSyms) { for (const Symbol &sym : inSymbols) { // Only look at definition symbols. if ((sym.type & N_TYPE) != N_SECT) continue; if (sym.sect != sectionIndex) continue; outSyms.push_back(&sym); } } void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, MachOFile &file, uint64_t symbolAddr, StringRef symbolName, uint16_t symbolDescFlags, Atom::Scope symbolScope, uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { // Mach-O symbol table does have size in it. Instead the size is the // difference between this and the next symbol. uint64_t size = nextSymbolAddr - symbolAddr; uint64_t offset = symbolAddr - section.address; bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; if (isZeroFillSection(section.type)) { file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, noDeadStrip, copyRefs, §ion); } else { DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); if (atomType == DefinedAtom::typeUnknown) { // Mach-O needs a segment and section name. Concatentate those two // with a / separator (e.g. "seg/sect") to fit into the lld model // of just a section name. std::string segSectName = section.segmentName.str() + "/" + section.sectionName.str(); file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, merge, thumb, noDeadStrip, offset, size, segSectName, true, §ion); } else { if ((atomType == lld::DefinedAtom::typeCode) && (symbolDescFlags & N_SYMBOL_RESOLVER)) { atomType = lld::DefinedAtom::typeResolver; } file.addDefinedAtom(symbolName, symbolScope, atomType, merge, offset, size, thumb, noDeadStrip, copyRefs, §ion); } } } llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, const Section §ion, const NormalizedFile &normalizedFile, MachOFile &file, bool scatterable, bool copyRefs) { // Find section's index. uint32_t sectIndex = 1; for (auto § : normalizedFile.sections) { if (§ == §ion) break; ++sectIndex; } // Find all symbols in this section. SmallVector symbols; appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); // Sort symbols. std::sort(symbols.begin(), symbols.end(), [](const Symbol *lhs, const Symbol *rhs) -> bool { if (lhs == rhs) return false; // First by address. uint64_t lhsAddr = lhs->value; uint64_t rhsAddr = rhs->value; if (lhsAddr != rhsAddr) return lhsAddr < rhsAddr; // If same address, one is an alias so sort by scope. Atom::Scope lScope = atomScope(lhs->scope); Atom::Scope rScope = atomScope(rhs->scope); if (lScope != rScope) return lScope < rScope; // If same address and scope, see if one might be better as // the alias. bool lPrivate = (lhs->name.front() == 'l'); bool rPrivate = (rhs->name.front() == 'l'); if (lPrivate != rPrivate) return lPrivate; // If same address and scope, sort by name. return lhs->name < rhs->name; }); // Debug logging of symbols. //for (const Symbol *sym : symbols) // llvm::errs() << " sym: " // << llvm::format("0x%08llx ", (uint64_t)sym->value) // << ", " << sym->name << "\n"; // If section has no symbols and no content, there are no atoms. if (symbols.empty() && section.content.empty()) return llvm::Error(); if (symbols.empty()) { // Section has no symbols, put all content in one anoymous atom. atomFromSymbol(atomType, section, file, section.address, StringRef(), 0, Atom::scopeTranslationUnit, section.address + section.content.size(), scatterable, copyRefs); } else if (symbols.front()->value != section.address) { // Section has anonymous content before first symbol. atomFromSymbol(atomType, section, file, section.address, StringRef(), 0, Atom::scopeTranslationUnit, symbols.front()->value, scatterable, copyRefs); } const Symbol *lastSym = nullptr; for (const Symbol *sym : symbols) { if (lastSym != nullptr) { // Ignore any assembler added "ltmpNNN" symbol at start of section // if there is another symbol at the start. if ((lastSym->value != sym->value) || lastSym->value != section.address || !lastSym->name.startswith("ltmp")) { atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, lastSym->desc, atomScope(lastSym->scope), sym->value, scatterable, copyRefs); } } lastSym = sym; } if (lastSym != nullptr) { atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, lastSym->desc, atomScope(lastSym->scope), section.address + section.content.size(), scatterable, copyRefs); } // If object built without .subsections_via_symbols, add reference chain. if (!scatterable) { MachODefinedAtom *prevAtom = nullptr; file.eachAtomInSection(section, [&](MachODefinedAtom *atom, uint64_t offset)->void { if (prevAtom) prevAtom->addReference(Reference::KindNamespace::all, Reference::KindArch::all, Reference::kindLayoutAfter, 0, atom, 0); prevAtom = atom; }); } return llvm::Error(); } llvm::Error processSection(DefinedAtom::ContentType atomType, const Section §ion, bool customSectionName, const NormalizedFile &normalizedFile, MachOFile &file, bool scatterable, bool copyRefs) { const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); // Get info on how to atomize section. unsigned int sizeMultiple; DefinedAtom::Scope scope; DefinedAtom::Merge merge; AtomizeModel atomizeModel; sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); // Validate section size. if ((section.content.size() % sizeMultiple) != 0) return llvm::make_error(Twine("Section ") + section.segmentName + "/" + section.sectionName + " has size (" + Twine(section.content.size()) + ") which is not a multiple of " + Twine(sizeMultiple)); if (atomizeModel == atomizeAtSymbols) { // Break section up into atoms each with a fixed size. return processSymboledSection(atomType, section, normalizedFile, file, scatterable, copyRefs); } else { unsigned int size; for (unsigned int offset = 0, e = section.content.size(); offset != e;) { switch (atomizeModel) { case atomizeFixedSize: // Break section up into atoms each with a fixed size. size = sizeMultiple; break; case atomizePointerSize: // Break section up into atoms each the size of a pointer. size = is64 ? 8 : 4; break; case atomizeUTF8: // Break section up into zero terminated c-strings. size = 0; for (unsigned int i = offset; i < e; ++i) { if (section.content[i] == 0) { size = i + 1 - offset; break; } } break; case atomizeUTF16: // Break section up into zero terminated UTF16 strings. size = 0; for (unsigned int i = offset; i < e; i += 2) { if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { size = i + 2 - offset; break; } } break; case atomizeCFI: // Break section up into dwarf unwind CFIs (FDE or CIE). size = read32(§ion.content[offset], isBig) + 4; if (offset+size > section.content.size()) { return llvm::make_error(Twine("Section ") + section.segmentName + "/" + section.sectionName + " is malformed. Size of CFI " "starting at offset (" + Twine(offset) + ") is past end of section."); } break; case atomizeCU: // Break section up into compact unwind entries. size = is64 ? 32 : 20; break; case atomizeCFString: // Break section up into NS/CFString objects. size = is64 ? 32 : 16; break; case atomizeAtSymbols: break; } if (size == 0) { return llvm::make_error(Twine("Section ") + section.segmentName + "/" + section.sectionName + " is malformed. The last atom " "is not zero terminated."); } if (customSectionName) { // Mach-O needs a segment and section name. Concatentate those two // with a / separator (e.g. "seg/sect") to fit into the lld model // of just a section name. std::string segSectName = section.segmentName.str() + "/" + section.sectionName.str(); file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, merge, false, false, offset, size, segSectName, true, §ion); } else { file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, false, false, copyRefs, §ion); } offset += size; } } return llvm::Error(); } const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, uint64_t address) { for (const Section &s : normalizedFile.sections) { uint64_t sAddr = s.address; if ((sAddr <= address) && (address < sAddr+s.content.size())) { return &s; } } return nullptr; } const MachODefinedAtom * findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, uint64_t addr, Reference::Addend *addend) { const Section *sect = nullptr; sect = findSectionCoveringAddress(normalizedFile, addr); if (!sect) return nullptr; uint32_t offsetInTarget; uint64_t offsetInSect = addr - sect->address; auto atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); *addend = offsetInTarget; return atom; } // Walks all relocations for a section in a normalized .o file and // creates corresponding lld::Reference objects. llvm::Error convertRelocs(const Section §ion, const NormalizedFile &normalizedFile, bool scatterable, MachOFile &file, ArchHandler &handler) { // Utility function for ArchHandler to find atom by its address. auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, const lld::Atom **atom, Reference::Addend *addend) -> llvm::Error { if (sectIndex > normalizedFile.sections.size()) return llvm::make_error(Twine("out of range section " "index (") + Twine(sectIndex) + ")"); const Section *sect = nullptr; if (sectIndex == 0) { sect = findSectionCoveringAddress(normalizedFile, addr); if (!sect) return llvm::make_error(Twine("address (" + Twine(addr) + ") is not in any section")); } else { sect = &normalizedFile.sections[sectIndex-1]; } uint32_t offsetInTarget; uint64_t offsetInSect = addr - sect->address; *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); *addend = offsetInTarget; return llvm::Error(); }; // Utility function for ArchHandler to find atom by its symbol index. auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) -> llvm::Error { // Find symbol from index. const Symbol *sym = nullptr; uint32_t numLocal = normalizedFile.localSymbols.size(); uint32_t numGlobal = normalizedFile.globalSymbols.size(); uint32_t numUndef = normalizedFile.undefinedSymbols.size(); if (symbolIndex < numLocal) { sym = &normalizedFile.localSymbols[symbolIndex]; } else if (symbolIndex < numLocal+numGlobal) { sym = &normalizedFile.globalSymbols[symbolIndex-numLocal]; } else if (symbolIndex < numLocal+numGlobal+numUndef) { sym = &normalizedFile.undefinedSymbols[symbolIndex-numLocal-numGlobal]; } else { return llvm::make_error(Twine("symbol index (") + Twine(symbolIndex) + ") out of range"); } // Find atom from symbol. if ((sym->type & N_TYPE) == N_SECT) { if (sym->sect > normalizedFile.sections.size()) return llvm::make_error(Twine("symbol section index (") + Twine(sym->sect) + ") out of range "); const Section &symSection = normalizedFile.sections[sym->sect-1]; uint64_t targetOffsetInSect = sym->value - symSection.address; MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, targetOffsetInSect); if (target) { *result = target; return llvm::Error(); } return llvm::make_error("no atom found for defined symbol"); } else if ((sym->type & N_TYPE) == N_UNDF) { const lld::Atom *target = file.findUndefAtom(sym->name); if (target) { *result = target; return llvm::Error(); } return llvm::make_error("no undefined atom found for sym"); } else { // Search undefs return llvm::make_error("no atom found for symbol"); } }; const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); // Use old-school iterator so that paired relocations can be grouped. for (auto it=section.relocations.begin(), e=section.relocations.end(); it != e; ++it) { const Relocation &reloc = *it; // Find atom this relocation is in. if (reloc.offset > section.content.size()) return llvm::make_error( Twine("r_address (") + Twine(reloc.offset) + ") is larger than section size (" + Twine(section.content.size()) + ")"); uint32_t offsetInAtom; MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, reloc.offset, &offsetInAtom); assert(inAtom && "r_address in range, should have found atom"); uint64_t fixupAddress = section.address + reloc.offset; const lld::Atom *target = nullptr; Reference::Addend addend = 0; Reference::KindValue kind; if (handler.isPairedReloc(reloc)) { // Handle paired relocations together. const Relocation &reloc2 = *++it; auto relocErr = handler.getPairReferenceInfo( reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, atomByAddr, atomBySymbol, &kind, &target, &addend); if (relocErr) { return handleErrors(std::move(relocErr), [&](std::unique_ptr GE) { return llvm::make_error( Twine("bad relocation (") + GE->getMessage() + ") in section " + section.segmentName + "/" + section.sectionName + " (r1_address=" + Twine::utohexstr(reloc.offset) + ", r1_type=" + Twine(reloc.type) + ", r1_extern=" + Twine(reloc.isExtern) + ", r1_length=" + Twine((int)reloc.length) + ", r1_pcrel=" + Twine(reloc.pcRel) + (!reloc.scattered ? (Twine(", r1_symbolnum=") + Twine(reloc.symbol)) : (Twine(", r1_scattered=1, r1_value=") + Twine(reloc.value))) + ")" + ", (r2_address=" + Twine::utohexstr(reloc2.offset) + ", r2_type=" + Twine(reloc2.type) + ", r2_extern=" + Twine(reloc2.isExtern) + ", r2_length=" + Twine((int)reloc2.length) + ", r2_pcrel=" + Twine(reloc2.pcRel) + (!reloc2.scattered ? (Twine(", r2_symbolnum=") + Twine(reloc2.symbol)) : (Twine(", r2_scattered=1, r2_value=") + Twine(reloc2.value))) + ")" ); }); } } else { // Use ArchHandler to convert relocation record into information // needed to instantiate an lld::Reference object. auto relocErr = handler.getReferenceInfo( reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, atomBySymbol, &kind, &target, &addend); if (relocErr) { return handleErrors(std::move(relocErr), [&](std::unique_ptr GE) { return llvm::make_error( Twine("bad relocation (") + GE->getMessage() + ") in section " + section.segmentName + "/" + section.sectionName + " (r_address=" + Twine::utohexstr(reloc.offset) + ", r_type=" + Twine(reloc.type) + ", r_extern=" + Twine(reloc.isExtern) + ", r_length=" + Twine((int)reloc.length) + ", r_pcrel=" + Twine(reloc.pcRel) + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) : (Twine(", r_scattered=1, r_value=") + Twine(reloc.value))) + ")" ); }); } } // Instantiate an lld::Reference object and add to its atom. inAtom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), kind, offsetInAtom, target, addend); } return llvm::Error(); } bool isDebugInfoSection(const Section §ion) { if ((section.attributes & S_ATTR_DEBUG) == 0) return false; return section.segmentName.equals("__DWARF"); } static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { if (is64) return read64(addr, isBig); int32_t res = read32(addr, isBig); return res; } /// --- Augmentation String Processing --- struct CIEInfo { bool _augmentationDataPresent = false; bool _mayHaveEH = false; uint32_t _offsetOfLSDA = ~0U; uint32_t _offsetOfPersonality = ~0U; uint32_t _offsetOfFDEPointerEncoding = ~0U; uint32_t _augmentationDataLength = ~0U; }; typedef llvm::DenseMap CIEInfoMap; static llvm::Error processAugmentationString(const uint8_t *augStr, CIEInfo &cieInfo, unsigned &len) { if (augStr[0] == '\0') { len = 1; return llvm::Error(); } if (augStr[0] != 'z') return llvm::make_error("expected 'z' at start of " "augmentation string"); cieInfo._augmentationDataPresent = true; uint64_t idx = 1; uint32_t offsetInAugmentationData = 0; while (augStr[idx] != '\0') { if (augStr[idx] == 'L') { cieInfo._offsetOfLSDA = offsetInAugmentationData; // This adds a single byte to the augmentation data. ++offsetInAugmentationData; ++idx; continue; } if (augStr[idx] == 'P') { cieInfo._offsetOfPersonality = offsetInAugmentationData; // This adds a single byte to the augmentation data for the encoding, // then a number of bytes for the pointer data. // FIXME: We are assuming 4 is correct here for the pointer size as we // always currently use delta32ToGOT. offsetInAugmentationData += 5; ++idx; continue; } if (augStr[idx] == 'R') { cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; // This adds a single byte to the augmentation data. ++offsetInAugmentationData; ++idx; continue; } if (augStr[idx] == 'e') { if (augStr[idx + 1] != 'h') return llvm::make_error("expected 'eh' in " "augmentation string"); cieInfo._mayHaveEH = true; idx += 2; continue; } ++idx; } cieInfo._augmentationDataLength = offsetInAugmentationData; len = idx + 1; return llvm::Error(); } static llvm::Error processCIE(const NormalizedFile &normalizedFile, MachOFile &file, mach_o::ArchHandler &handler, const Section *ehFrameSection, MachODefinedAtom *atom, uint64_t offset, CIEInfoMap &cieInfos) { const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); const uint8_t *frameData = atom->rawContent().data(); CIEInfo cieInfo; uint32_t size = read32(frameData, isBig); uint64_t cieIDField = size == 0xffffffffU ? sizeof(uint32_t) + sizeof(uint64_t) : sizeof(uint32_t); uint64_t versionField = cieIDField + sizeof(uint32_t); uint64_t augmentationStringField = versionField + sizeof(uint8_t); unsigned augmentationStringLength = 0; if (auto err = processAugmentationString(frameData + augmentationStringField, cieInfo, augmentationStringLength)) return err; if (cieInfo._offsetOfPersonality != ~0U) { // If we have augmentation data for the personality function, then we may // need to implicitly generate its relocation. // Parse the EH Data field which is pointer sized. uint64_t EHDataField = augmentationStringField + augmentationStringLength; const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); // Parse Code Align Factor which is a ULEB128. uint64_t CodeAlignField = EHDataField + EHDataFieldSize; unsigned lengthFieldSize = 0; llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); // Parse Data Align Factor which is a SLEB128. uint64_t DataAlignField = CodeAlignField + lengthFieldSize; llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); // Parse Return Address Register which is a byte. uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; // Parse the augmentation length which is a ULEB128. uint64_t AugmentationLengthField = ReturnAddressField + 1; uint64_t AugmentationLength = llvm::decodeULEB128(frameData + AugmentationLengthField, &lengthFieldSize); if (AugmentationLength != cieInfo._augmentationDataLength) return llvm::make_error("CIE augmentation data length " "mismatch"); // Get the start address of the augmentation data. uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; // Parse the personality function from the augmentation data. uint64_t PersonalityField = AugmentationDataField + cieInfo._offsetOfPersonality; // Parse the personality encoding. // FIXME: Verify that this is a 32-bit pcrel offset. uint64_t PersonalityFunctionField = PersonalityField + 1; if (atom->begin() != atom->end()) { // If we have an explicit relocation, then make sure it matches this // offset as this is where we'd expect it to be applied to. DefinedAtom::reference_iterator CurrentRef = atom->begin(); if (CurrentRef->offsetInAtom() != PersonalityFunctionField) return llvm::make_error("CIE personality reloc at " "wrong offset"); if (++CurrentRef != atom->end()) return llvm::make_error("CIE contains too many relocs"); } else { // Implicitly generate the personality function reloc. It's assumed to // be a delta32 offset to a GOT entry. // FIXME: Parse the encoding and check this. int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); uint64_t funcAddress = ehFrameSection->address + offset + PersonalityFunctionField; funcAddress += funcDelta; const MachODefinedAtom *func = nullptr; Reference::Addend addend; func = findAtomCoveringAddress(normalizedFile, file, funcAddress, &addend); atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), handler.unwindRefToPersonalityFunctionKind(), PersonalityFunctionField, func, addend); } } else if (atom->begin() != atom->end()) { // Otherwise, we expect there to be no relocations in this atom as the only // relocation would have been to the personality function. return llvm::make_error("unexpected relocation in CIE"); } cieInfos[atom] = std::move(cieInfo); return llvm::Error(); } static llvm::Error processFDE(const NormalizedFile &normalizedFile, MachOFile &file, mach_o::ArchHandler &handler, const Section *ehFrameSection, MachODefinedAtom *atom, uint64_t offset, const CIEInfoMap &cieInfos) { const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); // Compiler wasn't lazy and actually told us what it meant. // Unfortunately, the compiler may not have generated references for all of // [cie, func, lsda] and so we still need to parse the FDE and add references // for any the compiler didn't generate. if (atom->begin() != atom->end()) atom->sortReferences(); DefinedAtom::reference_iterator CurrentRef = atom->begin(); // This helper returns the reference (if one exists) at the offset we are // currently processing. It automatically increments the ref iterator if we // do return a ref, and throws an error if we pass over a ref without // comsuming it. auto currentRefGetter = [&CurrentRef, &atom](uint64_t Offset)->const Reference* { // If there are no more refs found, then we are done. if (CurrentRef == atom->end()) return nullptr; const Reference *Ref = *CurrentRef; // If we haven't reached the offset for this reference, then return that // we don't yet have a reference to process. if (Offset < Ref->offsetInAtom()) return nullptr; // If the offset is equal, then we want to process this ref. if (Offset == Ref->offsetInAtom()) { ++CurrentRef; return Ref; } // The current ref is at an offset which is earlier than the current // offset, then we failed to consume it when we should have. In this case // throw an error. llvm::report_fatal_error("Skipped reference when processing FDE"); }; // Helper to either get the reference at this current location, and verify // that it is of the expected type, or add a reference of that type. // Returns the reference target. auto verifyOrAddReference = [&](uint64_t targetAddress, Reference::KindValue refKind, uint64_t refAddress, bool allowsAddend)->const Atom* { if (auto *ref = currentRefGetter(refAddress)) { // The compiler already emitted a relocation for the CIE ref. This should // have been converted to the correct type of reference in // get[Pair]ReferenceInfo(). assert(ref->kindValue() == refKind && "Incorrect EHFrame reference kind"); return ref->target(); } Reference::Addend addend; auto *target = findAtomCoveringAddress(normalizedFile, file, targetAddress, &addend); atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), refKind, refAddress, target, addend); if (!allowsAddend) assert(!addend && "EHFrame reference cannot have addend"); return target; }; const uint8_t *startFrameData = atom->rawContent().data(); const uint8_t *frameData = startFrameData; uint32_t size = read32(frameData, isBig); uint64_t cieFieldInFDE = size == 0xffffffffU ? sizeof(uint32_t) + sizeof(uint64_t) : sizeof(uint32_t); // Linker needs to fixup a reference from the FDE to its parent CIE (a // 32-bit byte offset backwards in the __eh_frame section). uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; cieAddress -= cieDelta; auto *cieRefTarget = verifyOrAddReference(cieAddress, handler.unwindRefToCIEKind(), cieFieldInFDE, false); const MachODefinedAtom *cie = dyn_cast(cieRefTarget); assert(cie && cie->contentType() == DefinedAtom::typeCFI && "FDE's CIE field does not point at the start of a CIE."); const CIEInfo &cieInfo = cieInfos.find(cie)->second; // Linker needs to fixup reference from the FDE to the function it's // describing. FIXME: there are actually different ways to do this, and the // particular method used is specified in the CIE's augmentation fields // (hopefully) uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); int64_t functionFromFDE = readSPtr(is64, isBig, frameData + rangeFieldInFDE); uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; rangeStart += functionFromFDE; verifyOrAddReference(rangeStart, handler.unwindRefToFunctionKind(), rangeFieldInFDE, true); // Handle the augmentation data if there is any. if (cieInfo._augmentationDataPresent) { // First process the augmentation data length field. uint64_t augmentationDataLengthFieldInFDE = rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); unsigned lengthFieldSize = 0; uint64_t augmentationDataLength = llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, &lengthFieldSize); if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { // Look at the augmentation data field. uint64_t augmentationDataFieldInFDE = augmentationDataLengthFieldInFDE + lengthFieldSize; int64_t lsdaFromFDE = readSPtr(is64, isBig, frameData + augmentationDataFieldInFDE); uint64_t lsdaStart = ehFrameSection->address + offset + augmentationDataFieldInFDE + lsdaFromFDE; verifyOrAddReference(lsdaStart, handler.unwindRefToFunctionKind(), augmentationDataFieldInFDE, true); } } return llvm::Error(); } llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, MachOFile &file, mach_o::ArchHandler &handler) { const Section *ehFrameSection = nullptr; for (auto §ion : normalizedFile.sections) if (section.segmentName == "__TEXT" && section.sectionName == "__eh_frame") { ehFrameSection = §ion; break; } // No __eh_frame so nothing to do. if (!ehFrameSection) return llvm::Error(); llvm::Error ehFrameErr; CIEInfoMap cieInfos; file.eachAtomInSection(*ehFrameSection, [&](MachODefinedAtom *atom, uint64_t offset) -> void { assert(atom->contentType() == DefinedAtom::typeCFI); // Bail out if we've encountered an error. if (ehFrameErr) return; const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); if (ArchHandler::isDwarfCIE(isBig, atom)) ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, atom, offset, cieInfos); else ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, atom, offset, cieInfos); }); return ehFrameErr; } llvm::Error parseObjCImageInfo(const Section §, const NormalizedFile &normalizedFile, MachOFile &file) { // struct objc_image_info { // uint32_t version; // initially 0 // uint32_t flags; // }; ArrayRef content = sect.content; if (content.size() != 8) return llvm::make_error(sect.segmentName + "/" + sect.sectionName + " in file " + file.path() + " should be 8 bytes in size"); const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); uint32_t version = read32(content.data(), isBig); if (version) return llvm::make_error(sect.segmentName + "/" + sect.sectionName + " in file " + file.path() + " should have version=0"); uint32_t flags = read32(content.data() + 4, isBig); if (flags & (MachOLinkingContext::objc_supports_gc | MachOLinkingContext::objc_gc_only)) return llvm::make_error(sect.segmentName + "/" + sect.sectionName + " in file " + file.path() + " uses GC. This is not supported"); if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); else file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); file.setSwiftVersion((flags >> 8) & 0xFF); return llvm::Error(); } /// Converts normalized mach-o file into an lld::File and lld::Atoms. llvm::Expected> objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, bool copyRefs) { std::unique_ptr file(new MachOFile(path)); if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) return std::move(ec); return std::unique_ptr(std::move(file)); } llvm::Expected> dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, bool copyRefs) { // Instantiate SharedLibraryFile object. std::unique_ptr file(new MachODylibFile(path)); if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) return std::move(ec); return std::unique_ptr(std::move(file)); } } // anonymous namespace namespace normalized { static bool isObjCImageInfo(const Section §) { return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); } llvm::Error normalizedObjectToAtoms(MachOFile *file, const NormalizedFile &normalizedFile, bool copyRefs) { DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " << file->path() << "\n"); bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); // Create atoms from each section. for (auto § : normalizedFile.sections) { DEBUG(llvm::dbgs() << "Creating atoms: "; sect.dump()); if (isDebugInfoSection(sect)) continue; // If the file contains an objc_image_info struct, then we should parse the // ObjC flags and Swift version. if (isObjCImageInfo(sect)) { if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) return ec; // We then skip adding atoms for this section as we use the ObjCPass to // re-emit this data after it has been aggregated for all files. continue; } bool customSectionName; DefinedAtom::ContentType atomType = atomTypeFromSection(sect, customSectionName); if (auto ec = processSection(atomType, sect, customSectionName, normalizedFile, *file, scatterable, copyRefs)) return ec; } // Create atoms from undefined symbols. for (auto &sym : normalizedFile.undefinedSymbols) { // Undefinded symbols with n_value != 0 are actually tentative definitions. if (sym.value == Hex64(0)) { file->addUndefinedAtom(sym.name, copyRefs); } else { file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, DefinedAtom::Alignment(1 << (sym.desc >> 8)), copyRefs); } } // Convert mach-o relocations to References std::unique_ptr handler = ArchHandler::create(normalizedFile.arch); for (auto § : normalizedFile.sections) { if (isDebugInfoSection(sect)) continue; if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, *file, *handler)) return ec; } // Add additional arch-specific References file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { handler->addAdditionalReferences(*atom); }); // Each __eh_frame section needs references to both __text (the function we're // providing unwind info for) and itself (FDE -> CIE). These aren't // represented in the relocations on some architectures, so we have to add // them back in manually there. if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) return ec; // Process mach-o data-in-code regions array. That information is encoded in // atoms as References at each transition point. unsigned nextIndex = 0; for (const DataInCode &entry : normalizedFile.dataInCode) { ++nextIndex; const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); if (!s) { return llvm::make_error(Twine("LC_DATA_IN_CODE address (" + Twine(entry.offset) + ") is not in any section")); } uint64_t offsetInSect = entry.offset - s->address; uint32_t offsetInAtom; MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, &offsetInAtom); if (offsetInAtom + entry.length > atom->size()) { return llvm::make_error(Twine("LC_DATA_IN_CODE entry " "(offset=" + Twine(entry.offset) + ", length=" + Twine(entry.length) + ") crosses atom boundary.")); } // Add reference that marks start of data-in-code. atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), handler->dataInCodeTransitionStart(*atom), offsetInAtom, atom, entry.kind); // Peek at next entry, if it starts where this one ends, skip ending ref. if (nextIndex < normalizedFile.dataInCode.size()) { const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; if (nextEntry.offset == (entry.offset + entry.length)) continue; } // If data goes to end of function, skip ending ref. if ((offsetInAtom + entry.length) == atom->size()) continue; // Add reference that marks end of data-in-code. atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), handler->dataInCodeTransitionEnd(*atom), offsetInAtom+entry.length, atom, 0); } // Cache some attributes on the file for use later. file->setFlags(normalizedFile.flags); file->setArch(normalizedFile.arch); file->setOS(normalizedFile.os); file->setMinVersion(normalizedFile.minOSverson); file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); // Sort references in each atom to their canonical order. for (const DefinedAtom* defAtom : file->defined()) { reinterpret_cast(defAtom)->sortReferences(); } return llvm::Error(); } llvm::Error normalizedDylibToAtoms(MachODylibFile *file, const NormalizedFile &normalizedFile, bool copyRefs) { file->setInstallName(normalizedFile.installName); file->setCompatVersion(normalizedFile.compatVersion); file->setCurrentVersion(normalizedFile.currentVersion); // Tell MachODylibFile object about all symbols it exports. if (!normalizedFile.exportInfo.empty()) { // If exports trie exists, use it instead of traditional symbol table. for (const Export &exp : normalizedFile.exportInfo) { bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); // StringRefs from export iterator are ephemeral, so force copy. file->addExportedSymbol(exp.name, weakDef, true); } } else { for (auto &sym : normalizedFile.globalSymbols) { assert((sym.scope & N_EXT) && "only expect external symbols here"); bool weakDef = (sym.desc & N_WEAK_DEF); file->addExportedSymbol(sym.name, weakDef, copyRefs); } } // Tell MachODylibFile object about all dylibs it re-exports. for (const DependentDylib &dep : normalizedFile.dependentDylibs) { if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) file->addReExportedDylib(dep.path); } return llvm::Error(); } void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, StringRef &segmentName, StringRef §ionName, SectionType §ionType, SectionAttr §ionAttrs, bool &relocsToDefinedCanBeImplicit) { for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; p->atomType != DefinedAtom::typeUnknown; ++p) { if (p->atomType != atomType) continue; // Wild carded entries are ignored for reverse lookups. if (p->segmentName.empty() || p->sectionName.empty()) continue; segmentName = p->segmentName; sectionName = p->sectionName; sectionType = p->sectionType; sectionAttrs = 0; relocsToDefinedCanBeImplicit = false; if (atomType == DefinedAtom::typeCode) sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; if (atomType == DefinedAtom::typeCFI) relocsToDefinedCanBeImplicit = true; return; } llvm_unreachable("content type not yet supported"); } llvm::Expected> normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, bool copyRefs) { switch (normalizedFile.fileType) { case MH_DYLIB: case MH_DYLIB_STUB: return dylibToAtoms(normalizedFile, path, copyRefs); case MH_OBJECT: return objectToAtoms(normalizedFile, path, copyRefs); default: llvm_unreachable("unhandled MachO file type!"); } } #ifndef NDEBUG void Section::dump(llvm::raw_ostream &OS) const { OS << "Section (\"" << segmentName << ", " << sectionName << "\""; OS << ", addr: " << llvm::format_hex(address, 16, true); OS << ", size: " << llvm::format_hex(content.size(), 8, true) << ")\n"; } #endif } // namespace normalized } // namespace mach_o } // namespace lld