1 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \file Converts from in-memory normalized mach-o to in-memory Atoms.
23 #include "MachONormalizedFile.h"
24 #include "ArchHandler.h"
27 #include "MachONormalizedFileBinaryUtils.h"
28 #include "lld/Core/Error.h"
29 #include "lld/Core/LLVM.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/Format.h"
32 #include "llvm/Support/MachO.h"
33 #include "llvm/Support/LEB128.h"
34 #include "llvm/Support/raw_ostream.h"
36 using namespace llvm::MachO;
37 using namespace lld::mach_o::normalized;
39 #define DEBUG_TYPE "normalized-file-to-atoms"
45 namespace { // anonymous
// Shorthand for one row of the sectsToAtomType table. The last argument is
// prefixed with DefinedAtom:: so rows can name the ContentType enumerator
// directly (e.g. typeCode).
48 #define ENTRY(seg, sect, type, atomType) \
49 {seg, sect, type, DefinedAtom::atomType }
// One row of the mach-o section -> atom ContentType lookup table that
// atomTypeFromSection() scans.
51 struct MachORelocatableSectionToAtomType {
// Segment name to match; atomTypeFromSection() treats an empty name as a
// wildcard.
52 StringRef segmentName;
// Section name to match; an empty name is likewise a wildcard.
53 StringRef sectionName;
// Mach-O section type, compared against the section's type.
54 SectionType sectionType;
// ContentType associated with sections that match this row.
55 DefinedAtom::ContentType atomType;
// Table used by atomTypeFromSection() to map a section's (segment name,
// section name, section type) to a ContentType. It is walked from the first
// row until a row whose atomType is typeUnknown, so the final
// ENTRY("", "", S_REGULAR, typeUnknown) row acts as the terminator.
// Rows with empty segment and section names match any names for that section
// type; atomTypeFromSection() reports such matches as "custom section name".
// NOTE(review): the two "__TEXT"/"__text"/S_REGULAR rows have identical match
// patterns, so a name/type lookup cannot distinguish them; atomFromSymbol()
// separately retypes typeCode atoms to typeResolver based on symbol flags.
58 const MachORelocatableSectionToAtomType sectsToAtomType[] = {
59 ENTRY("__TEXT", "__text", S_REGULAR, typeCode),
60 ENTRY("__TEXT", "__text", S_REGULAR, typeResolver),
61 ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString),
62 ENTRY("", "", S_CSTRING_LITERALS, typeCString),
63 ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String),
64 ENTRY("__TEXT", "__const", S_REGULAR, typeConstant),
65 ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant),
66 ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI),
67 ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI),
68 ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4),
69 ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8),
70 ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16),
71 ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA),
72 ENTRY("__DATA", "__data", S_REGULAR, typeData),
73 ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData),
74 ENTRY("__DATA", "__const", S_REGULAR, typeConstData),
75 ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString),
76 ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS,
78 ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS,
80 ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS,
82 ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill),
83 ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS,
85 ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples),
86 ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES,
88 ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
89 ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL,
90 typeTLVInitialZeroFill),
91 ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo),
92 ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList),
93 ENTRY("", "", S_INTERPOSING, typeInterposingTuples),
94 ENTRY("__LD", "__compact_unwind", S_REGULAR,
95 typeCompactUnwindInfo),
96 ENTRY("", "", S_REGULAR, typeUnknown)
101 /// Figures out ContentType of a mach-o section.
///
/// The sectsToAtomType table is consulted first (section type, then segment
/// and section names, with empty table names acting as wildcards). If the
/// table does not decide, a section carrying S_ATTR_PURE_INSTRUCTIONS is
/// classified as code; anything else is typeUnknown.
///
/// \param section            the normalized mach-o section to classify.
/// \param customSectionName  [out] reset to false on entry; set to true when
///                           the matching table row has both its segment and
///                           section names empty (a pure wildcard row).
/// \returns the ContentType for atoms created from \p section.
102 DefinedAtom::ContentType atomTypeFromSection(const Section §ion,
103 bool &customSectionName) {
104 // First look for match of name and type. Empty names in table are wildcards.
105 customSectionName = false;
// The scan ends at the first row whose atomType is typeUnknown.
106 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
107 p->atomType != DefinedAtom::typeUnknown; ++p) {
108 if (p->sectionType != section.type)
110 if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
112 if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
// A row matched purely by wildcards means the section's own names are not
// ones the table knows about.
114 customSectionName = p->segmentName.empty() && p->sectionName.empty();
117 // Look for code denoted by section attributes
118 if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
119 return DefinedAtom::typeCode;
121 return DefinedAtom::typeUnknown;
135 /// Returns info on how to atomize a section of the specified ContentType.
///
/// Looks \p atomType up in a local static table and copies the matching row
/// into the output parameters. When no row matches, the fallback assignments
/// at the end are used (global scope, no merging, atomize at symbols).
///
/// \param atomType      content type to look up.
/// \param sizeMultiple  [out] granule size; processSection() rejects sections
///                      whose content size is not a multiple of it.
/// \param scope         [out] scope given to atoms created from the section.
/// \param merge         [out] merge policy given to those atoms.
/// \param atomizeModel  [out] strategy used to split the section into atoms.
136 void sectionParseInfo(DefinedAtom::ContentType atomType,
137 unsigned int &sizeMultiple,
138 DefinedAtom::Scope &scope,
139 DefinedAtom::Merge &merge,
140 AtomizeModel &atomizeModel) {
// Fields of one lookup-table row (mirrors the function's parameters).
142 DefinedAtom::ContentType atomType;
143 unsigned int sizeMultiple;
144 DefinedAtom::Scope scope;
145 DefinedAtom::Merge merge;
146 AtomizeModel atomizeModel;
// Redefines ENTRY for this table: type, scope and merge are qualified with
// DefinedAtom::, while size and model are passed through as written.
149 #define ENTRY(type, size, scope, merge, model) \
150 {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
// Columns: content type, size multiple, scope, merge policy, atomize model.
152 static const ParseInfo parseInfo[] = {
153 ENTRY(typeCode, 1, scopeGlobal, mergeNo,
155 ENTRY(typeData, 1, scopeGlobal, mergeNo,
157 ENTRY(typeConstData, 1, scopeGlobal, mergeNo,
159 ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo,
161 ENTRY(typeConstant, 1, scopeGlobal, mergeNo,
163 ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent,
165 ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent,
167 ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo,
169 ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent,
171 ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent,
173 ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent,
175 ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent,
177 ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo,
179 ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo,
181 ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
183 ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent,
185 ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
187 ENTRY(typeUnknown, 1, scopeGlobal, mergeNo,
// Linear search of the table for the requested content type.
191 const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
192 for (int i=0; i < tableLen; ++i) {
193 if (parseInfo[i].atomType == atomType) {
194 sizeMultiple = parseInfo[i].sizeMultiple;
195 scope = parseInfo[i].scope;
196 merge = parseInfo[i].merge;
197 atomizeModel = parseInfo[i].atomizeModel;
202 // Unknown type is atomized by symbols.
204 scope = DefinedAtom::scopeGlobal;
205 merge = DefinedAtom::mergeNo;
206 atomizeModel = atomizeAtSymbols;
/// Converts the raw scope byte of a normalized mach-o symbol into an
/// Atom::Scope (global, linkage-unit or translation-unit).
/// Reaches llvm_unreachable for a value it does not recognize.
210 Atom::Scope atomScope(uint8_t scope) {
213 return Atom::scopeGlobal;
216 return Atom::scopeLinkageUnit;
218 return Atom::scopeTranslationUnit;
220 llvm_unreachable("unknown scope value!");
/// Appends to \p outSyms a pointer to every symbol in \p inSymbols that is a
/// section-relative definition (N_SECT) belonging to section \p sectionIndex.
///
/// The stored pointers refer to elements of \p inSymbols, so that vector must
/// stay alive and unmodified while \p outSyms is in use.
223 void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
224 uint32_t sectionIndex,
225 SmallVector<const Symbol *, 64> &outSyms) {
226 for (const Symbol &sym : inSymbols) {
227 // Only look at definition symbols.
228 if ((sym.type & N_TYPE) != N_SECT)
// Only symbols whose section number equals the requested index are kept.
230 if (sym.sect != sectionIndex)
232 outSyms.push_back(&sym);
/// Creates one atom in \p file covering the address range
/// [\p symbolAddr, \p nextSymbolAddr) of \p section.
///
/// Depending on the section and content type this adds a zero-fill atom, an
/// atom in a custom "seg/sect" section (when \p atomType is typeUnknown), or
/// a regular defined atom. A typeCode atom whose symbol carries
/// N_SYMBOL_RESOLVER is retyped as typeResolver.
///
/// \param symbolName       name for the atom (may be empty for anonymous).
/// \param symbolDescFlags  n_desc flags of the symbol; N_NO_DEAD_STRIP,
///                         N_WEAK_DEF, N_ARM_THUMB_DEF and N_SYMBOL_RESOLVER
///                         are examined.
/// \param nextSymbolAddr   address where the atom's content ends.
/// \param scatterable      when false, the atom is marked no-dead-strip.
236 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion,
237 MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
238 uint16_t symbolDescFlags, Atom::Scope symbolScope,
239 uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
240 // Mach-O symbol table does not have a size in it. Instead the size is the
241 // difference between this and the next symbol.
242 uint64_t size = nextSymbolAddr - symbolAddr;
// Atom position is expressed as an offset from the start of the section.
243 uint64_t offset = symbolAddr - section.address;
244 bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
245 if (isZeroFillSection(section.type)) {
246 file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
247 noDeadStrip, copyRefs, §ion);
// Weak definitions merge as weak; everything else does not merge.
249 DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
250 ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
251 bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
252 if (atomType == DefinedAtom::typeUnknown) {
253 // Mach-O needs a segment and section name. Concatenate those two
254 // with a / separator (e.g. "seg/sect") to fit into the lld model
255 // of just a section name.
256 std::string segSectName = section.segmentName.str()
257 + "/" + section.sectionName.str();
258 file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
259 merge, thumb, noDeadStrip, offset,
260 size, segSectName, true, §ion);
// Code symbols flagged N_SYMBOL_RESOLVER become resolver atoms.
262 if ((atomType == lld::DefinedAtom::typeCode) &&
263 (symbolDescFlags & N_SYMBOL_RESOLVER)) {
264 atomType = lld::DefinedAtom::typeResolver;
266 file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
267 offset, size, thumb, noDeadStrip, copyRefs, §ion);
/// Atomizes \p section at symbol boundaries.
///
/// Collects the global and local symbols defined in the section, sorts them
/// by address, and creates one atom per symbol that extends to the next
/// symbol's address (the last one extends to the end of the section content).
/// Content that precedes the first symbol, or a section that has content but
/// no symbols, becomes an anonymous translation-unit-scoped atom. Afterwards
/// kindLayoutAfter references may be added between consecutive atoms.
///
/// \returns a success llvm::Error on every path visible here.
272 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
273 const Section §ion,
274 const NormalizedFile &normalizedFile,
275 MachOFile &file, bool scatterable,
277 // Find section's index.
// Counting starts at 1, the same numbering Symbol::sect is compared against
// in appendSymbolsInSection().
278 uint32_t sectIndex = 1;
279 for (auto § : normalizedFile.sections) {
280 if (§ == §ion)
285 // Find all symbols in this section.
286 SmallVector<const Symbol *, 64> symbols;
287 appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
288 appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols);
// Sort keys, in order: address, scope, whether the name starts with 'l',
// and finally the name itself.
291 std::sort(symbols.begin(), symbols.end(),
292 [](const Symbol *lhs, const Symbol *rhs) -> bool {
296 uint64_t lhsAddr = lhs->value;
297 uint64_t rhsAddr = rhs->value;
298 if (lhsAddr != rhsAddr)
299 return lhsAddr < rhsAddr;
300 // If same address, one is an alias so sort by scope.
301 Atom::Scope lScope = atomScope(lhs->scope);
302 Atom::Scope rScope = atomScope(rhs->scope);
303 if (lScope != rScope)
304 return lScope < rScope;
305 // If same address and scope, see if one might be better as
// NOTE(review): front() assumes the symbol name is non-empty -- confirm that
// defined symbols always have a name here.
307 bool lPrivate = (lhs->name.front() == 'l');
308 bool rPrivate = (rhs->name.front() == 'l');
309 if (lPrivate != rPrivate)
311 // If same address and scope, sort by name.
312 return lhs->name < rhs->name;
315 // Debug logging of symbols.
316 //for (const Symbol *sym : symbols)
317 // llvm::errs() << " sym: "
318 // << llvm::format("0x%08llx ", (uint64_t)sym->value)
319 // << ", " << sym->name << "\n";
321 // If section has no symbols and no content, there are no atoms.
322 if (symbols.empty() && section.content.empty())
323 return llvm::Error();
325 if (symbols.empty()) {
326 // Section has no symbols, put all content in one anonymous atom.
327 atomFromSymbol(atomType, section, file, section.address, StringRef(),
328 0, Atom::scopeTranslationUnit,
329 section.address + section.content.size(),
330 scatterable, copyRefs);
332 else if (symbols.front()->value != section.address) {
333 // Section has anonymous content before first symbol.
334 atomFromSymbol(atomType, section, file, section.address, StringRef(),
335 0, Atom::scopeTranslationUnit, symbols.front()->value,
336 scatterable, copyRefs);
// Each atom is emitted one step late, once the following symbol (and hence
// the atom's end address) is known.
339 const Symbol *lastSym = nullptr;
340 for (const Symbol *sym : symbols) {
341 if (lastSym != nullptr) {
342 // Ignore any assembler added "ltmpNNN" symbol at start of section
343 // if there is another symbol at the start.
344 if ((lastSym->value != sym->value)
345 || lastSym->value != section.address
346 || !lastSym->name.startswith("ltmp")) {
347 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
348 lastSym->desc, atomScope(lastSym->scope), sym->value,
349 scatterable, copyRefs);
// The final symbol's atom runs to the end of the section content.
354 if (lastSym != nullptr) {
355 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
356 lastSym->desc, atomScope(lastSym->scope),
357 section.address + section.content.size(),
358 scatterable, copyRefs);
361 // If object built without .subsections_via_symbols, add reference chain.
// Each atom gets a kindLayoutAfter reference from the atom visited before it.
363 MachODefinedAtom *prevAtom = nullptr;
364 file.eachAtomInSection(section,
365 [&](MachODefinedAtom *atom, uint64_t offset)->void {
367 prevAtom->addReference(Reference::KindNamespace::all,
368 Reference::KindArch::all,
369 Reference::kindLayoutAfter, 0, atom, 0);
374 return llvm::Error();
/// Splits one section of a normalized mach-o file into atoms in \p file.
///
/// The splitting strategy comes from sectionParseInfo(\p atomType). Sections
/// atomized at symbols are delegated to processSymboledSection(); all other
/// models walk the content from offset 0, computing the size of each atom
/// from the model (fixed size, pointer size, zero-terminated string, CFI
/// length field, compact-unwind entry, CFString object).
///
/// \param customSectionName  when true, atoms are added to a custom section
///                           named "segment/section".
/// \returns an error when the section size is not a multiple of the type's
///          size multiple, when a CFI's length runs past the end of the
///          section, or (per the message) when the last atom is not zero
///          terminated; success otherwise.
377 llvm::Error processSection(DefinedAtom::ContentType atomType,
378 const Section §ion,
379 bool customSectionName,
380 const NormalizedFile &normalizedFile,
381 MachOFile &file, bool scatterable,
383 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
384 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
386 // Get info on how to atomize section.
387 unsigned int sizeMultiple;
388 DefinedAtom::Scope scope;
389 DefinedAtom::Merge merge;
390 AtomizeModel atomizeModel;
391 sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
393 // Validate section size.
394 if ((section.content.size() % sizeMultiple) != 0)
395 return llvm::make_error<GenericError>(Twine("Section ")
396 + section.segmentName
397 + "/" + section.sectionName
399 + Twine(section.content.size())
400 + ") which is not a multiple of "
401 + Twine(sizeMultiple));
403 if (atomizeModel == atomizeAtSymbols) {
404 // Break section up into atoms at symbol boundaries.
405 return processSymboledSection(atomType, section, normalizedFile, file,
406 scatterable, copyRefs);
// All remaining models carve atoms sequentially out of the section content.
409 for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
410 switch (atomizeModel) {
411 case atomizeFixedSize:
412 // Break section up into atoms each with a fixed size.
415 case atomizePointerSize:
416 // Break section up into atoms each the size of a pointer.
420 // Break section up into zero terminated c-strings.
422 for (unsigned int i = offset; i < e; ++i) {
423 if (section.content[i] == 0) {
424 size = i + 1 - offset;
430 // Break section up into zero terminated UTF16 strings.
// NOTE(review): typeUTF16String has a size multiple of 1 in
// sectionParseInfo(), so an odd-sized section is not rejected above; in that
// case content[i + 1] on the last iteration would index one past the end.
// Confirm whether odd sizes can reach this loop.
432 for (unsigned int i = offset; i < e; i += 2) {
433 if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
434 size = i + 2 - offset;
440 // Break section up into dwarf unwind CFIs (FDE or CIE).
// The 32-bit length field does not count itself, hence the + 4.
441 size = read32(§ion.content[offset], isBig) + 4;
442 if (offset+size > section.content.size()) {
443 return llvm::make_error<GenericError>(Twine("Section ")
444 + section.segmentName
445 + "/" + section.sectionName
446 + " is malformed. Size of CFI "
447 "starting at offset ("
449 + ") is past end of section.");
453 // Break section up into compact unwind entries.
454 size = is64 ? 32 : 20;
456 case atomizeCFString:
457 // Break section up into NS/CFString objects.
458 size = is64 ? 32 : 16;
460 case atomizeAtSymbols:
464 return llvm::make_error<GenericError>(Twine("Section ")
465 + section.segmentName
466 + "/" + section.sectionName
467 + " is malformed. The last atom "
468 "is not zero terminated.");
470 if (customSectionName) {
471 // Mach-O needs a segment and section name. Concatenate those two
472 // with a / separator (e.g. "seg/sect") to fit into the lld model
473 // of just a section name.
474 std::string segSectName = section.segmentName.str()
475 + "/" + section.sectionName.str();
476 file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
477 merge, false, false, offset,
478 size, segSectName, true, §ion);
// Otherwise add an anonymous atom with the scope/merge from the parse info.
480 file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
481 false, false, copyRefs, §ion);
486 return llvm::Error();
/// Searches the sections of \p normalizedFile for one whose address range
/// [address, address + content size) contains the requested address.
/// Because the range is derived from content.size(), a section with empty
/// content never satisfies the test.
489 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
491 for (const Section &s : normalizedFile.sections) {
492 uint64_t sAddr = s.address;
// Half-open interval test: start inclusive, end exclusive.
493 if ((sAddr <= address) && (address < sAddr+s.content.size())) {
/// Locates the atom in \p file that contains address \p addr.
///
/// The section covering \p addr is found first; \p addr is then converted to
/// an offset within that section and passed to
/// MachOFile::findAtomCoveringAddress().
///
/// \param addend  [out] receives the offset of \p addr within the atom found.
500 const MachODefinedAtom *
501 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
502 uint64_t addr, Reference::Addend *addend) {
503 const Section *sect = nullptr;
504 sect = findSectionCoveringAddress(normalizedFile, addr);
508 uint32_t offsetInTarget;
509 uint64_t offsetInSect = addr - sect->address;
511 file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
512 *addend = offsetInTarget;
516 // Walks all relocations for a section in a normalized .o file and
517 // creates corresponding lld::Reference objects.
//
// For each relocation the containing atom is looked up, the ArchHandler
// translates the relocation (or relocation pair) into a reference kind,
// target atom and addend, and the reference is added to the containing atom.
// Two lambdas are handed to the ArchHandler so it can resolve targets either
// by (section index, address) or by symbol index. Errors from the handler are
// rewrapped in a GenericError that also describes the relocation fields.
518 llvm::Error convertRelocs(const Section §ion,
519 const NormalizedFile &normalizedFile,
522 ArchHandler &handler) {
523 // Utility function for ArchHandler to find atom by its address.
// sectIndex is 1-based; 0 means "find the section from the address".
524 auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
525 const lld::Atom **atom, Reference::Addend *addend)
527 if (sectIndex > normalizedFile.sections.size())
528 return llvm::make_error<GenericError>(Twine("out of range section "
529 "index (") + Twine(sectIndex) + ")");
530 const Section *sect = nullptr;
531 if (sectIndex == 0) {
532 sect = findSectionCoveringAddress(normalizedFile, addr);
534 return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
535 + ") is not in any section"));
537 sect = &normalizedFile.sections[sectIndex-1];
539 uint32_t offsetInTarget;
540 uint64_t offsetInSect = addr - sect->address;
541 *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
// The addend is the distance of addr from the start of the atom found.
542 *addend = offsetInTarget;
543 return llvm::Error();
546 // Utility function for ArchHandler to find atom by its symbol index.
547 auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
549 // Find symbol from index.
// Symbol indexes run through the local, then global, then undefined symbol
// arrays as one contiguous index space.
550 const Symbol *sym = nullptr;
551 uint32_t numLocal = normalizedFile.localSymbols.size();
552 uint32_t numGlobal = normalizedFile.globalSymbols.size();
553 uint32_t numUndef = normalizedFile.undefinedSymbols.size();
554 if (symbolIndex < numLocal) {
555 sym = &normalizedFile.localSymbols[symbolIndex];
556 } else if (symbolIndex < numLocal+numGlobal) {
557 sym = &normalizedFile.globalSymbols[symbolIndex-numLocal];
558 } else if (symbolIndex < numLocal+numGlobal+numUndef) {
559 sym = &normalizedFile.undefinedSymbols[symbolIndex-numLocal-numGlobal];
561 return llvm::make_error<GenericError>(Twine("symbol index (")
562 + Twine(symbolIndex) + ") out of range");
564 // Find atom from symbol.
565 if ((sym->type & N_TYPE) == N_SECT) {
// NOTE(review): only the upper bound of sym->sect is checked before
// sections[sym->sect-1]; confirm an N_SECT symbol can never have sect == 0.
566 if (sym->sect > normalizedFile.sections.size())
567 return llvm::make_error<GenericError>(Twine("symbol section index (")
568 + Twine(sym->sect) + ") out of range ");
569 const Section &symSection = normalizedFile.sections[sym->sect-1];
570 uint64_t targetOffsetInSect = sym->value - symSection.address;
571 MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
575 return llvm::Error();
577 return llvm::make_error<GenericError>("no atom found for defined symbol");
578 } else if ((sym->type & N_TYPE) == N_UNDF) {
579 const lld::Atom *target = file.findUndefAtom(sym->name);
582 return llvm::Error();
584 return llvm::make_error<GenericError>("no undefined atom found for sym");
587 return llvm::make_error<GenericError>("no atom found for symbol");
591 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
592 // Use old-school iterator so that paired relocations can be grouped.
593 for (auto it=section.relocations.begin(), e=section.relocations.end();
595 const Relocation &reloc = *it;
596 // Find atom this relocation is in.
597 if (reloc.offset > section.content.size())
598 return llvm::make_error<GenericError>(
599 Twine("r_address (") + Twine(reloc.offset)
600 + ") is larger than section size ("
601 + Twine(section.content.size()) + ")");
602 uint32_t offsetInAtom;
603 MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
606 assert(inAtom && "r_address in range, should have found atom");
607 uint64_t fixupAddress = section.address + reloc.offset;
609 const lld::Atom *target = nullptr;
610 Reference::Addend addend = 0;
611 Reference::KindValue kind;
612 if (handler.isPairedReloc(reloc)) {
613 // Handle paired relocations together.
// NOTE(review): the iterator is advanced and dereferenced without a visible
// check against the end iterator; a paired relocation that is last in the
// section would read past the end. Confirm the input is validated elsewhere.
614 const Relocation &reloc2 = *++it;
615 auto relocErr = handler.getPairReferenceInfo(
616 reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
617 atomByAddr, atomBySymbol, &kind, &target, &addend);
// Wrap the handler's error with the fields of both relocations.
619 return handleErrors(std::move(relocErr),
620 [&](std::unique_ptr<GenericError> GE) {
621 return llvm::make_error<GenericError>(
622 Twine("bad relocation (") + GE->getMessage()
624 + section.segmentName + "/" + section.sectionName
625 + " (r1_address=" + Twine::utohexstr(reloc.offset)
626 + ", r1_type=" + Twine(reloc.type)
627 + ", r1_extern=" + Twine(reloc.isExtern)
628 + ", r1_length=" + Twine((int)reloc.length)
629 + ", r1_pcrel=" + Twine(reloc.pcRel)
630 + (!reloc.scattered ? (Twine(", r1_symbolnum=")
631 + Twine(reloc.symbol))
632 : (Twine(", r1_scattered=1, r1_value=")
633 + Twine(reloc.value)))
635 + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
636 + ", r2_type=" + Twine(reloc2.type)
637 + ", r2_extern=" + Twine(reloc2.isExtern)
638 + ", r2_length=" + Twine((int)reloc2.length)
639 + ", r2_pcrel=" + Twine(reloc2.pcRel)
640 + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
641 + Twine(reloc2.symbol))
642 : (Twine(", r2_scattered=1, r2_value=")
643 + Twine(reloc2.value)))
649 // Use ArchHandler to convert relocation record into information
650 // needed to instantiate an lld::Reference object.
651 auto relocErr = handler.getReferenceInfo(
652 reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
653 atomBySymbol, &kind, &target, &addend);
// Wrap the handler's error with the fields of the relocation.
655 return handleErrors(std::move(relocErr),
656 [&](std::unique_ptr<GenericError> GE) {
657 return llvm::make_error<GenericError>(
658 Twine("bad relocation (") + GE->getMessage()
660 + section.segmentName + "/" + section.sectionName
661 + " (r_address=" + Twine::utohexstr(reloc.offset)
662 + ", r_type=" + Twine(reloc.type)
663 + ", r_extern=" + Twine(reloc.isExtern)
664 + ", r_length=" + Twine((int)reloc.length)
665 + ", r_pcrel=" + Twine(reloc.pcRel)
666 + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
667 : (Twine(", r_scattered=1, r_value=")
668 + Twine(reloc.value)))
673 // Instantiate an lld::Reference object and add to its atom.
674 inAtom->addReference(Reference::KindNamespace::mach_o,
676 kind, offsetInAtom, target, addend);
679 return llvm::Error();
/// Tests whether \p section holds debug info: it examines the S_ATTR_DEBUG
/// attribute bit and whether the segment name is "__DWARF".
682 bool isDebugInfoSection(const Section §ion) {
683 if ((section.attributes & S_ATTR_DEBUG) == 0)
685 return section.segmentName.equals("__DWARF");
/// Reads a signed value from \p addr and returns it as int64_t.
/// One path reads 64 bits with read64(); the other reads 32 bits with
/// read32() into an int32_t (presumably selected by \p is64 -- the condition
/// is not shown here). \p isBig is forwarded as the byte-order flag.
688 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
690 return read64(addr, isBig);
692 int32_t res = read32(addr, isBig);
696 /// --- Augmentation String Processing ---
// Per-CIE facts recorded by processAugmentationString() and consumed by
// processCIE()/processFDE(). The offset fields are offsets within the
// augmentation data and use ~0U to mean "not present".
// Set when the augmentation string starts with 'z'.
699 bool _augmentationDataPresent = false;
// Set when the augmentation string contains "eh".
700 bool _mayHaveEH = false;
// Offset recorded for an 'L' entry in the augmentation string.
701 uint32_t _offsetOfLSDA = ~0U;
// Offset recorded for a 'P' entry in the augmentation string.
702 uint32_t _offsetOfPersonality = ~0U;
// Offset recorded for an 'R' entry in the augmentation string.
703 uint32_t _offsetOfFDEPointerEncoding = ~0U;
// Total augmentation data length implied by the augmentation string.
704 uint32_t _augmentationDataLength = ~0U;
// Maps each CIE atom to the CIEInfo parsed from it; filled in by processCIE()
// and looked up by processFDE().
707 typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
/// Parses the NUL-terminated augmentation string of a CIE at \p augStr and
/// records what it finds in a CIEInfo.
///
/// An empty string succeeds immediately. Otherwise the string must begin with
/// 'z'; the characters 'L', 'P' and 'R' and the pair "eh" are recognized.
/// For 'L', 'P' and 'R' the current offset into the augmentation data is
/// stored, then advanced by the size that entry occupies (1, 5 and 1 bytes).
///
/// \returns an error if the string does not start with 'z' or if an 'e' is
///          not followed by 'h'; success otherwise.
709 static llvm::Error processAugmentationString(const uint8_t *augStr,
713 if (augStr[0] == '\0') {
715 return llvm::Error();
718 if (augStr[0] != 'z')
719 return llvm::make_error<GenericError>("expected 'z' at start of "
720 "augmentation string");
722 cieInfo._augmentationDataPresent = true;
// Running size of the augmentation data described so far.
725 uint32_t offsetInAugmentationData = 0;
726 while (augStr[idx] != '\0') {
727 if (augStr[idx] == 'L') {
728 cieInfo._offsetOfLSDA = offsetInAugmentationData;
729 // This adds a single byte to the augmentation data.
730 ++offsetInAugmentationData;
734 if (augStr[idx] == 'P') {
735 cieInfo._offsetOfPersonality = offsetInAugmentationData;
736 // This adds a single byte to the augmentation data for the encoding,
737 // then a number of bytes for the pointer data.
738 // FIXME: We are assuming 4 is correct here for the pointer size as we
739 // always currently use delta32ToGOT.
740 offsetInAugmentationData += 5;
744 if (augStr[idx] == 'R') {
745 cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
746 // This adds a single byte to the augmentation data.
747 ++offsetInAugmentationData;
751 if (augStr[idx] == 'e') {
752 if (augStr[idx + 1] != 'h')
753 return llvm::make_error<GenericError>("expected 'eh' in "
754 "augmentation string");
755 cieInfo._mayHaveEH = true;
// The accumulated size is what processCIE() compares with the CIE's own
// augmentation length field.
762 cieInfo._augmentationDataLength = offsetInAugmentationData;
765 return llvm::Error();
/// Parses one CIE atom of the __eh_frame section and records its CIEInfo in
/// \p cieInfos.
///
/// The augmentation string is parsed first. If it declares a personality
/// function, the fixed CIE fields are walked to find the personality pointer;
/// an existing relocation must sit exactly at that offset (and be the only
/// one), otherwise a reference to the personality function is synthesized
/// from the encoded delta. A CIE without a personality entry must have no
/// relocations at all.
///
/// \returns an error for a malformed augmentation string, a mismatched
///          augmentation data length, or unexpected relocations.
768 static llvm::Error processCIE(const NormalizedFile &normalizedFile,
770 mach_o::ArchHandler &handler,
771 const Section *ehFrameSection,
772 MachODefinedAtom *atom,
774 CIEInfoMap &cieInfos) {
775 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
776 const uint8_t *frameData = atom->rawContent().data();
// A length of 0xffffffff means an extended 64-bit length follows, which
// moves the CIE id field further into the record.
780 uint32_t size = read32(frameData, isBig);
781 uint64_t cieIDField = size == 0xffffffffU
782 ? sizeof(uint32_t) + sizeof(uint64_t)
784 uint64_t versionField = cieIDField + sizeof(uint32_t);
785 uint64_t augmentationStringField = versionField + sizeof(uint8_t);
787 unsigned augmentationStringLength = 0;
788 if (auto err = processAugmentationString(frameData + augmentationStringField,
789 cieInfo, augmentationStringLength))
792 if (cieInfo._offsetOfPersonality != ~0U) {
793 // If we have augmentation data for the personality function, then we may
794 // need to implicitly generate its relocation.
796 // Parse the EH Data field which is pointer sized.
797 uint64_t EHDataField = augmentationStringField + augmentationStringLength;
798 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
// The EH data field only exists when the augmentation string had "eh".
799 unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
801 // Parse Code Align Factor which is a ULEB128.
802 uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
// The decoded values are discarded; only the encoded lengths are needed to
// step over these fields.
803 unsigned lengthFieldSize = 0;
804 llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
806 // Parse Data Align Factor which is a SLEB128.
807 uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
808 llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
810 // Parse Return Address Register which is a byte.
811 uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
813 // Parse the augmentation length which is a ULEB128.
814 uint64_t AugmentationLengthField = ReturnAddressField + 1;
815 uint64_t AugmentationLength =
816 llvm::decodeULEB128(frameData + AugmentationLengthField,
// Cross-check the encoded length against what the augmentation string implied.
819 if (AugmentationLength != cieInfo._augmentationDataLength)
820 return llvm::make_error<GenericError>("CIE augmentation data length "
823 // Get the start address of the augmentation data.
824 uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
826 // Parse the personality function from the augmentation data.
827 uint64_t PersonalityField =
828 AugmentationDataField + cieInfo._offsetOfPersonality;
830 // Parse the personality encoding.
831 // FIXME: Verify that this is a 32-bit pcrel offset.
// Skip the one-byte encoding to reach the pointer itself.
832 uint64_t PersonalityFunctionField = PersonalityField + 1;
834 if (atom->begin() != atom->end()) {
835 // If we have an explicit relocation, then make sure it matches this
836 // offset as this is where we'd expect it to be applied to.
837 DefinedAtom::reference_iterator CurrentRef = atom->begin();
838 if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
839 return llvm::make_error<GenericError>("CIE personality reloc at "
842 if (++CurrentRef != atom->end())
843 return llvm::make_error<GenericError>("CIE contains too many relocs");
845 // Implicitly generate the personality function reloc. It's assumed to
846 // be a delta32 offset to a GOT entry.
847 // FIXME: Parse the encoding and check this.
// Target address = address of the field itself + the signed delta stored there.
848 int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
849 uint64_t funcAddress = ehFrameSection->address + offset +
850 PersonalityFunctionField;
851 funcAddress += funcDelta;
853 const MachODefinedAtom *func = nullptr;
854 Reference::Addend addend;
855 func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
857 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
858 handler.unwindRefToPersonalityFunctionKind(),
859 PersonalityFunctionField, func, addend);
861 } else if (atom->begin() != atom->end()) {
862 // Otherwise, we expect there to be no relocations in this atom as the only
863 // relocation would have been to the personality function.
864 return llvm::make_error<GenericError>("unexpected relocation in CIE");
// Remember this CIE's info so its FDEs can look it up.
868 cieInfos[atom] = std::move(cieInfo);
870 return llvm::Error();
/// Parses one FDE atom of the __eh_frame section and makes sure it has
/// references to its parent CIE, to the function it describes and, when the
/// CIE's augmentation declares one, to its LSDA.
///
/// Existing references (from compiler-emitted relocations) are sorted and
/// consumed in offset order; for each expected field either the existing
/// reference is checked or a new one is synthesized from the value encoded
/// in the FDE.
///
/// \param cieInfos  CIE information previously gathered by processCIE().
/// \returns a success llvm::Error on every path visible here; inconsistent
///          input is caught by asserts or report_fatal_error instead.
873 static llvm::Error processFDE(const NormalizedFile &normalizedFile,
875 mach_o::ArchHandler &handler,
876 const Section *ehFrameSection,
877 MachODefinedAtom *atom,
879 const CIEInfoMap &cieInfos) {
881 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
882 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
884 // Compiler wasn't lazy and actually told us what it meant.
885 // Unfortunately, the compiler may not have generated references for all of
886 // [cie, func, lsda] and so we still need to parse the FDE and add references
887 // for any the compiler didn't generate.
// Sorting lets the references be consumed in increasing offset order below.
888 if (atom->begin() != atom->end())
889 atom->sortReferences();
891 DefinedAtom::reference_iterator CurrentRef = atom->begin();
893 // This helper returns the reference (if one exists) at the offset we are
894 // currently processing. It automatically increments the ref iterator if we
895 // do return a ref, and throws an error if we pass over a ref without
897 auto currentRefGetter = [&CurrentRef,
898 &atom](uint64_t Offset)->const Reference* {
899 // If there are no more refs found, then we are done.
900 if (CurrentRef == atom->end())
903 const Reference *Ref = *CurrentRef;
905 // If we haven't reached the offset for this reference, then return that
906 // we don't yet have a reference to process.
907 if (Offset < Ref->offsetInAtom())
910 // If the offset is equal, then we want to process this ref.
911 if (Offset == Ref->offsetInAtom()) {
916 // The current ref is at an offset which is earlier than the current
917 // offset, then we failed to consume it when we should have. In this case
919 llvm::report_fatal_error("Skipped reference when processing FDE");
922 // Helper to either get the reference at this current location, and verify
923 // that it is of the expected type, or add a reference of that type.
924 // Returns the reference target.
925 auto verifyOrAddReference = [&](uint64_t targetAddress,
926 Reference::KindValue refKind,
928 bool allowsAddend)->const Atom* {
929 if (auto *ref = currentRefGetter(refAddress)) {
930 // The compiler already emitted a relocation for the CIE ref. This should
931 // have been converted to the correct type of reference in
932 // get[Pair]ReferenceInfo().
933 assert(ref->kindValue() == refKind &&
934 "Incorrect EHFrame reference kind");
935 return ref->target();
// No existing reference at this offset: synthesize one from the address
// decoded out of the FDE.
937 Reference::Addend addend;
938 auto *target = findAtomCoveringAddress(normalizedFile, file,
939 targetAddress, &addend);
940 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
941 refKind, refAddress, target, addend);
944 assert(!addend && "EHFrame reference cannot have addend");
948 const uint8_t *startFrameData = atom->rawContent().data();
949 const uint8_t *frameData = startFrameData;
// A length of 0xffffffff means an extended 64-bit length follows, which
// moves the CIE pointer field further into the record.
951 uint32_t size = read32(frameData, isBig);
952 uint64_t cieFieldInFDE = size == 0xffffffffU
953 ? sizeof(uint32_t) + sizeof(uint64_t)
956 // Linker needs to fixup a reference from the FDE to its parent CIE (a
957 // 32-bit byte offset backwards in the __eh_frame section).
958 uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
959 uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
960 cieAddress -= cieDelta;
962 auto *cieRefTarget = verifyOrAddReference(cieAddress,
963 handler.unwindRefToCIEKind(),
964 cieFieldInFDE, false);
965 const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
966 assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
967 "FDE's CIE field does not point at the start of a CIE.");
// NOTE(review): the result of find() is dereferenced without comparing it to
// end(); this relies on the CIE having been processed before this FDE.
969 const CIEInfo &cieInfo = cieInfos.find(cie)->second;
971 // Linker needs to fixup reference from the FDE to the function it's
972 // describing. FIXME: there are actually different ways to do this, and the
973 // particular method used is specified in the CIE's augmentation fields
975 uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
// The function address is stored as a signed, pointer-sized delta from the
// field's own address.
977 int64_t functionFromFDE = readSPtr(is64, isBig,
978 frameData + rangeFieldInFDE);
979 uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
980 rangeStart += functionFromFDE;
982 verifyOrAddReference(rangeStart,
983 handler.unwindRefToFunctionKind(),
984 rangeFieldInFDE, true);
986 // Handle the augmentation data if there is any.
987 if (cieInfo._augmentationDataPresent) {
988 // First process the augmentation data length field.
// It follows the two pointer-sized fields that start at rangeFieldInFDE.
989 uint64_t augmentationDataLengthFieldInFDE =
990 rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
991 unsigned lengthFieldSize = 0;
992 uint64_t augmentationDataLength =
993 llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
996 if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
998 // Look at the augmentation data field.
999 uint64_t augmentationDataFieldInFDE =
1000 augmentationDataLengthFieldInFDE + lengthFieldSize;
1002 int64_t lsdaFromFDE = readSPtr(is64, isBig,
1003 frameData + augmentationDataFieldInFDE);
1004 uint64_t lsdaStart =
1005 ehFrameSection->address + offset + augmentationDataFieldInFDE +
// The LSDA reference uses the same reference kind as the function reference.
1008 verifyOrAddReference(lsdaStart,
1009 handler.unwindRefToFunctionKind(),
1010 augmentationDataFieldInFDE, true);
1014 return llvm::Error();
/// Adds the references implied by the __TEXT/__eh_frame section. Locates that
/// section in \p normalizedFile, then walks its atoms through
/// file.eachAtomInSection, passing each one to processCIE or processFDE
/// according to ArchHandler::isDwarfCIE. Returns a default-constructed
/// llvm::Error when there is no __eh_frame section.
/// NOTE(review): some short lines (a lone `else`, `return`, or closing brace)
/// appear to be absent from this listing; confirm the exact control flow
/// against the full file.
1017 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
1019 mach_o::ArchHandler &handler) {
// Look for the section whose segment/section names are __TEXT/__eh_frame.
1021 const Section *ehFrameSection = nullptr;
1022 for (auto §ion : normalizedFile.sections)
1023 if (section.segmentName == "__TEXT" &&
1024 section.sectionName == "__eh_frame") {
1025 ehFrameSection = §ion;
1029 // No __eh_frame so nothing to do.
1030 if (!ehFrameSection)
1031 return llvm::Error();
// ehFrameErr receives the result of each processCIE/processFDE call made by
// the callback below; the same cieInfos map is handed to both calls.
1033 llvm::Error ehFrameErr;
1034 CIEInfoMap cieInfos;
// The callback is invoked with an atom and an offset; the offset is forwarded
// unchanged to processCIE/processFDE.
1036 file.eachAtomInSection(*ehFrameSection,
1037 [&](MachODefinedAtom *atom, uint64_t offset) -> void {
// Every atom in this section is expected to hold CFI content.
1038 assert(atom->contentType() == DefinedAtom::typeCFI);
1040 // Bail out if we've encountered an error.
// Byte order is derived from the target architecture and passed to the CIE
// test.
1044 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
// NOTE(review): no `else` is visible between the two assignments below;
// presumably processFDE is the non-CIE branch — confirm.
1045 if (ArchHandler::isDwarfCIE(isBig, atom))
1046 ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
1047 atom, offset, cieInfos);
1049 ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
1050 atom, offset, cieInfos);
/// Validates an Objective-C image-info section and records what it says on
/// \p file. The content must be exactly 8 bytes: a 32-bit version at offset 0
/// and a 32-bit flags word at offset 4, both read with the byte order of
/// normalizedFile.arch. Returns a GenericError naming the section and file
/// for a wrong size, for the version check ("should have version=0"), or when
/// either GC flag is set; otherwise returns a default-constructed llvm::Error.
/// NOTE(review): some short lines (guards, a lone `else`, message operands)
/// appear to be absent from this listing; confirm the exact control flow
/// against the full file.
1056 llvm::Error parseObjCImageInfo(const Section §,
1057 const NormalizedFile &normalizedFile,
1060 // struct objc_image_info {
1061 // uint32_t version; // initially 0
// Anything other than exactly 8 bytes is rejected.
1065 ArrayRef<uint8_t> content = sect.content;
1066 if (content.size() != 8)
1067 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1069 " in file " + file.path() +
1070 " should be 8 bytes in size");
// First word: the version field.
1072 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1073 uint32_t version = read32(content.data(), isBig);
// NOTE(review): the condition guarding this error is not on a line shown
// here; the message implies it fires for a non-zero version — confirm.
1075 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1077 " in file " + file.path() +
1078 " should have version=0");
// Second word: the flags field. Either GC bit makes the file unusable.
1080 uint32_t flags = read32(content.data() + 4, isBig);
1081 if (flags & (MachOLinkingContext::objc_supports_gc |
1082 MachOLinkingContext::objc_gc_only))
1083 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1085 " in file " + file.path() +
1086 " uses GC. This is not supported");
// Record the retain/release constraint on the file.
// NOTE(review): no `else` is visible between the two setObjcConstraint calls;
// presumably objc_retainRelease is the fallback — confirm.
1088 if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
1089 file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
1091 file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
// Bits 8-15 of flags are stored as the Swift version.
1093 file.setSwiftVersion((flags >> 8) & 0xFF);
1095 return llvm::Error();
1099 /// Converts normalized mach-o file into an lld::File and lld::Atoms.
/// Creates a MachOFile for \p path and fills it in with
/// normalizedObjectToAtoms. If that call reports an error, the error is
/// returned in place of the file; otherwise ownership of the file is handed
/// to the caller as a std::unique_ptr<File>.
1100 llvm::Expected<std::unique_ptr<lld::File>>
1101 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1103 std::unique_ptr<MachOFile> file(new MachOFile(path));
// The error is moved into the returned llvm::Expected.
1104 if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
1105 return std::move(ec);
// Success: convert the owning pointer to the File type named in the return.
1106 return std::unique_ptr<File>(std::move(file));
/// Dylib counterpart of objectToAtoms: creates a MachODylibFile for \p path
/// and fills it in with normalizedDylibToAtoms. If that call reports an
/// error, the error is returned in place of the file; otherwise ownership of
/// the file is handed to the caller as a std::unique_ptr<File>.
1109 llvm::Expected<std::unique_ptr<lld::File>>
1110 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1112 // Instantiate the MachODylibFile object that will receive the dylib's data.
1113 std::unique_ptr<MachODylibFile> file(new MachODylibFile(path));
// The error is moved into the returned llvm::Expected.
1114 if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
1115 return std::move(ec);
1116 return std::unique_ptr<File>(std::move(file));
1119 } // anonymous namespace
1121 namespace normalized {
/// Returns true when the section is named either __OBJC/__image_info or
/// __DATA/__objc_imageinfo (segment/section), the two spellings this file
/// treats as an objc_image_info section.
1123 static bool isObjCImageInfo(const Section §) {
1124 return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
1125 (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo");
/// Populates \p file from a normalized mach-o object. In the order shown
/// below it: creates atoms for each section (image-info sections are parsed
/// for ObjC flags instead), creates atoms for undefined symbols and tentative
/// definitions, converts relocations to References, adds arch-specific and
/// __eh_frame references, turns LC_DATA_IN_CODE entries into transition
/// References, copies file-level attributes onto \p file, and finally sorts
/// each defined atom's references. Returns a default-constructed llvm::Error
/// at the end of the visible body.
/// NOTE(review): some short lines (`continue;`, `return ec;`, a lone `else`,
/// closing braces, trailing call arguments) appear to be absent from this
/// listing, so several guards below are shown without the statement they
/// control; confirm the exact control flow against the full file.
1129 normalizedObjectToAtoms(MachOFile *file,
1130 const NormalizedFile &normalizedFile,
1132 DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
1133 << file->path() << "\n");
// True when the file header carries MH_SUBSECTIONS_VIA_SYMBOLS; forwarded to
// processSection and convertRelocs.
1134 bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
1136 // Create atoms from each section.
1137 for (auto § : normalizedFile.sections) {
1138 DEBUG(llvm::dbgs() << "Creating atoms: "; sect.dump());
// Debug-info sections get special handling here (the controlled statement is
// not on a line shown in this listing).
1139 if (isDebugInfoSection(sect))
1143 // If the file contains an objc_image_info struct, then we should parse the
1144 // ObjC flags and Swift version.
1145 if (isObjCImageInfo(sect)) {
1146 if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
1148 // We then skip adding atoms for this section as we use the ObjCPass to
1149 // re-emit this data after it has been aggregated for all files.
// Map the section to an atom content type, then create its atoms.
// customSectionName is filled in by atomTypeFromSection (presumably as an
// out-parameter; its argument line is not shown) and passed on.
1153 bool customSectionName;
1154 DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
1156 if (auto ec = processSection(atomType, sect, customSectionName,
1157 normalizedFile, *file, scatterable, copyRefs))
1160 // Create atoms from undefined symbols.
1161 for (auto &sym : normalizedFile.undefinedSymbols) {
1162 // Undefined symbols with n_value != 0 are actually tentative definitions.
1163 if (sym.value == Hex64(0)) {
1164 file->addUndefinedAtom(sym.name, copyRefs);
// Tentative definition: sym.value is forwarded as-is and the alignment is
// taken as 2^(desc >> 8).
// NOTE(review): Mach-O's GET_COMM_ALIGN masks this field with 0x0f; the shift
// count here is unmasked — confirm no other n_desc bits can be set for these
// symbols.
1166 file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
1167 DefinedAtom::Alignment(1 << (sym.desc >> 8)),
1172 // Convert mach-o relocations to References
// The handler is created for the file's architecture and reused by every
// later step in this function.
1173 std::unique_ptr<mach_o::ArchHandler> handler
1174 = ArchHandler::create(normalizedFile.arch);
1175 for (auto § : normalizedFile.sections) {
1176 if (isDebugInfoSection(sect))
1178 if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
1183 // Add additional arch-specific References
1184 file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
1185 handler->addAdditionalReferences(*atom);
1188 // Each __eh_frame section needs references to both __text (the function we're
1189 // providing unwind info for) and itself (FDE -> CIE). These aren't
1190 // represented in the relocations on some architectures, so we have to add
1191 // them back in manually there.
1192 if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
1195 // Process mach-o data-in-code regions array. That information is encoded in
1196 // atoms as References at each transition point.
// nextIndex is used below to peek at the entry after the current one.
// NOTE(review): its increment is not on a line shown in this listing —
// confirm it is advanced once per iteration.
1197 unsigned nextIndex = 0;
1198 for (const DataInCode &entry : normalizedFile.dataInCode) {
// Find the section containing the entry's address; failure to find one is
// reported with the error below.
1200 const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
1202 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
1203 + Twine(entry.offset)
1204 + ") is not in any section"));
// Translate the address to a section-relative offset, then to the covering
// atom and the offset inside that atom.
// NOTE(review): atom is dereferenced below without a visible null check —
// confirm findAtomCoveringAddress cannot return null here.
1206 uint64_t offsetInSect = entry.offset - s->address;
1207 uint32_t offsetInAtom;
1208 MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
// Reject entries that extend past the end of the covering atom.
1210 if (offsetInAtom + entry.length > atom->size()) {
1211 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
1213 + Twine(entry.offset)
1215 + Twine(entry.length)
1216 + ") crosses atom boundary."));
1218 // Add reference that marks start of data-in-code.
// The reference targets the atom itself and carries entry.kind as its last
// argument.
1219 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1220 handler->dataInCodeTransitionStart(*atom),
1221 offsetInAtom, atom, entry.kind);
1223 // Peek at next entry, if it starts where this one ends, skip ending ref.
1224 if (nextIndex < normalizedFile.dataInCode.size()) {
1225 const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
1226 if (nextEntry.offset == (entry.offset + entry.length))
1230 // If data goes to end of function, skip ending ref.
1231 if ((offsetInAtom + entry.length) == atom->size())
1234 // Add reference that marks end of data-in-code.
// Placed at the first byte after the region, with 0 as the last argument.
1235 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1236 handler->dataInCodeTransitionEnd(*atom),
1237 offsetInAtom+entry.length, atom, 0);
1240 // Cache some attributes on the file for use later.
1241 file->setFlags(normalizedFile.flags);
1242 file->setArch(normalizedFile.arch);
1243 file->setOS(normalizedFile.os);
1244 file->setMinVersion(normalizedFile.minOSverson);
1245 file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
1247 // Sort references in each atom to their canonical order.
// NOTE(review): reinterpret_cast is used for what looks like a downcast;
// static_cast would be the conventional choice if SimpleDefinedAtom derives
// from DefinedAtom — confirm.
1248 for (const DefinedAtom* defAtom : file->defined()) {
1249 reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
1251 return llvm::Error();
/// Populates \p file from a normalized mach-o dylib: copies the install name
/// and the compatibility/current versions, registers the symbols the dylib
/// exports, and registers every dependent dylib whose kind is
/// LC_REEXPORT_DYLIB. Returns a default-constructed llvm::Error at the end of
/// the visible body.
/// NOTE(review): some short lines (a lone `else`, closing braces) appear to
/// be absent from this listing; confirm the exact control flow against the
/// full file.
1255 normalizedDylibToAtoms(MachODylibFile *file,
1256 const NormalizedFile &normalizedFile,
1258 file->setInstallName(normalizedFile.installName);
1259 file->setCompatVersion(normalizedFile.compatVersion);
1260 file->setCurrentVersion(normalizedFile.currentVersion);
1262 // Tell MachODylibFile object about all symbols it exports.
1263 if (!normalizedFile.exportInfo.empty()) {
1264 // If exports trie exists, use it instead of traditional symbol table.
1265 for (const Export &exp : normalizedFile.exportInfo) {
// A symbol is a weak definition when its export flags contain
// EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION.
1266 bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1267 // StringRefs from export iterator are ephemeral, so force copy.
1268 file->addExportedSymbol(exp.name, weakDef, true);
// Symbol-table path: presumably the fallback when exportInfo is empty (the
// `else` is not on a line shown here). Each global symbol must be external;
// weakness comes from N_WEAK_DEF in desc, and copyRefs is honoured.
1271 for (auto &sym : normalizedFile.globalSymbols) {
1272 assert((sym.scope & N_EXT) && "only expect external symbols here");
1273 bool weakDef = (sym.desc & N_WEAK_DEF);
1274 file->addExportedSymbol(sym.name, weakDef, copyRefs);
1277 // Tell MachODylibFile object about all dylibs it re-exports.
1278 for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
1279 if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
1280 file->addReExportedDylib(dep.path);
1282 return llvm::Error();
/// Reverse lookup in sectsToAtomType: scans the table for a row whose
/// atomType equals \p atomType and whose segment and section names are both
/// non-empty, then writes that row's segment name, section name and section
/// type into the output parameters. \p relocsToDefinedCanBeImplicit is set to
/// false and then to true only for typeCFI; \p sectionAttrs is set to
/// S_ATTR_PURE_INSTRUCTIONS for typeCode. The scan stops at the row whose
/// atomType is typeUnknown, after which llvm_unreachable is reached.
/// NOTE(review): some short lines (`continue;`, `return;`, closing braces)
/// appear to be absent from this listing; confirm the exact control flow
/// against the full file.
1285 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1286 StringRef &segmentName,
1287 StringRef §ionName,
1288 SectionType §ionType,
1289 SectionAttr §ionAttrs,
1290 bool &relocsToDefinedCanBeImplicit) {
// The table is walked by pointer until the typeUnknown terminator row.
1292 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1293 p->atomType != DefinedAtom::typeUnknown; ++p) {
// Rows for other content types are not candidates.
1294 if (p->atomType != atomType)
1296 // Wild carded entries are ignored for reverse lookups.
1297 if (p->segmentName.empty() || p->sectionName.empty())
// Matching row found: report its names and section type.
1299 segmentName = p->segmentName;
1300 sectionName = p->sectionName;
1301 sectionType = p->sectionType;
// NOTE(review): sectionAttrs is only visibly assigned for typeCode below;
// confirm it is given a value for every other content type as well.
1303 relocsToDefinedCanBeImplicit = false;
1304 if (atomType == DefinedAtom::typeCode)
1305 sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1306 if (atomType == DefinedAtom::typeCFI)
1307 relocsToDefinedCanBeImplicit = true;
// Reached only when no table row satisfied the lookup.
1310 llvm_unreachable("content type not yet supported");
/// Entry point that turns a normalized mach-o file into an lld::File.
/// Dispatches on normalizedFile.fileType to dylibToAtoms or objectToAtoms and
/// returns whatever the chosen converter returns; a file type with no handler
/// reaches llvm_unreachable.
/// NOTE(review): the `case` labels of the switch are not on lines shown in
/// this listing; confirm which file types map to which converter.
1313 llvm::Expected<std::unique_ptr<lld::File>>
1314 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1316 switch (normalizedFile.fileType) {
1319 return dylibToAtoms(normalizedFile, path, copyRefs);
1321 return objectToAtoms(normalizedFile, path, copyRefs);
1323 llvm_unreachable("unhandled MachO file type!");
/// Writes a one-line, human-readable description of this section to \p OS:
/// the segment and section names in quotes, the address formatted as hex with
/// width 16, and the content size formatted as hex with width 8, followed by
/// a newline. (The third format_hex argument is `true` in both calls —
/// presumably selecting upper-case digits; confirm against llvm::format_hex.)
1328 void Section::dump(llvm::raw_ostream &OS) const {
1329 OS << "Section (\"" << segmentName << ", " << sectionName << "\"";
1330 OS << ", addr: " << llvm::format_hex(address, 16, true);
1331 OS << ", size: " << llvm::format_hex(content.size(), 8, true) << ")\n";
1335 } // namespace normalized
1336 } // namespace mach_o