1 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// \file Converts from in-memory normalized mach-o to in-memory Atoms.
22 #include "ArchHandler.h"
25 #include "MachONormalizedFile.h"
26 #include "MachONormalizedFileBinaryUtils.h"
27 #include "lld/Common/LLVM.h"
28 #include "lld/Core/Error.h"
29 #include "llvm/BinaryFormat/Dwarf.h"
30 #include "llvm/BinaryFormat/MachO.h"
31 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
32 #include "llvm/Support/DataExtractor.h"
33 #include "llvm/Support/Debug.h"
34 #include "llvm/Support/Error.h"
35 #include "llvm/Support/Format.h"
36 #include "llvm/Support/LEB128.h"
37 #include "llvm/Support/raw_ostream.h"
39 using namespace llvm::MachO;
40 using namespace lld::mach_o::normalized;
42 #define DEBUG_TYPE "normalized-file-to-atoms"
48 namespace { // anonymous
// Builds one brace-initialized row of the section table below; the
// DefinedAtom:: qualifier on the last column is supplied by the macro.
51 #define ENTRY(seg, sect, type, atomType) \
52 {seg, sect, type, DefinedAtom::atomType }
/// One row of the section-to-atom-type table: a segment/section name pair and
/// a mach-o section type, together with the atom ContentType they map to.
/// atomTypeFromSection() treats an empty segmentName or sectionName as a
/// wildcard.
54 struct MachORelocatableSectionToAtomType {
55 StringRef segmentName;
56 StringRef sectionName;
57 SectionType sectionType;
58 DefinedAtom::ContentType atomType;
/// Table mapping (segment name, section name, section type) to an atom
/// ContentType. atomTypeFromSection() walks it from the first row and stops
/// at the first row whose atomType is typeUnknown, so the final row acts as
/// the terminator and row order matters.
61 const MachORelocatableSectionToAtomType sectsToAtomType[] = {
62 ENTRY("__TEXT", "__text", S_REGULAR, typeCode),
63 ENTRY("__TEXT", "__text", S_REGULAR, typeResolver),
64 ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString),
65 ENTRY("", "", S_CSTRING_LITERALS, typeCString),
66 ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String),
67 ENTRY("__TEXT", "__const", S_REGULAR, typeConstant),
68 ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant),
69 ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI),
70 ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI),
71 ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4),
72 ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8),
73 ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16),
74 ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA),
75 ENTRY("__DATA", "__data", S_REGULAR, typeData),
76 ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData),
77 ENTRY("__DATA", "__const", S_REGULAR, typeConstData),
78 ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString),
79 ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS,
81 ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS,
83 ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS,
85 ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill),
86 ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS,
88 ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples),
89 ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES,
91 ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
92 ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL,
93 typeTLVInitialZeroFill),
94 ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo),
95 ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList),
96 ENTRY("", "", S_INTERPOSING, typeInterposingTuples),
97 ENTRY("__LD", "__compact_unwind", S_REGULAR,
98 typeCompactUnwindInfo),
99 ENTRY("", "", S_REGULAR, typeUnknown)
104 /// Figures out ContentType of a mach-o section.
///
/// Walks sectsToAtomType comparing the section type, segment name and section
/// name of each row against \p section. \p customSectionName is reset to
/// false on entry and is later set from whether the compared row has both
/// names empty. After the table lookup, a section whose attributes include
/// S_ATTR_PURE_INSTRUCTIONS is reported as typeCode; the last fallback is
/// typeUnknown.
/// NOTE(review): "§ion" in the signature looks like a mis-decoded "&section"
/// -- confirm against the upstream file.
105 DefinedAtom::ContentType atomTypeFromSection(const Section §ion,
106 bool &customSectionName) {
107 // First look for match of name and type. Empty names in table are wildcards.
108 customSectionName = false;
109 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
110 p->atomType != DefinedAtom::typeUnknown; ++p) {
111 if (p->sectionType != section.type)
113 if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
115 if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
117 customSectionName = p->segmentName.empty() && p->sectionName.empty();
120 // Look for code denoted by section attributes
121 if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
122 return DefinedAtom::typeCode;
124 return DefinedAtom::typeUnknown;
138 /// Returns info on how to atomize a section of the specified ContentType.
///
/// Looks \p atomType up in a function-local static table and copies the
/// matching row into the out-parameters \p sizeMultiple, \p scope, \p merge
/// and \p atomizeModel. The trailing assignments supply the values used for
/// an unknown type (global scope, no merging, atomize at symbols).
139 void sectionParseInfo(DefinedAtom::ContentType atomType,
140 unsigned int &sizeMultiple,
141 DefinedAtom::Scope &scope,
142 DefinedAtom::Merge &merge,
143 AtomizeModel &atomizeModel) {
// Columns of one parseInfo row.
145 DefinedAtom::ContentType atomType;
146 unsigned int sizeMultiple;
147 DefinedAtom::Scope scope;
148 DefinedAtom::Merge merge;
149 AtomizeModel atomizeModel;
// Row builder for parseInfo: qualifies the type, scope and merge columns with
// DefinedAtom::.
152 #define ENTRY(type, size, scope, merge, model) \
153 {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
155 static const ParseInfo parseInfo[] = {
156 ENTRY(typeCode, 1, scopeGlobal, mergeNo,
158 ENTRY(typeData, 1, scopeGlobal, mergeNo,
160 ENTRY(typeConstData, 1, scopeGlobal, mergeNo,
162 ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo,
164 ENTRY(typeConstant, 1, scopeGlobal, mergeNo,
166 ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent,
168 ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent,
170 ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo,
172 ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent,
174 ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent,
176 ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent,
178 ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent,
180 ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo,
182 ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo,
184 ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
186 ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent,
188 ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
190 ENTRY(typeUnknown, 1, scopeGlobal, mergeNo,
// Linear scan of the table for the row matching atomType.
194 const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
195 for (int i=0; i < tableLen; ++i) {
196 if (parseInfo[i].atomType == atomType) {
197 sizeMultiple = parseInfo[i].sizeMultiple;
198 scope = parseInfo[i].scope;
199 merge = parseInfo[i].merge;
200 atomizeModel = parseInfo[i].atomizeModel;
205 // Unknown type is atomized by symbols.
207 scope = DefinedAtom::scopeGlobal;
208 merge = DefinedAtom::mergeNo;
209 atomizeModel = atomizeAtSymbols;
/// Converts a raw uint8_t scope value into an Atom::Scope (global,
/// linkage-unit or translation-unit). Falling through to the end is treated
/// as unreachable.
213 Atom::Scope atomScope(uint8_t scope) {
216 return Atom::scopeGlobal;
219 return Atom::scopeLinkageUnit;
221 return Atom::scopeTranslationUnit;
223 llvm_unreachable("unknown scope value!");
/// Scans \p inSymbols, checking each symbol's N_TYPE bits against N_SECT and
/// its section number against \p sectionIndex, and appends pointers to the
/// collected symbols to \p outSyms. The stored pointers refer to elements of
/// \p inSymbols, so they are only usable while that vector is alive and
/// unmodified.
226 void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
227 uint32_t sectionIndex,
228 SmallVector<const Symbol *, 64> &outSyms) {
229 for (const Symbol &sym : inSymbols) {
230 // Only look at definition symbols.
231 if ((sym.type & N_TYPE) != N_SECT)
233 if (sym.sect != sectionIndex)
235 outSyms.push_back(&sym);
/// Creates one atom in \p file covering the address range
/// [\p symbolAddr, \p nextSymbolAddr) of the given section.
///
/// The atom's size is nextSymbolAddr - symbolAddr and its offset is
/// symbolAddr - section.address. Zero-fill sections use
/// addZeroFillDefinedAtom(); typeUnknown atoms are added to a custom
/// "seg/sect" section; the remaining path uses addDefinedAtom(), with typeCode
/// switched to typeResolver when N_SYMBOL_RESOLVER is set in
/// \p symbolDescFlags. The atom is marked no-dead-strip when N_NO_DEAD_STRIP
/// is set or \p scatterable is false.
/// NOTE(review): "§ion" below looks like a mis-decoded "&section" -- confirm
/// against the upstream file.
239 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion,
240 MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
241 uint16_t symbolDescFlags, Atom::Scope symbolScope,
242 uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
243 // Mach-O symbol table does not have size in it. Instead the size is the
244 // difference between this and the next symbol.
245 uint64_t size = nextSymbolAddr - symbolAddr;
246 uint64_t offset = symbolAddr - section.address;
247 bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
248 if (isZeroFillSection(section.type)) {
249 file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
250 noDeadStrip, copyRefs, §ion);
// N_WEAK_DEF selects weak merging; N_ARM_THUMB_DEF is passed on as "thumb".
252 DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
253 ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
254 bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
255 if (atomType == DefinedAtom::typeUnknown) {
256 // Mach-O needs a segment and section name. Concatenate those two
257 // with a / separator (e.g. "seg/sect") to fit into the lld model
258 // of just a section name.
259 std::string segSectName = section.segmentName.str()
260 + "/" + section.sectionName.str();
261 file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
262 merge, thumb, noDeadStrip, offset,
263 size, segSectName, true, §ion);
265 if ((atomType == lld::DefinedAtom::typeCode) &&
266 (symbolDescFlags & N_SYMBOL_RESOLVER)) {
267 atomType = lld::DefinedAtom::typeResolver;
269 file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
270 offset, size, thumb, noDeadStrip, copyRefs, §ion);
/// Atomizes a section at symbol boundaries.
///
/// Gathers the global and local symbols that belong to the section, sorts
/// them (by address, then scope, then whether the name starts with 'l', then
/// name), and calls atomFromSymbol() for anonymous leading content and for
/// each symbol, using the next symbol's address (or the end of the section
/// content) as the end of the atom. The last part adds kindLayoutAfter
/// references from the previous atom to the current one while iterating the
/// section's atoms.
/// NOTE(review): "§ion" / "§" below look like mis-decoded "&section" /
/// "&sect" -- confirm against the upstream file.
275 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
276 const Section §ion,
277 const NormalizedFile &normalizedFile,
278 MachOFile &file, bool scatterable,
280 // Find section's index.
281 uint32_t sectIndex = 1;
282 for (auto § : normalizedFile.sections) {
283 if (§ == §ion)
288 // Find all symbols in this section.
289 SmallVector<const Symbol *, 64> symbols;
290 appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
291 appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols);
294 std::sort(symbols.begin(), symbols.end(),
295 [](const Symbol *lhs, const Symbol *rhs) -> bool {
299 uint64_t lhsAddr = lhs->value;
300 uint64_t rhsAddr = rhs->value;
301 if (lhsAddr != rhsAddr)
302 return lhsAddr < rhsAddr;
303 // If same address, one is an alias so sort by scope.
304 Atom::Scope lScope = atomScope(lhs->scope);
305 Atom::Scope rScope = atomScope(rhs->scope);
306 if (lScope != rScope)
307 return lScope < rScope;
308 // If same address and scope, see if one might be better as
310 bool lPrivate = (lhs->name.front() == 'l');
311 bool rPrivate = (rhs->name.front() == 'l');
312 if (lPrivate != rPrivate)
314 // If same address and scope, sort by name.
315 return lhs->name < rhs->name;
318 // Debug logging of symbols.
319 //for (const Symbol *sym : symbols)
320 // llvm::errs() << " sym: "
321 // << llvm::format("0x%08llx ", (uint64_t)sym->value)
322 // << ", " << sym->name << "\n";
324 // If section has no symbols and no content, there are no atoms.
325 if (symbols.empty() && section.content.empty())
326 return llvm::Error::success();
328 if (symbols.empty()) {
329 // Section has no symbols, put all content in one anonymous atom.
330 atomFromSymbol(atomType, section, file, section.address, StringRef(),
331 0, Atom::scopeTranslationUnit,
332 section.address + section.content.size(),
333 scatterable, copyRefs);
335 else if (symbols.front()->value != section.address) {
336 // Section has anonymous content before first symbol.
337 atomFromSymbol(atomType, section, file, section.address, StringRef(),
338 0, Atom::scopeTranslationUnit, symbols.front()->value,
339 scatterable, copyRefs);
// Each atom is emitted one symbol late: the atom for lastSym ends where the
// current symbol begins.
342 const Symbol *lastSym = nullptr;
343 for (const Symbol *sym : symbols) {
344 if (lastSym != nullptr) {
345 // Ignore any assembler added "ltmpNNN" symbol at start of section
346 // if there is another symbol at the start.
347 if ((lastSym->value != sym->value)
348 || lastSym->value != section.address
349 || !lastSym->name.startswith("ltmp")) {
350 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
351 lastSym->desc, atomScope(lastSym->scope), sym->value,
352 scatterable, copyRefs);
// The final symbol's atom extends to the end of the section content.
357 if (lastSym != nullptr) {
358 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
359 lastSym->desc, atomScope(lastSym->scope),
360 section.address + section.content.size(),
361 scatterable, copyRefs);
364 // If object built without .subsections_via_symbols, add reference chain.
366 MachODefinedAtom *prevAtom = nullptr;
367 file.eachAtomInSection(section,
368 [&](MachODefinedAtom *atom, uint64_t offset)->void {
370 prevAtom->addReference(Reference::KindNamespace::all,
371 Reference::KindArch::all,
372 Reference::kindLayoutAfter, 0, atom, 0);
377 return llvm::Error::success();
/// Breaks a section into atoms according to the atomize model that
/// sectionParseInfo() reports for \p atomType.
///
/// Fails if the content size is not a multiple of the reported size multiple.
/// Sections atomized at symbols are delegated to processSymboledSection().
/// Otherwise the content is walked starting at offset 0, a size is chosen per
/// model, and an anonymous atom is added for each piece (in a custom
/// "seg/sect" section when \p customSectionName is set).
/// NOTE(review): "§ion" below looks like a mis-decoded "&section" -- confirm
/// against the upstream file.
380 llvm::Error processSection(DefinedAtom::ContentType atomType,
381 const Section §ion,
382 bool customSectionName,
383 const NormalizedFile &normalizedFile,
384 MachOFile &file, bool scatterable,
386 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
387 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
389 // Get info on how to atomize section.
390 unsigned int sizeMultiple;
391 DefinedAtom::Scope scope;
392 DefinedAtom::Merge merge;
393 AtomizeModel atomizeModel;
394 sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
396 // Validate section size.
397 if ((section.content.size() % sizeMultiple) != 0)
398 return llvm::make_error<GenericError>(Twine("Section ")
399 + section.segmentName
400 + "/" + section.sectionName
402 + Twine(section.content.size())
403 + ") which is not a multiple of "
404 + Twine(sizeMultiple));
406 if (atomizeModel == atomizeAtSymbols) {
407 // Break section up into atoms at symbol boundaries.
408 return processSymboledSection(atomType, section, normalizedFile, file,
409 scatterable, copyRefs);
412 for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
413 switch (atomizeModel) {
414 case atomizeFixedSize:
415 // Break section up into atoms each with a fixed size.
418 case atomizePointerSize:
419 // Break section up into atoms each the size of a pointer.
423 // Break section up into zero terminated c-strings.
425 for (unsigned int i = offset; i < e; ++i) {
426 if (section.content[i] == 0) {
427 size = i + 1 - offset;
433 // Break section up into zero terminated UTF16 strings.
435 for (unsigned int i = offset; i < e; i += 2) {
436 if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
437 size = i + 2 - offset;
443 // Break section up into dwarf unwind CFIs (FDE or CIE).
// The size is the 32-bit length read at the start of the entry plus the 4
// bytes of that length field.
444 size = read32(§ion.content[offset], isBig) + 4;
445 if (offset+size > section.content.size()) {
446 return llvm::make_error<GenericError>(Twine("Section ")
447 + section.segmentName
448 + "/" + section.sectionName
449 + " is malformed. Size of CFI "
450 "starting at offset ("
452 + ") is past end of section.");
456 // Break section up into compact unwind entries.
457 size = is64 ? 32 : 20;
459 case atomizeCFString:
460 // Break section up into NS/CFString objects.
461 size = is64 ? 32 : 16;
463 case atomizeAtSymbols:
467 return llvm::make_error<GenericError>(Twine("Section ")
468 + section.segmentName
469 + "/" + section.sectionName
470 + " is malformed. The last atom "
471 "is not zero terminated.");
473 if (customSectionName) {
474 // Mach-O needs a segment and section name. Concatenate those two
475 // with a / separator (e.g. "seg/sect") to fit into the lld model
476 // of just a section name.
477 std::string segSectName = section.segmentName.str()
478 + "/" + section.sectionName.str();
479 file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
480 merge, false, false, offset,
481 size, segSectName, true, §ion);
483 file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
484 false, false, copyRefs, §ion);
489 return llvm::Error::success();
/// Searches normalizedFile.sections for a section whose address range
/// [s.address, s.address + s.content.size()) contains the requested address.
492 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
494 for (const Section &s : normalizedFile.sections) {
495 uint64_t sAddr = s.address;
496 if ((sAddr <= address) && (address < sAddr+s.content.size())) {
/// Locates the section containing \p addr via findSectionCoveringAddress(),
/// asks \p file for the atom covering the corresponding offset within that
/// section, and stores the offset reported by that lookup in \p addend.
503 const MachODefinedAtom *
504 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
505 uint64_t addr, Reference::Addend &addend) {
506 const Section *sect = nullptr;
507 sect = findSectionCoveringAddress(normalizedFile, addr);
511 uint32_t offsetInTarget;
512 uint64_t offsetInSect = addr - sect->address;
514 file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
515 addend = offsetInTarget;
519 // Walks all relocations for a section in a normalized .o file and
520 // creates corresponding lld::Reference objects.
//
// Two lambdas are handed to the ArchHandler so it can resolve relocation
// targets: atomByAddr (section index plus address) and atomBySymbol (symbol
// table index). Errors coming back from the handler are rewrapped in a
// "bad relocation" GenericError that lists the relocation's fields.
// NOTE(review): "§ion" in the signature looks like a mis-decoded "&section"
// -- confirm against the upstream file.
521 llvm::Error convertRelocs(const Section §ion,
522 const NormalizedFile &normalizedFile,
525 ArchHandler &handler) {
526 // Utility function for ArchHandler to find atom by its address.
527 auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
528 const lld::Atom **atom, Reference::Addend *addend)
530 if (sectIndex > normalizedFile.sections.size())
531 return llvm::make_error<GenericError>(Twine("out of range section "
532 "index (") + Twine(sectIndex) + ")");
533 const Section *sect = nullptr;
// Index 0 means the section is located by address; other indexes are 1-based
// into normalizedFile.sections.
534 if (sectIndex == 0) {
535 sect = findSectionCoveringAddress(normalizedFile, addr);
537 return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
538 + ") is not in any section"));
540 sect = &normalizedFile.sections[sectIndex-1];
542 uint32_t offsetInTarget;
543 uint64_t offsetInSect = addr - sect->address;
544 *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
545 *addend = offsetInTarget;
546 return llvm::Error::success();
549 // Utility function for ArchHandler to find atom by its symbol index.
550 auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
552 // Find symbol from index.
553 const Symbol *sym = nullptr;
554 uint32_t numStabs = normalizedFile.stabsSymbols.size();
555 uint32_t numLocal = normalizedFile.localSymbols.size();
556 uint32_t numGlobal = normalizedFile.globalSymbols.size();
557 uint32_t numUndef = normalizedFile.undefinedSymbols.size();
558 assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
// Symbol indexes are laid out as: stabs, then locals, then globals, then
// undefined symbols.
559 if (symbolIndex < numStabs+numLocal) {
560 sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
561 } else if (symbolIndex < numStabs+numLocal+numGlobal) {
562 sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
563 } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
564 sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
567 return llvm::make_error<GenericError>(Twine("symbol index (")
568 + Twine(symbolIndex) + ") out of range");
571 // Find atom from symbol.
572 if ((sym->type & N_TYPE) == N_SECT) {
573 if (sym->sect > normalizedFile.sections.size())
574 return llvm::make_error<GenericError>(Twine("symbol section index (")
575 + Twine(sym->sect) + ") out of range ");
576 const Section &symSection = normalizedFile.sections[sym->sect-1];
577 uint64_t targetOffsetInSect = sym->value - symSection.address;
578 MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
582 return llvm::Error::success();
584 return llvm::make_error<GenericError>("no atom found for defined symbol");
585 } else if ((sym->type & N_TYPE) == N_UNDF) {
586 const lld::Atom *target = file.findUndefAtom(sym->name);
589 return llvm::Error::success();
591 return llvm::make_error<GenericError>("no undefined atom found for sym");
594 return llvm::make_error<GenericError>("no atom found for symbol");
598 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
599 // Use old-school iterator so that paired relocations can be grouped.
600 for (auto it=section.relocations.begin(), e=section.relocations.end();
602 const Relocation &reloc = *it;
603 // Find atom this relocation is in.
604 if (reloc.offset > section.content.size())
605 return llvm::make_error<GenericError>(
606 Twine("r_address (") + Twine(reloc.offset)
607 + ") is larger than section size ("
608 + Twine(section.content.size()) + ")");
609 uint32_t offsetInAtom;
610 MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
613 assert(inAtom && "r_address in range, should have found atom");
614 uint64_t fixupAddress = section.address + reloc.offset;
616 const lld::Atom *target = nullptr;
617 Reference::Addend addend = 0;
618 Reference::KindValue kind;
619 if (handler.isPairedReloc(reloc)) {
620 // Handle paired relocations together.
// The second half of the pair is taken by advancing the iterator here.
621 const Relocation &reloc2 = *++it;
622 auto relocErr = handler.getPairReferenceInfo(
623 reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
624 atomByAddr, atomBySymbol, &kind, &target, &addend);
626 return handleErrors(std::move(relocErr),
627 [&](std::unique_ptr<GenericError> GE) {
628 return llvm::make_error<GenericError>(
629 Twine("bad relocation (") + GE->getMessage()
631 + section.segmentName + "/" + section.sectionName
632 + " (r1_address=" + Twine::utohexstr(reloc.offset)
633 + ", r1_type=" + Twine(reloc.type)
634 + ", r1_extern=" + Twine(reloc.isExtern)
635 + ", r1_length=" + Twine((int)reloc.length)
636 + ", r1_pcrel=" + Twine(reloc.pcRel)
637 + (!reloc.scattered ? (Twine(", r1_symbolnum=")
638 + Twine(reloc.symbol))
639 : (Twine(", r1_scattered=1, r1_value=")
640 + Twine(reloc.value)))
642 + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
643 + ", r2_type=" + Twine(reloc2.type)
644 + ", r2_extern=" + Twine(reloc2.isExtern)
645 + ", r2_length=" + Twine((int)reloc2.length)
646 + ", r2_pcrel=" + Twine(reloc2.pcRel)
647 + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
648 + Twine(reloc2.symbol))
649 : (Twine(", r2_scattered=1, r2_value=")
650 + Twine(reloc2.value)))
656 // Use ArchHandler to convert relocation record into information
657 // needed to instantiate an lld::Reference object.
658 auto relocErr = handler.getReferenceInfo(
659 reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
660 atomBySymbol, &kind, &target, &addend);
662 return handleErrors(std::move(relocErr),
663 [&](std::unique_ptr<GenericError> GE) {
664 return llvm::make_error<GenericError>(
665 Twine("bad relocation (") + GE->getMessage()
667 + section.segmentName + "/" + section.sectionName
668 + " (r_address=" + Twine::utohexstr(reloc.offset)
669 + ", r_type=" + Twine(reloc.type)
670 + ", r_extern=" + Twine(reloc.isExtern)
671 + ", r_length=" + Twine((int)reloc.length)
672 + ", r_pcrel=" + Twine(reloc.pcRel)
673 + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
674 : (Twine(", r_scattered=1, r_value=")
675 + Twine(reloc.value)))
680 // Instantiate an lld::Reference object and add to its atom.
681 inAtom->addReference(Reference::KindNamespace::mach_o,
683 kind, offsetInAtom, target, addend);
686 return llvm::Error::success();
/// Tests a section for the S_ATTR_DEBUG attribute and for membership in the
/// "__DWARF" segment.
/// NOTE(review): "§ion" in the signature looks like a mis-decoded "&section"
/// -- confirm against the upstream file.
689 bool isDebugInfoSection(const Section §ion) {
690 if ((section.attributes & S_ATTR_DEBUG) == 0)
692 return section.segmentName.equals("__DWARF");
/// Linear search of file.defined() for an atom whose name equals \p name.
/// The Twine is flattened to a std::string once, before the loop.
695 static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
696 std::string strName = name.str();
697 for (auto *atom : file.defined())
698 if (atom->name() == strName)
/// Copies the bytes of \p str into memory obtained from \p alloc and appends
/// a NUL terminator (str.size() + 1 bytes are allocated).
703 static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
704 char *strCopy = alloc.Allocate<char>(str.size() + 1);
705 memcpy(strCopy, str.data(), str.size());
706 strCopy[str.size()] = '\0';
/// Converts the stabs symbols of \p normalizedFile into a StabsList and
/// installs it on \p file as StabsDebugInfo.
///
/// Returns success immediately when there are no stabs symbols. Stabs are
/// tied to an atom where one can be found: by covering address in the BNSYM
/// path, and by name in the paths that call findDefinedAtomByName(). Stab
/// strings are either referenced in place or copied into a BumpPtrAllocator,
/// which is handed over to the file's debug info at the end.
710 llvm::Error parseStabs(MachOFile &file,
711 const NormalizedFile &normalizedFile,
714 if (normalizedFile.stabsSymbols.empty())
715 return llvm::Error::success();
717 // FIXME: Kill this off when we can move to sane yaml parsing.
718 std::unique_ptr<BumpPtrAllocator> allocator;
720 allocator = llvm::make_unique<BumpPtrAllocator>();
722 enum { start, inBeginEnd } state = start;
724 const Atom *currentAtom = nullptr;
725 uint64_t currentAtomAddress = 0;
726 StabsDebugInfo::StabsList stabsList;
727 for (const auto &stabSym : normalizedFile.stabsSymbols) {
// Each stab starts with no atom; the atom and string are filled in below.
728 Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
729 stabSym.value, stabSym.name);
732 switch (static_cast<StabType>(stabSym.type)) {
735 currentAtomAddress = stabSym.value;
736 Reference::Addend addend;
737 currentAtom = findAtomCoveringAddress(normalizedFile, file,
738 currentAtomAddress, addend);
740 return llvm::make_error<GenericError>(
741 "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
744 stab.atom = currentAtom;
746 // FIXME: ld64 just issues a warning here - should we match that?
747 return llvm::make_error<GenericError>(
748 "can't find atom for stabs BNSYM at " +
749 Twine::utohexstr(stabSym.value) + " in " + file.path());
754 // Not associated with an atom, just copy.
756 stab.str = copyDebugString(stabSym.name, *allocator);
758 stab.str = stabSym.name;
// Text before the first ':' is used, with a leading "_", as the atom name.
761 auto colonIdx = stabSym.name.find(':');
762 if (colonIdx != StringRef::npos) {
763 StringRef name = stabSym.name.substr(0, colonIdx);
764 currentAtom = findDefinedAtomByName(file, "_" + name);
765 stab.atom = currentAtom;
767 stab.str = copyDebugString(stabSym.name, *allocator);
769 stab.str = stabSym.name;
771 currentAtom = findDefinedAtomByName(file, stabSym.name);
772 stab.atom = currentAtom;
774 stab.str = copyDebugString(stabSym.name, *allocator);
776 stab.str = stabSym.name;
778 if (stab.atom == nullptr)
779 return llvm::make_error<GenericError>(
780 "can't find atom for N_GSYM stabs" + stabSym.name +
781 " in " + file.path());
785 return llvm::make_error<GenericError>(
786 "old-style N_FUN stab '" + stabSym.name + "' unsupported");
788 return llvm::make_error<GenericError>(
789 "unrecognized stab symbol '" + stabSym.name + "'");
793 stab.atom = currentAtom;
794 switch (static_cast<StabType>(stabSym.type)) {
797 currentAtom = nullptr;
800 // Just copy the string.
802 stab.str = copyDebugString(stabSym.name, *allocator);
804 stab.str = stabSym.name;
807 return llvm::make_error<GenericError>(
808 "unrecognized stab symbol '" + stabSym.name + "'");
811 llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
812 stabsList.push_back(stab);
815 file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList)));
817 // FIXME: Kill this off when we fix YAML memory ownership.
818 file.debugInfo()->setAllocator(std::move(allocator));
820 return llvm::Error::success();
/// Wraps a section's content bytes in an llvm::DataExtractor. The
/// little-endian flag is the inverse of isBigEndian(arch) and the address
/// size is 8 for 64-bit architectures, 4 otherwise.
823 static llvm::DataExtractor
824 dataExtractorFromSection(const NormalizedFile &normalizedFile,
826 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
827 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
828 StringRef SecData(reinterpret_cast<const char*>(S.content.data()),
830 return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4);
833 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
834 // inspection" code if possible.
//
// Reads abbreviation codes as ULEB128 values until one equals abbrCode. For
// each non-matching entry it reads one more ULEB128 and one byte, then
// consumes ULEB128 pairs until it reads a pair in which both values are zero.
835 static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
839 while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) {
841 abbrevData.getULEB128(&offset);
843 abbrevData.getU8(&offset);
845 while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
851 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
852 // inspection" code if possible.
//
// Reads a string attribute at infoOffset (which is advanced by the read).
// DW_FORM_string is read inline from infoData; DW_FORM_strp reads a 32-bit
// offset and returns the C string at that offset in stringsSection. Any
// other form yields an error.
853 static Expected<const char *>
854 getIndexedString(const NormalizedFile &normalizedFile,
855 llvm::dwarf::Form form, llvm::DataExtractor infoData,
856 uint32_t &infoOffset, const Section &stringsSection) {
857 if (form == llvm::dwarf::DW_FORM_string)
858 return infoData.getCStr(&infoOffset);
859 if (form != llvm::dwarf::DW_FORM_strp)
860 return llvm::make_error<GenericError>(
861 "string field encoded without DW_FORM_strp");
862 uint32_t stringOffset = infoData.getU32(&infoOffset);
863 llvm::DataExtractor stringsData =
864 dataExtractorFromSection(normalizedFile, stringsSection);
865 return stringsData.getCStr(&stringOffset);
868 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
869 // inspection" code if possible.
//
// Parses the compile-unit header from the info section (length, version,
// abbrev offset, address size) and rejects DWARF versions outside 2..4. It
// then finds the abbreviation for the first DIE, requires its tag to be
// DW_TAG_compile_unit, and walks the attribute list: DW_AT_name and
// DW_AT_comp_dir are read through getIndexedString(), and
// DWARFFormValue::skipValue() is used to step over attribute values.
870 static llvm::Expected<TranslationUnitSource>
871 readCompUnit(const NormalizedFile &normalizedFile,
873 const Section &abbrev,
874 const Section &strings,
876 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
877 // inspection" code if possible.
879 llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
880 auto infoData = dataExtractorFromSection(normalizedFile, info);
881 uint32_t length = infoData.getU32(&offset);
// A length of 0xffffffff switches to the DWARF64 format, in which a 64-bit
// value follows; other values above 0xffffff00 are rejected as malformed.
882 if (length == 0xffffffff) {
883 Format = llvm::dwarf::DwarfFormat::DWARF64;
884 infoData.getU64(&offset);
886 else if (length > 0xffffff00)
887 return llvm::make_error<GenericError>("Malformed DWARF in " + path);
889 uint16_t version = infoData.getU16(&offset);
891 if (version < 2 || version > 4)
892 return llvm::make_error<GenericError>("Unsupported DWARF version in " +
895 infoData.getU32(&offset); // Abbrev offset (should be zero)
896 uint8_t addrSize = infoData.getU8(&offset);
898 uint32_t abbrCode = infoData.getULEB128(&offset);
899 auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
900 uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
901 uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
902 if (tag != llvm::dwarf::DW_TAG_compile_unit)
903 return llvm::make_error<GenericError>("top level DIE is not a compile unit");
905 abbrevData.getU8(&abbrevOffset);
907 llvm::dwarf::Form form;
908 llvm::dwarf::FormParams formParams = {version, addrSize, Format};
909 TranslationUnitSource tu;
910 while ((name = abbrevData.getULEB128(&abbrevOffset)) |
911 (form = static_cast<llvm::dwarf::Form>(
912 abbrevData.getULEB128(&abbrevOffset))) &&
913 (name != 0 || form != 0)) {
915 case llvm::dwarf::DW_AT_name: {
916 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
920 return eName.takeError();
923 case llvm::dwarf::DW_AT_comp_dir: {
924 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
928 return eName.takeError();
932 llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
/// Populates the debug info of \p file from \p normalizedFile.
///
/// Looks for the __debug_info, __debug_abbrev and __debug_str sections in the
/// __DWARF segment. One path defers to parseStabs(). Otherwise an empty
/// __debug_info is a no-op, content shorter than 12 bytes is an error, and
/// the result of readCompUnit() is installed on the file as DwarfDebugInfo
/// (or its error is returned).
938 llvm::Error parseDebugInfo(MachOFile &file,
939 const NormalizedFile &normalizedFile, bool copyRefs) {
941 // Find the interesting debug info sections.
942 const Section *debugInfo = nullptr;
943 const Section *debugAbbrev = nullptr;
944 const Section *debugStrings = nullptr;
946 for (auto &s : normalizedFile.sections) {
947 if (s.segmentName == "__DWARF") {
948 if (s.sectionName == "__debug_info")
950 else if (s.sectionName == "__debug_abbrev")
952 else if (s.sectionName == "__debug_str")
958 return parseStabs(file, normalizedFile, copyRefs);
960 if (debugInfo->content.size() == 0)
961 return llvm::Error::success();
963 if (debugInfo->content.size() < 12)
964 return llvm::make_error<GenericError>("Malformed __debug_info section in " +
965 file.path() + ": too small");
968 return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " +
971 if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
972 *debugStrings, file.path())) {
973 // FIXME: Kill off allocator and code under 'copyRefs' when we fix YAML
975 std::unique_ptr<BumpPtrAllocator> allocator;
977 allocator = llvm::make_unique<BumpPtrAllocator>();
978 tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
979 tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
981 file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
983 file.debugInfo()->setAllocator(std::move(allocator));
985 return tuOrErr.takeError();
987 return llvm::Error::success();
/// Reads a signed pointer-sized value at \p addr, honoring \p isBig. One path
/// returns read64(); the other reads 32 bits into an int32_t. Presumably
/// \p is64 selects between them -- the condition is not visible here.
990 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
992 return read64(addr, isBig);
994 int32_t res = read32(addr, isBig);
998 /// --- Augmentation String Processing ---
// Fields filled in by processAugmentationString(). The offset and length
// fields default to ~0U; processCIE() compares _offsetOfPersonality against
// ~0U to tell whether a personality entry was recorded.
1001 bool _augmentationDataPresent = false;
1002 bool _mayHaveEH = false;
1003 uint32_t _offsetOfLSDA = ~0U;
1004 uint32_t _offsetOfPersonality = ~0U;
1005 uint32_t _offsetOfFDEPointerEncoding = ~0U;
1006 uint32_t _augmentationDataLength = ~0U;
// CIEInfo records keyed by MachODefinedAtom pointer.
1009 typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
/// Parses a CIE augmentation string into cieInfo.
///
/// An empty string succeeds at once. Otherwise the string must begin with
/// 'z', which marks augmentation data as present. Each 'L', 'P' or 'R' then
/// records the current offset within the augmentation data and advances it
/// (by 1, 5 and 1 bytes respectively), and an 'e' must be followed by 'h'.
/// The accumulated offset is stored in _augmentationDataLength.
1011 static llvm::Error processAugmentationString(const uint8_t *augStr,
1015 if (augStr[0] == '\0') {
1017 return llvm::Error::success();
1020 if (augStr[0] != 'z')
1021 return llvm::make_error<GenericError>("expected 'z' at start of "
1022 "augmentation string");
1024 cieInfo._augmentationDataPresent = true;
1027 uint32_t offsetInAugmentationData = 0;
1028 while (augStr[idx] != '\0') {
1029 if (augStr[idx] == 'L') {
1030 cieInfo._offsetOfLSDA = offsetInAugmentationData;
1031 // This adds a single byte to the augmentation data.
1032 ++offsetInAugmentationData;
1036 if (augStr[idx] == 'P') {
1037 cieInfo._offsetOfPersonality = offsetInAugmentationData;
1038 // This adds a single byte to the augmentation data for the encoding,
1039 // then a number of bytes for the pointer data.
1040 // FIXME: We are assuming 4 is correct here for the pointer size as we
1041 // always currently use delta32ToGOT.
1042 offsetInAugmentationData += 5;
1046 if (augStr[idx] == 'R') {
1047 cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
1048 // This adds a single byte to the augmentation data.
1049 ++offsetInAugmentationData;
1053 if (augStr[idx] == 'e') {
1054 if (augStr[idx + 1] != 'h')
1055 return llvm::make_error<GenericError>("expected 'eh' in "
1056 "augmentation string");
1057 cieInfo._mayHaveEH = true;
1064 cieInfo._augmentationDataLength = offsetInAugmentationData;
1067 return llvm::Error::success();
/// Parses one CIE atom of the __eh_frame section.
///
/// The augmentation string is handed to processAugmentationString(). When it
/// declares a personality entry, the fields in front of the augmentation data
/// are stepped over to find the personality function field; an existing
/// reference on the atom is checked against that offset, and the visible code
/// also builds a reference from the 32-bit delta stored in the field. A CIE
/// without a personality entry must not carry any reference. The parsed
/// CIEInfo is stored in \p cieInfos under \p atom.
1070 static llvm::Error processCIE(const NormalizedFile &normalizedFile,
1072 mach_o::ArchHandler &handler,
1073 const Section *ehFrameSection,
1074 MachODefinedAtom *atom,
1076 CIEInfoMap &cieInfos) {
1077 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1078 const uint8_t *frameData = atom->rawContent().data();
// The record starts with a 32-bit length. For the value 0xffffffff the CIE id
// field is placed after that word plus a further 64-bit word.
1082 uint32_t size = read32(frameData, isBig);
1083 uint64_t cieIDField = size == 0xffffffffU
1084 ? sizeof(uint32_t) + sizeof(uint64_t)
// Field order used below: CIE id (4 bytes), version (1 byte), then the
// NUL-terminated augmentation string.
1086 uint64_t versionField = cieIDField + sizeof(uint32_t);
1087 uint64_t augmentationStringField = versionField + sizeof(uint8_t);
1089 unsigned augmentationStringLength = 0;
1090 if (auto err = processAugmentationString(frameData + augmentationStringField,
1091 cieInfo, augmentationStringLength))
1094 if (cieInfo._offsetOfPersonality != ~0U) {
1095 // If we have augmentation data for the personality function, then we may
1096 // need to implicitly generate its relocation.
1098 // Parse the EH Data field which is pointer sized.
1099 uint64_t EHDataField = augmentationStringField + augmentationStringLength;
1100 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
// The EH data field only occupies space when the string contained "eh".
1101 unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
1103 // Parse Code Align Factor which is a ULEB128.
1104 uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
// lengthFieldSize receives the encoded size of each LEB128 value decoded
// below; the decoded alignment factors themselves are not used.
1105 unsigned lengthFieldSize = 0;
1106 llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
1108 // Parse Data Align Factor which is a SLEB128.
1109 uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
1110 llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
1112 // Parse Return Address Register which is a byte.
1113 uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
1115 // Parse the augmentation length which is a ULEB128.
1116 uint64_t AugmentationLengthField = ReturnAddressField + 1;
1117 uint64_t AugmentationLength =
1118 llvm::decodeULEB128(frameData + AugmentationLengthField,
// The encoded length has to agree with what the augmentation string implied.
1121 if (AugmentationLength != cieInfo._augmentationDataLength)
1122 return llvm::make_error<GenericError>("CIE augmentation data length "
1125 // Get the start address of the augmentation data.
1126 uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
1128 // Parse the personality function from the augmentation data.
1129 uint64_t PersonalityField =
1130 AugmentationDataField + cieInfo._offsetOfPersonality;
1132 // Parse the personality encoding.
1133 // FIXME: Verify that this is a 32-bit pcrel offset.
// Skip the one-byte encoding to reach the pointer itself.
1134 uint64_t PersonalityFunctionField = PersonalityField + 1;
1136 if (atom->begin() != atom->end()) {
1137 // If we have an explicit relocation, then make sure it matches this
1138 // offset as this is where we'd expect it to be applied to.
1139 DefinedAtom::reference_iterator CurrentRef = atom->begin();
1140 if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
1141 return llvm::make_error<GenericError>("CIE personality reloc at "
1144 if (++CurrentRef != atom->end())
1145 return llvm::make_error<GenericError>("CIE contains too many relocs");
1147 // Implicitly generate the personality function reloc. It's assumed to
1148 // be a delta32 offset to a GOT entry.
1149 // FIXME: Parse the encoding and check this.
// The stored delta is relative to the address of the field itself, so the
// target is section address + atom offset + field offset + delta.
1150 int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
1151 uint64_t funcAddress = ehFrameSection->address + offset +
1152 PersonalityFunctionField;
1153 funcAddress += funcDelta;
1155 const MachODefinedAtom *func = nullptr;
1156 Reference::Addend addend;
1157 func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
1159 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1160 handler.unwindRefToPersonalityFunctionKind(),
1161 PersonalityFunctionField, func, addend);
1163 } else if (atom->begin() != atom->end()) {
1164 // Otherwise, we expect there to be no relocations in this atom as the only
1165 // relocation would have been to the personality function.
1166 return llvm::make_error<GenericError>("unexpected relocation in CIE");
// Publish the parse result so FDEs that point at this CIE can consult it.
1170 cieInfos[atom] = std::move(cieInfo);
1172 return llvm::Error::success();
/// Verifies or adds the references an FDE atom of the __eh_frame section needs:
/// to its parent CIE, to the start of the address range it describes and, when
/// the CIE's augmentation data has an LSDA entry and the FDE's augmentation
/// data is non-empty, to the LSDA.
///
/// References already present on the atom are sorted and then consumed in
/// offset order; a missing one is synthesised from the value encoded in the
/// FDE. \p cieInfos supplies the CIEInfo recorded for the parent CIE.
1175 static llvm::Error processFDE(const NormalizedFile &normalizedFile,
1177 mach_o::ArchHandler &handler,
1178 const Section *ehFrameSection,
1179 MachODefinedAtom *atom,
1181 const CIEInfoMap &cieInfos) {
1183 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1184 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1186 // Compiler wasn't lazy and actually told us what it meant.
1187 // Unfortunately, the compiler may not have generated references for all of
1188 // [cie, func, lsda] and so we still need to parse the FDE and add references
1189 // for any the compiler didn't generate.
1190 if (atom->begin() != atom->end())
1191 atom->sortReferences();
1193 DefinedAtom::reference_iterator CurrentRef = atom->begin();
1195 // This helper returns the reference (if one exists) at the offset we are
1196 // currently processing. It automatically increments the ref iterator if we
1197 // do return a ref, and throws an error if we pass over a ref without
// consuming it (the error is the report_fatal_error call at the end of the
// lambda, not a C++ exception).
1199 auto currentRefGetter = [&CurrentRef,
1200 &atom](uint64_t Offset)->const Reference* {
1201 // If there are no more refs found, then we are done.
1202 if (CurrentRef == atom->end())
1205 const Reference *Ref = *CurrentRef;
1207 // If we haven't reached the offset for this reference, then return that
1208 // we don't yet have a reference to process.
1209 if (Offset < Ref->offsetInAtom())
1212 // If the offset is equal, then we want to process this ref.
1213 if (Offset == Ref->offsetInAtom()) {
1218 // The current ref is at an offset which is earlier than the current
1219 // offset, then we failed to consume it when we should have. In this case
// processing is aborted with a fatal error.
1221 llvm::report_fatal_error("Skipped reference when processing FDE");
1224 // Helper to either get the reference at this current location, and verify
1225 // that it is of the expected type, or add a reference of that type.
1226 // Returns the reference target.
1227 auto verifyOrAddReference = [&](uint64_t targetAddress,
1228 Reference::KindValue refKind,
1229 uint64_t refAddress,
1230 bool allowsAddend)->const Atom* {
1231 if (auto *ref = currentRefGetter(refAddress)) {
1232 // The compiler already emitted a relocation for the CIE ref. This should
1233 // have been converted to the correct type of reference in
1234 // get[Pair]ReferenceInfo().
// NOTE(review): the kind is only checked by an assert, so a mismatch goes
// unnoticed when assertions are compiled out.
1235 assert(ref->kindValue() == refKind &&
1236 "Incorrect EHFrame reference kind");
1237 return ref->target();
// No existing reference at this offset: resolve the target atom from the
// address and add the reference ourselves.
1239 Reference::Addend addend;
1240 auto *target = findAtomCoveringAddress(normalizedFile, file,
1241 targetAddress, addend);
1242 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1243 refKind, refAddress, target, addend);
1246 assert(!addend && "EHFrame reference cannot have addend");
1250 const uint8_t *startFrameData = atom->rawContent().data();
1251 const uint8_t *frameData = startFrameData;
// As in processCIE(): a 32-bit length of 0xffffffff moves the next field
// behind an additional 64-bit word.
1253 uint32_t size = read32(frameData, isBig);
1254 uint64_t cieFieldInFDE = size == 0xffffffffU
1255 ? sizeof(uint32_t) + sizeof(uint64_t)
1258 // Linker needs to fixup a reference from the FDE to its parent CIE (a
1259 // 32-bit byte offset backwards in the __eh_frame section).
1260 uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
1261 uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
1262 cieAddress -= cieDelta;
1264 auto *cieRefTarget = verifyOrAddReference(cieAddress,
1265 handler.unwindRefToCIEKind(),
1266 cieFieldInFDE, false);
1267 const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
1268 assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
1269 "FDE's CIE field does not point at the start of a CIE.");
// NOTE(review): the iterator returned by find() is dereferenced without being
// compared against cieInfos.end(); this relies on the CIE having been
// processed (and stored by processCIE) before any FDE that refers to it.
1271 const CIEInfo &cieInfo = cieInfos.find(cie)->second;
1273 // Linker needs to fixup reference from the FDE to the function it's
1274 // describing. FIXME: there are actually different ways to do this, and the
1275 // particular method used is specified in the CIE's augmentation fields
1277 uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
// The range start is stored relative to the address of its own field.
1279 int64_t functionFromFDE = readSPtr(is64, isBig,
1280 frameData + rangeFieldInFDE);
1281 uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
1282 rangeStart += functionFromFDE;
1284 verifyOrAddReference(rangeStart,
1285 handler.unwindRefToFunctionKind(),
1286 rangeFieldInFDE, true);
1288 // Handle the augmentation data if there is any.
1289 if (cieInfo._augmentationDataPresent) {
1290 // First process the augmentation data length field.
// It sits after two pointer-sized fields that begin at rangeFieldInFDE.
1291 uint64_t augmentationDataLengthFieldInFDE =
1292 rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
1293 unsigned lengthFieldSize = 0;
1294 uint64_t augmentationDataLength =
1295 llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
1298 if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
1300 // Look at the augmentation data field.
1301 uint64_t augmentationDataFieldInFDE =
1302 augmentationDataLengthFieldInFDE + lengthFieldSize;
1304 int64_t lsdaFromFDE = readSPtr(is64, isBig,
1305 frameData + augmentationDataFieldInFDE);
1306 uint64_t lsdaStart =
1307 ehFrameSection->address + offset + augmentationDataFieldInFDE +
// NOTE(review): the LSDA reference is created with the same kind as the
// function reference (unwindRefToFunctionKind) - confirm this is intended.
1310 verifyOrAddReference(lsdaStart,
1311 handler.unwindRefToFunctionKind(),
1312 augmentationDataFieldInFDE, true);
1316 return llvm::Error::success();
/// Locates the __TEXT/__eh_frame section of \p normalizedFile and runs
/// processCIE() or processFDE() on every atom in it, depending on what
/// ArchHandler::isDwarfCIE() reports. One CIEInfoMap is shared by all atoms of
/// the section. Returns success straight away when there is no such section.
1319 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
1321 mach_o::ArchHandler &handler) {
1323 const Section *ehFrameSection = nullptr;
// NOTE(review): "§ion" on the next line and three lines further down looks
// like a mis-decoded "&section" (the condition reads `section.segmentName`);
// restore the ampersand spelling before compiling.
1324 for (auto §ion : normalizedFile.sections)
1325 if (section.segmentName == "__TEXT" &&
1326 section.sectionName == "__eh_frame") {
1327 ehFrameSection = §ion;
1331 // No __eh_frame so nothing to do.
1332 if (!ehFrameSection)
1333 return llvm::Error::success();
// The callback below returns void, so the result of each processCIE /
// processFDE call is parked in ehFrameErr.
1335 llvm::Error ehFrameErr = llvm::Error::success();
1336 CIEInfoMap cieInfos;
1338 file.eachAtomInSection(*ehFrameSection,
1339 [&](MachODefinedAtom *atom, uint64_t offset) -> void {
1340 assert(atom->contentType() == DefinedAtom::typeCFI);
1342 // Bail out if we've encountered an error.
1346 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1347 if (ArchHandler::isDwarfCIE(isBig, atom))
1348 ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
1349 atom, offset, cieInfos);
1351 ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
1352 atom, offset, cieInfos);
/// Validates the objc_image_info section \p sect and copies what it says onto
/// \p file.
///
/// The content must be exactly 8 bytes: a 32-bit version followed by 32 bits
/// of flags, both read with the byte order of normalizedFile.arch. Files whose
/// flags request garbage collection are rejected. Otherwise the ObjC
/// constraint is set from the retain/release-for-simulator flag and the Swift
/// version is taken from bits 8..15 of the flags.
// NOTE(review): "§" in the first parameter looks like a mis-decoded "&sect"
// (the body refers to `sect`); restore the ampersand spelling before compiling.
1358 llvm::Error parseObjCImageInfo(const Section §,
1359 const NormalizedFile &normalizedFile,
1362 // struct objc_image_info {
1363 // uint32_t version; // initially 0
1367 ArrayRef<uint8_t> content = sect.content;
1368 if (content.size() != 8)
1369 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1371 " in file " + file.path() +
1372 " should be 8 bytes in size");
1374 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
// First word: version.
1375 uint32_t version = read32(content.data(), isBig);
1377 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1379 " in file " + file.path() +
1380 " should have version=0");
// Second word: flags.
1382 uint32_t flags = read32(content.data() + 4, isBig);
1383 if (flags & (MachOLinkingContext::objc_supports_gc |
1384 MachOLinkingContext::objc_gc_only))
1385 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1387 " in file " + file.path() +
1388 " uses GC. This is not supported");
1390 if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
1391 file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
1393 file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
// The Swift version is the second-lowest byte of the flags word.
1395 file.setSwiftVersion((flags >> 8) & 0xFF);
1397 return llvm::Error::success();
1400 /// Converts normalized mach-o file into an lld::File and lld::Atoms.
/// A new MachOFile is created for \p path and filled in by
/// normalizedObjectToAtoms(); if that reports an error, the error is returned
/// in place of the file.
1401 llvm::Expected<std::unique_ptr<lld::File>>
1402 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1404 std::unique_ptr<MachOFile> file(new MachOFile(path));
1405 if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
// The llvm::Error is moved into the Expected<> return value.
1406 return std::move(ec);
// Hand ownership back as a pointer to the File base type.
1407 return std::unique_ptr<File>(std::move(file));
/// Dylib counterpart of objectToAtoms(): creates a MachODylibFile for \p path
/// and fills it in with normalizedDylibToAtoms(). An error from that call is
/// returned in place of the file.
1410 llvm::Expected<std::unique_ptr<lld::File>>
1411 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1413 // Instantiate SharedLibraryFile object.
1414 std::unique_ptr<MachODylibFile> file(new MachODylibFile(path));
1415 if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
// The llvm::Error is moved into the Expected<> return value.
1416 return std::move(ec);
// Hand ownership back as a pointer to the File base type.
1417 return std::unique_ptr<File>(std::move(file));
1420 } // anonymous namespace
1422 namespace normalized {
1424 static bool isObjCImageInfo(const Section §) {
1425 return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
1426 (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo");
/// Fills \p file with atoms and references built from a normalized object
/// file. Steps, in the order they appear below:
///   1. create atoms from each section (debug-info and objc_image_info
///      sections get special treatment),
///   2. create undefined / tentative-definition atoms from undefined symbols,
///   3. convert section relocations to References,
///   4. add arch-specific references and the __eh_frame references,
///   5. turn the data-in-code table into transition references,
///   6. copy flags, arch, OS and min-version attributes onto the file,
///   7. sort each atom's references and parse the debug info.
1430 normalizedObjectToAtoms(MachOFile *file,
1431 const NormalizedFile &normalizedFile,
1433 LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
1434 << file->path() << "\n");
// MH_SUBSECTIONS_VIA_SYMBOLS in the header flags; forwarded to
// processSection() and convertRelocs() as `scatterable`.
1435 bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
1437 // Create atoms from each section.
// NOTE(review): "§" in the next loop header (and in the relocation loop
// further down) looks like a mis-decoded "&sect"; the loop bodies refer to
// `sect`. Restore the ampersand spelling before compiling.
1438 for (auto § : normalizedFile.sections) {
1440 // If this is a debug-info section parse it specially.
1441 if (isDebugInfoSection(sect))
1444 // If the file contains an objc_image_info struct, then we should parse the
1445 // ObjC flags and Swift version.
1446 if (isObjCImageInfo(sect)) {
1447 if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
1449 // We then skip adding atoms for this section as we use the ObjCPass to
1450 // re-emit this data after it has been aggregated for all files.
1454 bool customSectionName;
1455 DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
1457 if (auto ec = processSection(atomType, sect, customSectionName,
1458 normalizedFile, *file, scatterable, copyRefs))
1461 // Create atoms from undefined symbols.
1462 for (auto &sym : normalizedFile.undefinedSymbols) {
1463 // Undefined symbols with n_value != 0 are actually tentative definitions.
1464 if (sym.value == Hex64(0)) {
1465 file->addUndefinedAtom(sym.name, copyRefs);
// For a tentative definition sym.value is passed on as the third argument and
// the alignment is 2 raised to the value in the upper bits of desc (desc >> 8).
1467 file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
1468 DefinedAtom::Alignment(1 << (sym.desc >> 8)),
1473 // Convert mach-o relocations to References
1474 std::unique_ptr<mach_o::ArchHandler> handler
1475 = ArchHandler::create(normalizedFile.arch);
1476 for (auto § : normalizedFile.sections) {
1477 if (isDebugInfoSection(sect))
1479 if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
1484 // Add additional arch-specific References
1485 file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
1486 handler->addAdditionalReferences(*atom);
1489 // Each __eh_frame section needs references to both __text (the function we're
1490 // providing unwind info for) and itself (FDE -> CIE). These aren't
1491 // represented in the relocations on some architectures, so we have to add
1492 // them back in manually there.
1493 if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
1496 // Process mach-o data-in-code regions array. That information is encoded in
1497 // atoms as References at each transition point.
// nextIndex is used below to peek at the entry after the current one.
1498 unsigned nextIndex = 0;
1499 for (const DataInCode &entry : normalizedFile.dataInCode) {
1501 const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
1503 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
1504 + Twine(entry.offset)
1505 + ") is not in any section"));
1507 uint64_t offsetInSect = entry.offset - s->address;
1508 uint32_t offsetInAtom;
// NOTE(review): `atom` is dereferenced right after the lookup without a null
// check - confirm findAtomCoveringAddress() cannot return nullptr here.
1509 MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
// A data-in-code region has to lie entirely inside one atom.
1511 if (offsetInAtom + entry.length > atom->size()) {
1512 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
1514 + Twine(entry.offset)
1516 + Twine(entry.length)
1517 + ") crosses atom boundary."));
1519 // Add reference that marks start of data-in-code.
// The reference targets the atom itself; entry.kind is passed as the last
// argument.
1520 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1521 handler->dataInCodeTransitionStart(*atom),
1522 offsetInAtom, atom, entry.kind);
1524 // Peek at next entry, if it starts where this one ends, skip ending ref.
1525 if (nextIndex < normalizedFile.dataInCode.size()) {
1526 const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
1527 if (nextEntry.offset == (entry.offset + entry.length))
1531 // If data goes to end of function, skip ending ref.
1532 if ((offsetInAtom + entry.length) == atom->size())
1535 // Add reference that marks end of data-in-code.
1536 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1537 handler->dataInCodeTransitionEnd(*atom),
1538 offsetInAtom+entry.length, atom, 0);
1541 // Cache some attributes on the file for use later.
1542 file->setFlags(normalizedFile.flags);
1543 file->setArch(normalizedFile.arch);
1544 file->setOS(normalizedFile.os);
1545 file->setMinVersion(normalizedFile.minOSverson);
1546 file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
1548 // Sort references in each atom to their canonical order.
// file->defined() yields const DefinedAtom pointers; each one is cast to
// SimpleDefinedAtom to reach sortReferences().
1549 for (const DefinedAtom* defAtom : file->defined()) {
1550 reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
1553 if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
1556 return llvm::Error::success();
/// Copies dylib-level information from \p normalizedFile onto \p file: the
/// install name, the compatibility and current versions, the exported symbols
/// and the re-exported dependent dylibs.
///
/// Exported symbols are taken from exportInfo when it is non-empty; the loop
/// over globalSymbols is the other source visible below.
1560 normalizedDylibToAtoms(MachODylibFile *file,
1561 const NormalizedFile &normalizedFile,
1563 file->setInstallName(normalizedFile.installName);
1564 file->setCompatVersion(normalizedFile.compatVersion);
1565 file->setCurrentVersion(normalizedFile.currentVersion);
1567 // Tell MachODylibFile object about all symbols it exports.
1568 if (!normalizedFile.exportInfo.empty()) {
1569 // If exports trie exists, use it instead of traditional symbol table.
1570 for (const Export &exp : normalizedFile.exportInfo) {
1571 bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1572 // StringRefs from export iterator are ephemeral, so force copy.
1573 file->addExportedSymbol(exp.name, weakDef, true);
// Symbol-table path: here the caller's copyRefs choice is honoured, unlike the
// forced copy above.
1576 for (auto &sym : normalizedFile.globalSymbols) {
1577 assert((sym.scope & N_EXT) && "only expect external symbols here");
1578 bool weakDef = (sym.desc & N_WEAK_DEF);
1579 file->addExportedSymbol(sym.name, weakDef, copyRefs);
1582 // Tell MachODylibFile object about all dylibs it re-exports.
1583 for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
1584 if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
1585 file->addReExportedDylib(dep.path);
1587 return llvm::Error::success();
/// Reverse lookup in the sectsToAtomType table: for \p atomType, reports the
/// segment name, section name and section type of a matching table entry
/// through the reference parameters.
///
/// \p sectionAttrs is set to S_ATTR_PURE_INSTRUCTIONS for typeCode, and
/// \p relocsToDefinedCanBeImplicit is true only for typeCFI. The table is
/// scanned up to its typeUnknown terminator; reaching the end of the function
/// is treated as unreachable ("content type not yet supported").
// NOTE(review): "§ionName", "§ionType" and "§ionAttrs" in the parameter list
// look like mis-decoded "&sectionName", "&sectionType" and "&sectionAttrs"
// (the body assigns to those names); restore the ampersand spelling before
// compiling.
1590 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1591 StringRef &segmentName,
1592 StringRef §ionName,
1593 SectionType §ionType,
1594 SectionAttr §ionAttrs,
1595 bool &relocsToDefinedCanBeImplicit) {
1597 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1598 p->atomType != DefinedAtom::typeUnknown; ++p) {
1599 if (p->atomType != atomType)
1601 // Wild carded entries are ignored for reverse lookups.
1602 if (p->segmentName.empty() || p->sectionName.empty())
1604 segmentName = p->segmentName;
1605 sectionName = p->sectionName;
1606 sectionType = p->sectionType;
// Default first, then the per-content-type overrides.
1608 relocsToDefinedCanBeImplicit = false;
1609 if (atomType == DefinedAtom::typeCode)
1610 sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1611 if (atomType == DefinedAtom::typeCFI)
1612 relocsToDefinedCanBeImplicit = true;
1615 llvm_unreachable("content type not yet supported");
/// Entry point that converts a normalized mach-o file into an lld::File.
/// Dispatches on normalizedFile.fileType to dylibToAtoms() or objectToAtoms();
/// any other file type is treated as unreachable.
1618 llvm::Expected<std::unique_ptr<lld::File>>
1619 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1621 switch (normalizedFile.fileType) {
1624 return dylibToAtoms(normalizedFile, path, copyRefs);
1626 return objectToAtoms(normalizedFile, path, copyRefs);
1628 llvm_unreachable("unhandled MachO file type!");
1632 } // namespace normalized
1633 } // namespace mach_o