1 //===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \file Converts from in-memory normalized mach-o to in-memory Atoms.
23 #include "ArchHandler.h"
26 #include "MachONormalizedFile.h"
27 #include "MachONormalizedFileBinaryUtils.h"
28 #include "lld/Common/LLVM.h"
29 #include "lld/Core/Error.h"
30 #include "llvm/BinaryFormat/Dwarf.h"
31 #include "llvm/BinaryFormat/MachO.h"
32 #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
33 #include "llvm/Support/DataExtractor.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/Error.h"
36 #include "llvm/Support/Format.h"
37 #include "llvm/Support/LEB128.h"
38 #include "llvm/Support/raw_ostream.h"
40 using namespace llvm::MachO;
41 using namespace lld::mach_o::normalized;
43 #define DEBUG_TYPE "normalized-file-to-atoms"
49 namespace { // anonymous
// Expands to the braced initializer for one table row. The last argument is
// given unqualified; the macro prefixes it with DefinedAtom::.
52 #define ENTRY(seg, sect, type, atomType) \
53 {seg, sect, type, DefinedAtom::atomType }
/// One row of the table that maps a mach-o section (segment name, section
/// name, section type) to a DefinedAtom::ContentType.
55 struct MachORelocatableSectionToAtomType {
// Segment name to match; atomTypeFromSection() treats an empty name as a
// wildcard.
56 StringRef segmentName;
// Section name to match; atomTypeFromSection() treats an empty name as a
// wildcard.
57 StringRef sectionName;
// Section type that must equal the section's type for the row to apply.
58 SectionType sectionType;
// Content type associated with a matching section.
59 DefinedAtom::ContentType atomType;
// Table scanned front to back by atomTypeFromSection(). Rows whose segment
// and section names are both empty match any names with the given section
// type. The final typeUnknown row is the sentinel that ends the scan.
62 const MachORelocatableSectionToAtomType sectsToAtomType[] = {
63 ENTRY("__TEXT", "__text", S_REGULAR, typeCode),
64 ENTRY("__TEXT", "__text", S_REGULAR, typeResolver),
65 ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString),
66 ENTRY("", "", S_CSTRING_LITERALS, typeCString),
67 ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String),
68 ENTRY("__TEXT", "__const", S_REGULAR, typeConstant),
69 ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant),
70 ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI),
71 ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI),
72 ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4),
73 ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8),
74 ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16),
75 ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA),
76 ENTRY("__DATA", "__data", S_REGULAR, typeData),
77 ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData),
78 ENTRY("__DATA", "__const", S_REGULAR, typeConstData),
79 ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString),
80 ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS,
82 ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS,
84 ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS,
86 ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill),
87 ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS,
89 ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples),
90 ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES,
92 ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData),
93 ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL,
94 typeTLVInitialZeroFill),
95 ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo),
96 ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList),
97 ENTRY("", "", S_INTERPOSING, typeInterposingTuples),
98 ENTRY("__LD", "__compact_unwind", S_REGULAR,
99 typeCompactUnwindInfo),
// Sentinel row: terminates the scan in atomTypeFromSection().
100 ENTRY("", "", S_REGULAR, typeUnknown)
105 /// Figures out ContentType of a mach-o section.
///
/// \param section the normalized section to classify.
/// \param customSectionName reset to false on entry; assigned true when the
///        matching table row has both its segment and section name empty.
/// \returns when the table scan does not produce a result, typeCode for a
///          section flagged S_ATTR_PURE_INSTRUCTIONS and typeUnknown
///          otherwise. The return for a matching row is not visible in this
///          excerpt; presumably it is that row's atomType.
106 DefinedAtom::ContentType atomTypeFromSection(const Section §ion,
107 bool &customSectionName) {
108 // First look for match of name and type. Empty names in table are wildcards.
109 customSectionName = false;
// The scan stops at the first row whose atomType is typeUnknown (sentinel).
110 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
111 p->atomType != DefinedAtom::typeUnknown; ++p) {
112 if (p->sectionType != section.type)
114 if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty())
116 if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty())
// A match through a row with both names empty is reported as a custom
// section name.
118 customSectionName = p->segmentName.empty() && p->sectionName.empty();
121 // Look for code denoted by section attributes
122 if (section.attributes & S_ATTR_PURE_INSTRUCTIONS)
123 return DefinedAtom::typeCode;
125 return DefinedAtom::typeUnknown;
139 /// Returns info on how to atomize a section of the specified ContentType.
///
/// \param atomType content type to look up in the local table.
/// \param sizeMultiple [out] value the section size is checked against by
///        processSection() (size % sizeMultiple must be zero).
/// \param scope [out] scope for atoms created from the section.
/// \param merge [out] merge policy for atoms created from the section.
/// \param atomizeModel [out] how the section content is split into atoms.
140 void sectionParseInfo(DefinedAtom::ContentType atomType,
141 unsigned int &sizeMultiple,
142 DefinedAtom::Scope &scope,
143 DefinedAtom::Merge &merge,
144 AtomizeModel &atomizeModel) {
// Fields of the local table row type (named ParseInfo below; its opening line
// is not visible in this excerpt).
146 DefinedAtom::ContentType atomType;
147 unsigned int sizeMultiple;
148 DefinedAtom::Scope scope;
149 DefinedAtom::Merge merge;
150 AtomizeModel atomizeModel;
// Redefinition of ENTRY for this table: the type, scope and merge arguments
// are qualified with DefinedAtom:: by the macro.
153 #define ENTRY(type, size, scope, merge, model) \
154 {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model }
// The model argument of each row sits on a continuation line that is not
// visible in this excerpt.
156 static const ParseInfo parseInfo[] = {
157 ENTRY(typeCode, 1, scopeGlobal, mergeNo,
159 ENTRY(typeData, 1, scopeGlobal, mergeNo,
161 ENTRY(typeConstData, 1, scopeGlobal, mergeNo,
163 ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo,
165 ENTRY(typeConstant, 1, scopeGlobal, mergeNo,
167 ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent,
169 ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent,
171 ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo,
173 ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent,
175 ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent,
177 ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent,
179 ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent,
181 ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo,
183 ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo,
185 ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo,
187 ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent,
189 ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent,
191 ENTRY(typeUnknown, 1, scopeGlobal, mergeNo,
// Linear search for the row whose atomType matches; its four values are
// copied into the output parameters.
195 const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo);
196 for (int i=0; i < tableLen; ++i) {
197 if (parseInfo[i].atomType == atomType) {
198 sizeMultiple = parseInfo[i].sizeMultiple;
199 scope = parseInfo[i].scope;
200 merge = parseInfo[i].merge;
201 atomizeModel = parseInfo[i].atomizeModel;
206 // Unknown type is atomized by symbols.
// NOTE(review): the fallback assignment of sizeMultiple is not visible in this
// excerpt; confirm it is set on this path.
208 scope = DefinedAtom::scopeGlobal;
209 merge = DefinedAtom::mergeNo;
210 atomizeModel = atomizeAtSymbols;
/// Converts a raw scope value into an Atom::Scope (global, linkage unit or
/// translation unit). The tests that select each result are not visible in
/// this excerpt; a value that matches none of them reaches llvm_unreachable.
214 Atom::Scope atomScope(uint8_t scope) {
217 return Atom::scopeGlobal;
220 return Atom::scopeLinkageUnit;
222 return Atom::scopeTranslationUnit;
224 llvm_unreachable("unknown scope value!");
/// Appends to \p outSyms a pointer to each symbol of \p inSymbols that is a
/// section definition (N_SECT) located in section \p sectionIndex.
/// The pointers refer to elements of \p inSymbols, so that vector must outlive
/// any use of \p outSyms.
227 void appendSymbolsInSection(const std::vector<Symbol> &inSymbols,
228 uint32_t sectionIndex,
229 SmallVector<const Symbol *, 64> &outSyms) {
230 for (const Symbol &sym : inSymbols) {
231 // Only look at definition symbols.
232 if ((sym.type & N_TYPE) != N_SECT)
// Only look at symbols that live in the requested section.
234 if (sym.sect != sectionIndex)
236 outSyms.push_back(&sym);
/// Creates one atom in \p file for the address range
/// [symbolAddr, nextSymbolAddr) of \p section.
///
/// \param atomType content type; typeUnknown selects the custom-section path,
///        and typeCode becomes typeResolver when N_SYMBOL_RESOLVER is set.
/// \param symbolDescFlags symbol descriptor bits; N_NO_DEAD_STRIP, N_WEAK_DEF,
///        N_ARM_THUMB_DEF and N_SYMBOL_RESOLVER are inspected here.
/// \param nextSymbolAddr address at which this atom ends.
/// \param scatterable when false the atom is marked no-dead-strip.
240 void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion,
241 MachOFile &file, uint64_t symbolAddr, StringRef symbolName,
242 uint16_t symbolDescFlags, Atom::Scope symbolScope,
243 uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) {
244 // Mach-O symbol table does not have size in it. Instead the size is the
245 // difference between this and the next symbol.
246 uint64_t size = nextSymbolAddr - symbolAddr;
// Atom offsets are relative to the start of the section.
247 uint64_t offset = symbolAddr - section.address;
248 bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable;
249 if (isZeroFillSection(section.type)) {
250 file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size,
251 noDeadStrip, copyRefs, §ion);
// Weak definitions are merged as weak; everything else is not merged.
253 DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF)
254 ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo;
255 bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF);
256 if (atomType == DefinedAtom::typeUnknown) {
257 // Mach-O needs a segment and section name. Concatenate those two
258 // with a / separator (e.g. "seg/sect") to fit into the lld model
259 // of just a section name.
260 std::string segSectName = section.segmentName.str()
261 + "/" + section.sectionName.str();
262 file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType,
263 merge, thumb, noDeadStrip, offset,
264 size, segSectName, true, §ion);
// A code symbol flagged N_SYMBOL_RESOLVER is recorded as a resolver atom.
266 if ((atomType == lld::DefinedAtom::typeCode) &&
267 (symbolDescFlags & N_SYMBOL_RESOLVER)) {
268 atomType = lld::DefinedAtom::typeResolver;
270 file.addDefinedAtom(symbolName, symbolScope, atomType, merge,
271 offset, size, thumb, noDeadStrip, copyRefs, §ion);
/// Atomizes \p section at symbol boundaries.
///
/// Collects the global and local symbols defined in the section, sorts them
/// by address (ties broken by scope, then by an 'l' name prefix, then by
/// name), and calls atomFromSymbol() for each range between consecutive
/// symbols. Content in front of the first symbol, or the whole content of a
/// section without symbols, becomes an anonymous translation-unit-scoped atom.
/// Every return visible in this excerpt is llvm::Error::success().
276 llvm::Error processSymboledSection(DefinedAtom::ContentType atomType,
277 const Section §ion,
278 const NormalizedFile &normalizedFile,
279 MachOFile &file, bool scatterable,
281 // Find section's index.
// The index starts at 1, matching the 1-based Symbol::sect compared in
// appendSymbolsInSection().
282 uint32_t sectIndex = 1;
283 for (auto § : normalizedFile.sections) {
284 if (§ == §ion)
289 // Find all symbols in this section.
290 SmallVector<const Symbol *, 64> symbols;
291 appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols);
292 appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols);
// Order symbols so that consecutive entries delimit atom ranges.
295 std::sort(symbols.begin(), symbols.end(),
296 [](const Symbol *lhs, const Symbol *rhs) -> bool {
300 uint64_t lhsAddr = lhs->value;
301 uint64_t rhsAddr = rhs->value;
302 if (lhsAddr != rhsAddr)
303 return lhsAddr < rhsAddr;
304 // If same address, one is an alias so sort by scope.
305 Atom::Scope lScope = atomScope(lhs->scope);
306 Atom::Scope rScope = atomScope(rhs->scope);
307 if (lScope != rScope)
308 return lScope < rScope;
309 // If same address and scope, see if one might be better as
// NOTE(review): name.front() is called without a visible emptiness check;
// confirm symbol names cannot be empty here.
311 bool lPrivate = (lhs->name.front() == 'l');
312 bool rPrivate = (rhs->name.front() == 'l');
313 if (lPrivate != rPrivate)
315 // If same address and scope, sort by name.
316 return lhs->name < rhs->name;
319 // Debug logging of symbols.
320 //for (const Symbol *sym : symbols)
321 // llvm::errs() << " sym: "
322 // << llvm::format("0x%08llx ", (uint64_t)sym->value)
323 // << ", " << sym->name << "\n";
325 // If section has no symbols and no content, there are no atoms.
326 if (symbols.empty() && section.content.empty())
327 return llvm::Error::success();
329 if (symbols.empty()) {
330 // Section has no symbols, put all content in one anonymous atom.
331 atomFromSymbol(atomType, section, file, section.address, StringRef(),
332 0, Atom::scopeTranslationUnit,
333 section.address + section.content.size(),
334 scatterable, copyRefs);
336 else if (symbols.front()->value != section.address) {
337 // Section has anonymous content before first symbol.
338 atomFromSymbol(atomType, section, file, section.address, StringRef(),
339 0, Atom::scopeTranslationUnit, symbols.front()->value,
340 scatterable, copyRefs);
// Each atom is emitted one iteration late: the atom for lastSym ends at the
// address of the current symbol.
343 const Symbol *lastSym = nullptr;
344 for (const Symbol *sym : symbols) {
345 if (lastSym != nullptr) {
346 // Ignore any assembler added "ltmpNNN" symbol at start of section
347 // if there is another symbol at the start.
348 if ((lastSym->value != sym->value)
349 || lastSym->value != section.address
350 || !lastSym->name.startswith("ltmp")) {
351 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
352 lastSym->desc, atomScope(lastSym->scope), sym->value,
353 scatterable, copyRefs);
// The final symbol's atom extends to the end of the section content.
358 if (lastSym != nullptr) {
359 atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name,
360 lastSym->desc, atomScope(lastSym->scope),
361 section.address + section.content.size(),
362 scatterable, copyRefs);
365 // If object built without .subsections_via_symbols, add reference chain.
// Each atom in the section gets a kindLayoutAfter reference from the atom
// visited before it. The guarding condition and the update of prevAtom are
// not visible in this excerpt.
367 MachODefinedAtom *prevAtom = nullptr;
368 file.eachAtomInSection(section,
369 [&](MachODefinedAtom *atom, uint64_t offset)->void {
371 prevAtom->addReference(Reference::KindNamespace::all,
372 Reference::KindArch::all,
373 Reference::kindLayoutAfter, 0, atom, 0);
378 return llvm::Error::success();
/// Splits \p section into atoms according to the AtomizeModel that
/// sectionParseInfo() reports for \p atomType, and adds them to \p file.
///
/// \param customSectionName when true, atoms are added with a "seg/sect"
///        custom section name instead of through addDefinedAtom().
/// \returns success, or a GenericError when the section size is not a
///          multiple of the required size or when an entry is malformed.
381 llvm::Error processSection(DefinedAtom::ContentType atomType,
382 const Section §ion,
383 bool customSectionName,
384 const NormalizedFile &normalizedFile,
385 MachOFile &file, bool scatterable,
387 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
388 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
390 // Get info on how to atomize section.
391 unsigned int sizeMultiple;
392 DefinedAtom::Scope scope;
393 DefinedAtom::Merge merge;
394 AtomizeModel atomizeModel;
395 sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel);
397 // Validate section size.
398 if ((section.content.size() % sizeMultiple) != 0)
399 return llvm::make_error<GenericError>(Twine("Section ")
400 + section.segmentName
401 + "/" + section.sectionName
403 + Twine(section.content.size())
404 + ") which is not a multiple of "
405 + Twine(sizeMultiple));
407 if (atomizeModel == atomizeAtSymbols) {
408 // Break section up into atoms at symbol boundaries.
409 return processSymboledSection(atomType, section, normalizedFile, file,
410 scatterable, copyRefs);
// Walk the content, computing the size of the atom that starts at offset from
// the atomize model. The code that advances offset is not visible in this
// excerpt.
413 for (unsigned int offset = 0, e = section.content.size(); offset != e;) {
414 switch (atomizeModel) {
415 case atomizeFixedSize:
416 // Break section up into atoms each with a fixed size.
419 case atomizePointerSize:
420 // Break section up into atoms each the size of a pointer.
424 // Break section up into zero terminated c-strings.
// The atom size includes the terminating zero byte.
426 for (unsigned int i = offset; i < e; ++i) {
427 if (section.content[i] == 0) {
428 size = i + 1 - offset;
434 // Break section up into zero terminated UTF16 strings.
// NOTE(review): content[i + 1] is read while only i < e is checked; confirm
// the content length is even on this path.
436 for (unsigned int i = offset; i < e; i += 2) {
437 if ((section.content[i] == 0) && (section.content[i + 1] == 0)) {
438 size = i + 2 - offset;
444 // Break section up into dwarf unwind CFIs (FDE or CIE).
// The leading 32-bit value is read as a length; 4 is added for the length
// field itself.
445 size = read32(§ion.content[offset], isBig) + 4;
446 if (offset+size > section.content.size()) {
447 return llvm::make_error<GenericError>(Twine("Section ")
448 + section.segmentName
449 + "/" + section.sectionName
450 + " is malformed. Size of CFI "
451 "starting at offset ("
453 + ") is past end of section.");
457 // Break section up into compact unwind entries.
458 size = is64 ? 32 : 20;
460 case atomizeCFString:
461 // Break section up into NS/CFString objects.
462 size = is64 ? 32 : 16;
464 case atomizeAtSymbols:
// The condition guarding this error is not visible in this excerpt.
468 return llvm::make_error<GenericError>(Twine("Section ")
469 + section.segmentName
470 + "/" + section.sectionName
471 + " is malformed. The last atom "
472 "is not zero terminated.");
474 if (customSectionName) {
475 // Mach-O needs a segment and section name. Concatenate those two
476 // with a / separator (e.g. "seg/sect") to fit into the lld model
477 // of just a section name.
478 std::string segSectName = section.segmentName.str()
479 + "/" + section.sectionName.str();
480 file.addDefinedAtomInCustomSection(StringRef(), scope, atomType,
481 merge, false, false, offset,
482 size, segSectName, true, §ion);
484 file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size,
485 false, false, copyRefs, §ion);
490 return llvm::Error::success();
/// Searches normalizedFile.sections for a section whose range
/// [address, address + content.size()) contains the given address.
/// The return statements are not visible in this excerpt; callers compare the
/// result against null, so presumably nullptr means "no section found".
493 const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
495 for (const Section &s : normalizedFile.sections) {
496 uint64_t sAddr = s.address;
497 if ((sAddr <= address) && (address < sAddr+s.content.size())) {
/// Looks up the atom that covers address \p addr.
///
/// The section is found with findSectionCoveringAddress(), the address is
/// converted to a section-relative offset, and the atom lookup is delegated
/// to MachOFile::findAtomCoveringAddress().
/// \param addend [out] set to the offset of \p addr within the found atom.
/// Handling of a missing section and the return statement are not visible in
/// this excerpt.
504 const MachODefinedAtom *
505 findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file,
506 uint64_t addr, Reference::Addend &addend) {
507 const Section *sect = nullptr;
508 sect = findSectionCoveringAddress(normalizedFile, addr);
512 uint32_t offsetInTarget;
513 uint64_t offsetInSect = addr - sect->address;
515 file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
516 addend = offsetInTarget;
520 // Walks all relocations for a section in a normalized .o file and
521 // creates corresponding lld::Reference objects.
//
// Two lookup callbacks (by address, by symbol index) are handed to the
// ArchHandler, which decodes each relocation, or pair of relocations, into a
// kind, target atom and addend. Errors from the handler are re-wrapped with
// the relocation's fields for diagnostics.
522 llvm::Error convertRelocs(const Section §ion,
523 const NormalizedFile &normalizedFile,
526 ArchHandler &handler) {
527 // Utility function for ArchHandler to find atom by its address.
// sectIndex is 1-based; 0 means "find the section from the address".
528 auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr,
529 const lld::Atom **atom, Reference::Addend *addend)
531 if (sectIndex > normalizedFile.sections.size())
532 return llvm::make_error<GenericError>(Twine("out of range section "
533 "index (") + Twine(sectIndex) + ")");
534 const Section *sect = nullptr;
535 if (sectIndex == 0) {
536 sect = findSectionCoveringAddress(normalizedFile, addr);
538 return llvm::make_error<GenericError>(Twine("address (" + Twine(addr)
539 + ") is not in any section"));
541 sect = &normalizedFile.sections[sectIndex-1];
543 uint32_t offsetInTarget;
544 uint64_t offsetInSect = addr - sect->address;
545 *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget);
546 *addend = offsetInTarget;
547 return llvm::Error::success();
550 // Utility function for ArchHandler to find atom by its symbol index.
// The index is interpreted against the concatenation stabs, locals, globals,
// undefineds, in that order.
551 auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result)
553 // Find symbol from index.
554 const Symbol *sym = nullptr;
555 uint32_t numStabs = normalizedFile.stabsSymbols.size();
556 uint32_t numLocal = normalizedFile.localSymbols.size();
557 uint32_t numGlobal = normalizedFile.globalSymbols.size();
558 uint32_t numUndef = normalizedFile.undefinedSymbols.size();
559 assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?");
560 if (symbolIndex < numStabs+numLocal) {
561 sym = &normalizedFile.localSymbols[symbolIndex-numStabs];
562 } else if (symbolIndex < numStabs+numLocal+numGlobal) {
563 sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal];
564 } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) {
565 sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal-
568 return llvm::make_error<GenericError>(Twine("symbol index (")
569 + Twine(symbolIndex) + ") out of range");
572 // Find atom from symbol.
573 if ((sym->type & N_TYPE) == N_SECT) {
// NOTE(review): only the upper bound of sym->sect is checked before
// sections[sym->sect-1] is indexed; confirm sect cannot be 0 here.
574 if (sym->sect > normalizedFile.sections.size())
575 return llvm::make_error<GenericError>(Twine("symbol section index (")
576 + Twine(sym->sect) + ") out of range ");
577 const Section &symSection = normalizedFile.sections[sym->sect-1];
578 uint64_t targetOffsetInSect = sym->value - symSection.address;
579 MachODefinedAtom *target = file.findAtomCoveringAddress(symSection,
583 return llvm::Error::success();
585 return llvm::make_error<GenericError>("no atom found for defined symbol");
586 } else if ((sym->type & N_TYPE) == N_UNDF) {
587 const lld::Atom *target = file.findUndefAtom(sym->name);
590 return llvm::Error::success();
592 return llvm::make_error<GenericError>("no undefined atom found for sym");
595 return llvm::make_error<GenericError>("no atom found for symbol");
599 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
600 // Use old-school iterator so that paired relocations can be grouped.
601 for (auto it=section.relocations.begin(), e=section.relocations.end();
603 const Relocation &reloc = *it;
604 // Find atom this relocation is in.
605 if (reloc.offset > section.content.size())
606 return llvm::make_error<GenericError>(
607 Twine("r_address (") + Twine(reloc.offset)
608 + ") is larger than section size ("
609 + Twine(section.content.size()) + ")");
610 uint32_t offsetInAtom;
611 MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section,
614 assert(inAtom && "r_address in range, should have found atom");
615 uint64_t fixupAddress = section.address + reloc.offset;
617 const lld::Atom *target = nullptr;
618 Reference::Addend addend = 0;
619 Reference::KindValue kind;
620 if (handler.isPairedReloc(reloc)) {
621 // Handle paired relocations together.
// NOTE(review): the iterator is advanced without a visible check against e;
// confirm a trailing unpaired relocation cannot reach this line.
622 const Relocation &reloc2 = *++it;
623 auto relocErr = handler.getPairReferenceInfo(
624 reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable,
625 atomByAddr, atomBySymbol, &kind, &target, &addend);
// Re-wrap the handler's error with the fields of both relocations.
627 return handleErrors(std::move(relocErr),
628 [&](std::unique_ptr<GenericError> GE) {
629 return llvm::make_error<GenericError>(
630 Twine("bad relocation (") + GE->getMessage()
632 + section.segmentName + "/" + section.sectionName
633 + " (r1_address=" + Twine::utohexstr(reloc.offset)
634 + ", r1_type=" + Twine(reloc.type)
635 + ", r1_extern=" + Twine(reloc.isExtern)
636 + ", r1_length=" + Twine((int)reloc.length)
637 + ", r1_pcrel=" + Twine(reloc.pcRel)
638 + (!reloc.scattered ? (Twine(", r1_symbolnum=")
639 + Twine(reloc.symbol))
640 : (Twine(", r1_scattered=1, r1_value=")
641 + Twine(reloc.value)))
643 + ", (r2_address=" + Twine::utohexstr(reloc2.offset)
644 + ", r2_type=" + Twine(reloc2.type)
645 + ", r2_extern=" + Twine(reloc2.isExtern)
646 + ", r2_length=" + Twine((int)reloc2.length)
647 + ", r2_pcrel=" + Twine(reloc2.pcRel)
648 + (!reloc2.scattered ? (Twine(", r2_symbolnum=")
649 + Twine(reloc2.symbol))
650 : (Twine(", r2_scattered=1, r2_value=")
651 + Twine(reloc2.value)))
657 // Use ArchHandler to convert relocation record into information
658 // needed to instantiate an lld::Reference object.
659 auto relocErr = handler.getReferenceInfo(
660 reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr,
661 atomBySymbol, &kind, &target, &addend);
// Re-wrap the handler's error with the fields of the relocation.
663 return handleErrors(std::move(relocErr),
664 [&](std::unique_ptr<GenericError> GE) {
665 return llvm::make_error<GenericError>(
666 Twine("bad relocation (") + GE->getMessage()
668 + section.segmentName + "/" + section.sectionName
669 + " (r_address=" + Twine::utohexstr(reloc.offset)
670 + ", r_type=" + Twine(reloc.type)
671 + ", r_extern=" + Twine(reloc.isExtern)
672 + ", r_length=" + Twine((int)reloc.length)
673 + ", r_pcrel=" + Twine(reloc.pcRel)
674 + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol))
675 : (Twine(", r_scattered=1, r_value=")
676 + Twine(reloc.value)))
681 // Instantiate an lld::Reference object and add to its atom.
682 inAtom->addReference(Reference::KindNamespace::mach_o,
684 kind, offsetInAtom, target, addend);
687 return llvm::Error::success();
/// Tests whether \p section holds debug info: the S_ATTR_DEBUG attribute is
/// checked first, and the result on the visible return is whether the segment
/// name equals "__DWARF". The return taken when S_ATTR_DEBUG is clear is not
/// visible in this excerpt (presumably false).
690 bool isDebugInfoSection(const Section §ion) {
691 if ((section.attributes & S_ATTR_DEBUG) == 0)
693 return section.segmentName.equals("__DWARF");
/// Linear search over file.defined() for an atom whose name() equals \p name.
/// The Twine is materialized once into a std::string before the loop.
/// The return statements are not visible in this excerpt; callers treat a
/// null result as "not found".
696 static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) {
697 std::string strName = name.str();
698 for (auto *atom : file.defined())
699 if (atom->name() == strName)
/// Copies \p str into storage obtained from \p alloc and appends a
/// terminating NUL byte (str.size() + 1 bytes are allocated).
/// The return statement is not visible in this excerpt.
704 static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) {
705 char *strCopy = alloc.Allocate<char>(str.size() + 1);
706 memcpy(strCopy, str.data(), str.size());
707 strCopy[str.size()] = '\0';
/// Converts normalizedFile.stabsSymbols into a StabsDebugInfo attached to
/// \p file.
///
/// A two-state machine (start / inBeginEnd) walks the stabs. Each stab is
/// given an atom where one can be found and a string that is either the
/// original name or a copy made with copyDebugString(). The case labels and
/// the conditions that choose between copy and original are not visible in
/// this excerpt (presumably the copy is made when copyRefs is set).
/// \returns success (immediately when there are no stabs), or a GenericError
///          for an unresolvable or unrecognized stab.
711 llvm::Error parseStabs(MachOFile &file,
712 const NormalizedFile &normalizedFile,
715 if (normalizedFile.stabsSymbols.empty())
716 return llvm::Error::success();
718 // FIXME: Kill this off when we can move to sane yaml parsing.
719 std::unique_ptr<BumpPtrAllocator> allocator;
721 allocator = llvm::make_unique<BumpPtrAllocator>();
723 enum { start, inBeginEnd } state = start;
// Atom that following stabs are attributed to (see "stab.atom = currentAtom").
725 const Atom *currentAtom = nullptr;
726 uint64_t currentAtomAddress = 0;
727 StabsDebugInfo::StabsList stabsList;
728 for (const auto &stabSym : normalizedFile.stabsSymbols) {
// The atom is filled in below; it starts out null.
729 Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc,
730 stabSym.value, stabSym.name);
733 switch (static_cast<StabType>(stabSym.type)) {
// BNSYM handling (per the error text below): resolve the atom at the stab's
// address.
736 currentAtomAddress = stabSym.value;
737 Reference::Addend addend;
738 currentAtom = findAtomCoveringAddress(normalizedFile, file,
739 currentAtomAddress, addend);
741 return llvm::make_error<GenericError>(
742 "Non-zero addend for BNSYM '" + stabSym.name + "' in " +
745 stab.atom = currentAtom;
747 // FIXME: ld64 just issues a warning here - should we match that?
748 return llvm::make_error<GenericError>(
749 "can't find atom for stabs BNSYM at " +
750 Twine::utohexstr(stabSym.value) + " in " + file.path());
755 // Not associated with an atom, just copy.
757 stab.str = copyDebugString(stabSym.name, *allocator);
759 stab.str = stabSym.name;
// N_GSYM handling (per the error text below): the atom is looked up by name,
// first as "_" plus the text before ':' and otherwise by the full stab name.
762 auto colonIdx = stabSym.name.find(':');
763 if (colonIdx != StringRef::npos) {
764 StringRef name = stabSym.name.substr(0, colonIdx);
765 currentAtom = findDefinedAtomByName(file, "_" + name);
766 stab.atom = currentAtom;
768 stab.str = copyDebugString(stabSym.name, *allocator);
770 stab.str = stabSym.name;
772 currentAtom = findDefinedAtomByName(file, stabSym.name);
773 stab.atom = currentAtom;
775 stab.str = copyDebugString(stabSym.name, *allocator);
777 stab.str = stabSym.name;
779 if (stab.atom == nullptr)
780 return llvm::make_error<GenericError>(
781 "can't find atom for N_GSYM stabs" + stabSym.name +
782 " in " + file.path());
786 return llvm::make_error<GenericError>(
787 "old-style N_FUN stab '" + stabSym.name + "' unsupported");
789 return llvm::make_error<GenericError>(
790 "unrecognized stab symbol '" + stabSym.name + "'");
// Second switch (presumably the inBeginEnd state): stabs are attributed to
// the atom found earlier.
794 stab.atom = currentAtom;
795 switch (static_cast<StabType>(stabSym.type)) {
798 currentAtom = nullptr;
801 // Just copy the string.
803 stab.str = copyDebugString(stabSym.name, *allocator);
805 stab.str = stabSym.name;
808 return llvm::make_error<GenericError>(
809 "unrecognized stab symbol '" + stabSym.name + "'");
812 llvm::dbgs() << "Adding to stabsList: " << stab << "\n";
813 stabsList.push_back(stab);
816 file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList)));
818 // FIXME: Kill this off when we fix YAML memory ownership.
// The debug info takes ownership of the allocator holding the copied strings.
819 file.debugInfo()->setAllocator(std::move(allocator));
821 return llvm::Error::success();
/// Wraps the content of a section in an llvm::DataExtractor without copying.
/// Endianness and address size (8 or 4) are taken from normalizedFile.arch.
/// The returned extractor refers to the section's content, so the section
/// must outlive it.
824 static llvm::DataExtractor
825 dataExtractorFromSection(const NormalizedFile &normalizedFile,
827 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
828 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
829 StringRef SecData(reinterpret_cast<const char*>(S.content.data()),
// The second constructor argument is "is little endian", hence !isBig.
831 return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4);
834 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
835 // inspection" code if possible.
//
// Scans abbrevData from a cursor (its declaration is not visible in this
// excerpt), reading ULEB128 codes until one equals abbrCode. For every other
// entry it skips one ULEB128, one byte, and then pairs of ULEB128 values
// until both values of a pair are zero (presumably tag, has-children flag and
// the attribute/form list). The return statement is not visible here.
836 static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData,
840 while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) {
842 abbrevData.getULEB128(&offset);
844 abbrevData.getU8(&offset);
// Bitwise | (not ||) so that both values of each pair are always consumed.
846 while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset))
852 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
853 // inspection" code if possible.
//
// Reads a string attribute value from infoData at infoOffset, advancing
// infoOffset past it.
//  - DW_FORM_string: the C string is read inline from infoData.
//  - DW_FORM_strp: a 32-bit offset is read and resolved in stringsSection.
//  - any other form: a GenericError is returned.
854 static Expected<const char *>
855 getIndexedString(const NormalizedFile &normalizedFile,
856 llvm::dwarf::Form form, llvm::DataExtractor infoData,
857 uint32_t &infoOffset, const Section &stringsSection) {
858 if (form == llvm::dwarf::DW_FORM_string)
859 return infoData.getCStr(&infoOffset);
860 if (form != llvm::dwarf::DW_FORM_strp)
861 return llvm::make_error<GenericError>(
862 "string field encoded without DW_FORM_strp");
863 uint32_t stringOffset = infoData.getU32(&infoOffset);
864 llvm::DataExtractor stringsData =
865 dataExtractorFromSection(normalizedFile, stringsSection);
866 return stringsData.getCStr(&stringOffset);
869 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
870 // inspection" code if possible.
//
// Reads the first unit header from the info section, locates its top-level
// DIE's abbreviation in the abbrev section, and walks the attribute list to
// fill a TranslationUnitSource from DW_AT_name and DW_AT_comp_dir. All other
// attributes are skipped with DWARFFormValue::skipValue(). Returns a
// GenericError for malformed length, DWARF versions outside 2..4, or a
// top-level DIE that is not a compile unit.
871 static llvm::Expected<TranslationUnitSource>
872 readCompUnit(const NormalizedFile &normalizedFile,
874 const Section &abbrev,
875 const Section &strings,
877 // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE
878 // inspection" code if possible.
880 llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32;
881 auto infoData = dataExtractorFromSection(normalizedFile, info);
882 uint32_t length = infoData.getU32(&offset);
// An initial length of 0xffffffff selects DWARF64; a 64-bit value follows and
// is read but not stored.
883 if (length == 0xffffffff) {
884 Format = llvm::dwarf::DwarfFormat::DWARF64;
885 infoData.getU64(&offset);
887 else if (length > 0xffffff00)
888 return llvm::make_error<GenericError>("Malformed DWARF in " + path);
890 uint16_t version = infoData.getU16(&offset);
892 if (version < 2 || version > 4)
893 return llvm::make_error<GenericError>("Unsupported DWARF version in " +
896 infoData.getU32(&offset); // Abbrev offset (should be zero)
897 uint8_t addrSize = infoData.getU8(&offset);
899 uint32_t abbrCode = infoData.getULEB128(&offset);
900 auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev);
901 uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode);
902 uint64_t tag = abbrevData.getULEB128(&abbrevOffset);
903 if (tag != llvm::dwarf::DW_TAG_compile_unit)
904 return llvm::make_error<GenericError>("top level DIE is not a compile unit");
// One byte after the tag is read and discarded (presumably the has-children
// flag).
906 abbrevData.getU8(&abbrevOffset);
908 llvm::dwarf::Form form;
909 llvm::dwarf::FormParams formParams = {version, addrSize, Format};
910 TranslationUnitSource tu;
// NOTE(review): the two operands of | both read from abbrevOffset, and C++
// does not specify which operand is evaluated first; confirm name and form
// cannot end up swapped. The loop ends when both values are zero.
911 while ((name = abbrevData.getULEB128(&abbrevOffset)) |
912 (form = static_cast<llvm::dwarf::Form>(
913 abbrevData.getULEB128(&abbrevOffset))) &&
914 (name != 0 || form != 0)) {
916 case llvm::dwarf::DW_AT_name: {
917 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
921 return eName.takeError();
924 case llvm::dwarf::DW_AT_comp_dir: {
925 if (auto eName = getIndexedString(normalizedFile, form, infoData, offset,
929 return eName.takeError();
// Attributes that are not of interest are skipped in the info data.
933 llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams);
/// Attaches debug info to \p file.
///
/// The __DWARF segment is searched for __debug_info, __debug_abbrev and
/// __debug_str. The visible code delegates to parseStabs() on one path
/// (presumably when there is no __debug_info section; the condition is not
/// visible in this excerpt). Otherwise the compile unit is read with
/// readCompUnit() and stored as DwarfDebugInfo.
/// \returns success, or a GenericError for a too-small __debug_info section,
///          a missing abbrev section, or a readCompUnit() failure.
939 llvm::Error parseDebugInfo(MachOFile &file,
940 const NormalizedFile &normalizedFile, bool copyRefs) {
942 // Find the interesting debug info sections.
943 const Section *debugInfo = nullptr;
944 const Section *debugAbbrev = nullptr;
945 const Section *debugStrings = nullptr;
947 for (auto &s : normalizedFile.sections) {
948 if (s.segmentName == "__DWARF") {
949 if (s.sectionName == "__debug_info")
951 else if (s.sectionName == "__debug_abbrev")
953 else if (s.sectionName == "__debug_str")
959 return parseStabs(file, normalizedFile, copyRefs);
// An empty __debug_info section is not an error; there is nothing to record.
961 if (debugInfo->content.size() == 0)
962 return llvm::Error::success();
964 if (debugInfo->content.size() < 12)
965 return llvm::make_error<GenericError>("Malformed __debug_info section in " +
966 file.path() + ": too small");
// NOTE(review): the message below names "__dwarf_abbrev" while the section
// searched for above is "__debug_abbrev"; confirm which spelling is intended.
969 return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " +
// NOTE(review): debugStrings is dereferenced here; no null check for it is
// visible in this excerpt.
972 if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev,
973 *debugStrings, file.path())) {
974 // FIXME: Kill off allocator and code under 'copyRefs' when we fix YAML
976 std::unique_ptr<BumpPtrAllocator> allocator;
978 allocator = llvm::make_unique<BumpPtrAllocator>();
979 tuOrErr->name = copyDebugString(tuOrErr->name, *allocator);
980 tuOrErr->path = copyDebugString(tuOrErr->path, *allocator);
982 file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr)));
// The debug info takes ownership of the allocator holding the copied strings.
984 file.debugInfo()->setAllocator(std::move(allocator));
986 return tuOrErr.takeError();
988 return llvm::Error::success();
/// Reads a signed pointer-sized value from \p addr with the given endianness:
/// a read64() result on one path, and a read32() result stored in an int32_t
/// on the other. The branch condition (presumably is64) and the final return
/// are not visible in this excerpt.
991 static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) {
993 return read64(addr, isBig);
995 int32_t res = read32(addr, isBig);
999 /// --- Augmentation String Processing ---
// Per-CIE state filled in by processAugmentationString(). The line that opens
// the enclosing type (CIEInfo, per the typedef below) is not visible in this
// excerpt. Offsets are relative to the start of the augmentation data; ~0U
// is the "not set" default that processCIE() tests against.
// True once a leading 'z' has been seen in the augmentation string.
1002 bool _augmentationDataPresent = false;
// True when the augmentation string contains "eh".
1003 bool _mayHaveEH = false;
// Offset recorded for 'L' in the augmentation string.
1004 uint32_t _offsetOfLSDA = ~0U;
// Offset recorded for 'P' in the augmentation string.
1005 uint32_t _offsetOfPersonality = ~0U;
// Offset recorded for 'R' in the augmentation string.
1006 uint32_t _offsetOfFDEPointerEncoding = ~0U;
// Total augmentation data length computed from the augmentation string.
1007 uint32_t _augmentationDataLength = ~0U;
// Maps a CIE atom to the information parsed from it.
1010 typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap;
/// Parses a NUL-terminated CIE augmentation string and records in cieInfo
/// what it implies about the augmentation data.
///
/// An empty string succeeds immediately. Any other string must start with
/// 'z'. After that, 'L', 'P' and 'R' record the current offset in the
/// augmentation data and advance it by 1, 5 and 1 bytes respectively, and
/// "eh" sets _mayHaveEH. The code that advances the string index and reports
/// the string length is not visible in this excerpt.
/// \returns success, or a GenericError for a string that does not start with
///          'z' or has an 'e' not followed by 'h'.
1012 static llvm::Error processAugmentationString(const uint8_t *augStr,
1016 if (augStr[0] == '\0') {
1018 return llvm::Error::success();
1021 if (augStr[0] != 'z')
1022 return llvm::make_error<GenericError>("expected 'z' at start of "
1023 "augmentation string");
1025 cieInfo._augmentationDataPresent = true;
// Running offset into the augmentation data implied by the letters seen so
// far.
1028 uint32_t offsetInAugmentationData = 0;
1029 while (augStr[idx] != '\0') {
1030 if (augStr[idx] == 'L') {
1031 cieInfo._offsetOfLSDA = offsetInAugmentationData;
1032 // This adds a single byte to the augmentation data.
1033 ++offsetInAugmentationData;
1037 if (augStr[idx] == 'P') {
1038 cieInfo._offsetOfPersonality = offsetInAugmentationData;
1039 // This adds a single byte to the augmentation data for the encoding,
1040 // then a number of bytes for the pointer data.
1041 // FIXME: We are assuming 4 is correct here for the pointer size as we
1042 // always currently use delta32ToGOT.
1043 offsetInAugmentationData += 5;
1047 if (augStr[idx] == 'R') {
1048 cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData;
1049 // This adds a single byte to the augmentation data.
1050 ++offsetInAugmentationData;
1054 if (augStr[idx] == 'e') {
1055 if (augStr[idx + 1] != 'h')
1056 return llvm::make_error<GenericError>("expected 'eh' in "
1057 "augmentation string");
1058 cieInfo._mayHaveEH = true;
// The total length implied by the augmentation string.
1065 cieInfo._augmentationDataLength = offsetInAugmentationData;
1068 return llvm::Error::success();
/// Parses the CIE held in \p atom, records what was learned in \p cieInfos
/// (keyed by \p atom), and makes sure the CIE carries a reference to its
/// personality function when the augmentation string declares one.
///
/// When a personality entry is present the atom may hold at most one
/// existing reference, and it must sit at the personality pointer's offset.
/// If the atom has no reference, one is synthesised from the 32-bit delta
/// stored in the CIE. When no personality entry is present the atom must
/// not have any references.
///
/// \returns a GenericError for an augmentation-string error, an augmentation
/// length mismatch, a misplaced or surplus relocation, or an unexpected
/// relocation; success otherwise.
/// NOTE(review): some parameters (for example `file` and `offset`) and a few
/// statements are not visible in this excerpt.
1071 static llvm::Error processCIE(const NormalizedFile &normalizedFile,
1073 mach_o::ArchHandler &handler,
1074 const Section *ehFrameSection,
1075 MachODefinedAtom *atom,
1077 CIEInfoMap &cieInfos) {
1078 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1079 const uint8_t *frameData = atom->rawContent().data();
// The record begins with a 32-bit length word. When that word is 0xffffffff
// the CIE ID field is located after an additional 8 bytes (presumably the
// DWARF extended-length form -- confirm).
1083 uint32_t size = read32(frameData, isBig);
1084 uint64_t cieIDField = size == 0xffffffffU
1085 ? sizeof(uint32_t) + sizeof(uint64_t)
// Field offsets below are all relative to the start of the atom's content.
1087 uint64_t versionField = cieIDField + sizeof(uint32_t);
1088 uint64_t augmentationStringField = versionField + sizeof(uint8_t);
1090 unsigned augmentationStringLength = 0;
1091 if (auto err = processAugmentationString(frameData + augmentationStringField,
1092 cieInfo, augmentationStringLength))
1095 if (cieInfo._offsetOfPersonality != ~0U) {
1096 // If we have augmentation data for the personality function, then we may
1097 // need to implicitly generate its relocation.
1099 // Parse the EH Data field which is pointer sized.
1100 uint64_t EHDataField = augmentationStringField + augmentationStringLength;
1101 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
// The EH data field only occupies space when "eh" was in the augmentation
// string.
1102 unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0);
1104 // Parse Code Align Factor which is a ULEB128.
1105 uint64_t CodeAlignField = EHDataField + EHDataFieldSize;
1106 unsigned lengthFieldSize = 0;
// Only the encoded size is needed here; the decoded value is discarded.
1107 llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize);
1109 // Parse Data Align Factor which is a SLEB128.
1110 uint64_t DataAlignField = CodeAlignField + lengthFieldSize;
1111 llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize);
1113 // Parse Return Address Register which is a byte.
1114 uint64_t ReturnAddressField = DataAlignField + lengthFieldSize;
1116 // Parse the augmentation length which is a ULEB128.
1117 uint64_t AugmentationLengthField = ReturnAddressField + 1;
1118 uint64_t AugmentationLength =
1119 llvm::decodeULEB128(frameData + AugmentationLengthField,
// The length stored in the CIE must agree with the one implied by the
// augmentation string.
1122 if (AugmentationLength != cieInfo._augmentationDataLength)
1123 return llvm::make_error<GenericError>("CIE augmentation data length "
1126 // Get the start address of the augmentation data.
1127 uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize;
1129 // Parse the personality function from the augmentation data.
1130 uint64_t PersonalityField =
1131 AugmentationDataField + cieInfo._offsetOfPersonality;
1133 // Parse the personality encoding.
1134 // FIXME: Verify that this is a 32-bit pcrel offset.
// The pointer itself follows the one-byte encoding.
1135 uint64_t PersonalityFunctionField = PersonalityField + 1;
1137 if (atom->begin() != atom->end()) {
1138 // If we have an explicit relocation, then make sure it matches this
1139 // offset as this is where we'd expect it to be applied to.
1140 DefinedAtom::reference_iterator CurrentRef = atom->begin();
1141 if (CurrentRef->offsetInAtom() != PersonalityFunctionField)
1142 return llvm::make_error<GenericError>("CIE personality reloc at "
1145 if (++CurrentRef != atom->end())
1146 return llvm::make_error<GenericError>("CIE contains too many relocs");
1148 // Implicitly generate the personality function reloc. It's assumed to
1149 // be a delta32 offset to a GOT entry.
1150 // FIXME: Parse the encoding and check this.
// The stored delta is relative to the address of the pointer field itself.
1151 int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig);
1152 uint64_t funcAddress = ehFrameSection->address + offset +
1153 PersonalityFunctionField;
1154 funcAddress += funcDelta;
1156 const MachODefinedAtom *func = nullptr;
1157 Reference::Addend addend;
1158 func = findAtomCoveringAddress(normalizedFile, file, funcAddress,
1160 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1161 handler.unwindRefToPersonalityFunctionKind(),
1162 PersonalityFunctionField, func, addend);
1164 } else if (atom->begin() != atom->end()) {
1165 // Otherwise, we expect there to be no relocations in this atom as the only
1166 // relocation would have been to the personality function.
1167 return llvm::make_error<GenericError>("unexpected relocation in CIE");
// Publish the parsed info so FDEs that point at this CIE can find it.
1171 cieInfos[atom] = std::move(cieInfo);
1173 return llvm::Error::success();
/// Parses the FDE held in \p atom and makes sure it carries references to
/// its parent CIE, to the function it describes and, when the CIE declares
/// one and the FDE has augmentation data, to its LSDA.
///
/// Existing references are sorted and then consumed in offset order. For
/// each field, an existing reference at that offset is checked (by assert)
/// to be of the expected kind; otherwise a new reference is added using the
/// value stored in the FDE. The CIE's info is looked up in \p cieInfos.
///
/// The visible code returns success; a reference that is passed over
/// without being consumed triggers llvm::report_fatal_error.
/// NOTE(review): some parameters (for example `file` and `offset`) and a few
/// statements are not visible in this excerpt.
1176 static llvm::Error processFDE(const NormalizedFile &normalizedFile,
1178 mach_o::ArchHandler &handler,
1179 const Section *ehFrameSection,
1180 MachODefinedAtom *atom,
1182 const CIEInfoMap &cieInfos) {
1184 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1185 const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch);
1187 // Compiler wasn't lazy and actually told us what it meant.
1188 // Unfortunately, the compiler may not have generated references for all of
1189 // [cie, func, lsda] and so we still need to parse the FDE and add references
1190 // for any the compiler didn't generate.
// Sorting lets the helper below walk the references in offset order.
1191 if (atom->begin() != atom->end())
1192 atom->sortReferences();
1194 DefinedAtom::reference_iterator CurrentRef = atom->begin();
1196 // This helper returns the reference (if one exists) at the offset we are
1197 // currently processing. It automatically increments the ref iterator if we
1198 // do return a ref, and throws an error if we pass over a ref without
1200 auto currentRefGetter = [&CurrentRef,
1201 &atom](uint64_t Offset)->const Reference* {
1202 // If there are no more refs found, then we are done.
1203 if (CurrentRef == atom->end())
1206 const Reference *Ref = *CurrentRef;
1208 // If we haven't reached the offset for this reference, then return that
1209 // we don't yet have a reference to process.
1210 if (Offset < Ref->offsetInAtom())
1213 // If the offset is equal, then we want to process this ref.
1214 if (Offset == Ref->offsetInAtom()) {
1219 // The current ref is at an offset which is earlier than the current
1220 // offset, then we failed to consume it when we should have. In this case
1222 llvm::report_fatal_error("Skipped reference when processing FDE");
1225 // Helper to either get the reference at this current location, and verify
1226 // that it is of the expected type, or add a reference of that type.
1227 // Returns the reference target.
1228 auto verifyOrAddReference = [&](uint64_t targetAddress,
1229 Reference::KindValue refKind,
1230 uint64_t refAddress,
1231 bool allowsAddend)->const Atom* {
1232 if (auto *ref = currentRefGetter(refAddress)) {
1233 // The compiler already emitted a relocation for the CIE ref. This should
1234 // have been converted to the correct type of reference in
1235 // get[Pair]ReferenceInfo().
1236 assert(ref->kindValue() == refKind &&
1237 "Incorrect EHFrame reference kind");
1238 return ref->target();
// No existing reference at this offset: synthesise one from the address
// computed by the caller.
1240 Reference::Addend addend;
1241 auto *target = findAtomCoveringAddress(normalizedFile, file,
1242 targetAddress, addend);
1243 atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(),
1244 refKind, refAddress, target, addend);
1247 assert(!addend && "EHFrame reference cannot have addend");
1251 const uint8_t *startFrameData = atom->rawContent().data();
1252 const uint8_t *frameData = startFrameData;
// As in processCIE(): a length word of 0xffffffff moves the next field 8
// bytes further on (presumably the DWARF extended-length form -- confirm).
1254 uint32_t size = read32(frameData, isBig);
1255 uint64_t cieFieldInFDE = size == 0xffffffffU
1256 ? sizeof(uint32_t) + sizeof(uint64_t)
1259 // Linker needs to fixup a reference from the FDE to its parent CIE (a
1260 // 32-bit byte offset backwards in the __eh_frame section).
1261 uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig);
1262 uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE;
// The delta points backwards, hence the subtraction.
1263 cieAddress -= cieDelta;
1265 auto *cieRefTarget = verifyOrAddReference(cieAddress,
1266 handler.unwindRefToCIEKind(),
1267 cieFieldInFDE, false);
1268 const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget);
1269 assert(cie && cie->contentType() == DefinedAtom::typeCFI &&
1270 "FDE's CIE field does not point at the start of a CIE.");
// NOTE(review): the result of find() is dereferenced without being compared
// against end(); this relies on the CIE having been processed already.
1272 const CIEInfo &cieInfo = cieInfos.find(cie)->second;
1274 // Linker needs to fixup reference from the FDE to the function it's
1275 // describing. FIXME: there are actually different ways to do this, and the
1276 // particular method used is specified in the CIE's augmentation fields
1278 uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t);
// The stored value is relative to the address of the range field itself.
1280 int64_t functionFromFDE = readSPtr(is64, isBig,
1281 frameData + rangeFieldInFDE);
1282 uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE;
1283 rangeStart += functionFromFDE;
1285 verifyOrAddReference(rangeStart,
1286 handler.unwindRefToFunctionKind(),
1287 rangeFieldInFDE, true);
1289 // Handle the augmentation data if there is any.
1290 if (cieInfo._augmentationDataPresent) {
1291 // First process the augmentation data length field.
// It follows two pointer-sized fields that start at rangeFieldInFDE.
1292 uint64_t augmentationDataLengthFieldInFDE =
1293 rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t));
1294 unsigned lengthFieldSize = 0;
1295 uint64_t augmentationDataLength =
1296 llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE,
// An LSDA pointer is only read when the CIE declared 'L' and this FDE
// actually has augmentation data.
1299 if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) {
1301 // Look at the augmentation data field.
1302 uint64_t augmentationDataFieldInFDE =
1303 augmentationDataLengthFieldInFDE + lengthFieldSize;
1305 int64_t lsdaFromFDE = readSPtr(is64, isBig,
1306 frameData + augmentationDataFieldInFDE);
1307 uint64_t lsdaStart =
1308 ehFrameSection->address + offset + augmentationDataFieldInFDE +
// NOTE(review): the LSDA reference is requested with the same kind as the
// function reference above (unwindRefToFunctionKind()).
1311 verifyOrAddReference(lsdaStart,
1312 handler.unwindRefToFunctionKind(),
1313 augmentationDataFieldInFDE, true);
1317 return llvm::Error::success();
/// Locates the __TEXT/__eh_frame section of \p normalizedFile and, for every
/// atom in it, runs processCIE() or processFDE() depending on what
/// ArchHandler::isDwarfCIE() reports for that atom. Returns success straight
/// away when the file has no such section.
/// NOTE(review): the early-out inside the callback and the final return of
/// ehFrameErr are not visible in this excerpt.
1320 llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile,
1322 mach_o::ArchHandler &handler) {
1324 const Section *ehFrameSection = nullptr;
1325 for (auto §ion : normalizedFile.sections)
1326 if (section.segmentName == "__TEXT" &&
1327 section.sectionName == "__eh_frame") {
1328 ehFrameSection = §ion;
1332 // No __eh_frame so nothing to do.
1333 if (!ehFrameSection)
1334 return llvm::Error::success();
// The per-atom callback returns void, so any error is carried out of it
// through this variable.
1336 llvm::Error ehFrameErr = llvm::Error::success();
// Filled by processCIE() and read by processFDE().
1337 CIEInfoMap cieInfos;
1339 file.eachAtomInSection(*ehFrameSection,
1340 [&](MachODefinedAtom *atom, uint64_t offset) -> void {
1341 assert(atom->contentType() == DefinedAtom::typeCFI);
1343 // Bail out if we've encountered an error.
1347 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1348 if (ArchHandler::isDwarfCIE(isBig, atom))
1349 ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection,
1350 atom, offset, cieInfos);
1352 ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection,
1353 atom, offset, cieInfos);
/// Validates the ObjC image-info section \p sect and copies its settings
/// onto `file`.
///
/// The section content must be exactly 8 bytes: a 32-bit version word at
/// offset 0 followed by a 32-bit flags word at offset 4, both read with the
/// byte order of \p normalizedFile's architecture. Flags requesting GC are
/// rejected. The ObjC constraint is set from the simulator retain/release
/// flag, and the Swift version is taken from bits 8..15 of the flags.
///
/// \returns a GenericError when the size is wrong, the version check fails
/// or GC is requested; success otherwise.
/// NOTE(review): the remaining parameter(s) and the version test itself are
/// not visible in this excerpt.
1359 llvm::Error parseObjCImageInfo(const Section §,
1360 const NormalizedFile &normalizedFile,
1363 // struct objc_image_info {
1364 // uint32_t version; // initially 0
1368 ArrayRef<uint8_t> content = sect.content;
1369 if (content.size() != 8)
1370 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1372 " in file " + file.path() +
1373 " should be 8 bytes in size");
1375 const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch);
1376 uint32_t version = read32(content.data(), isBig);
1378 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1380 " in file " + file.path() +
1381 " should have version=0");
// The flags word is the second 32-bit field.
1383 uint32_t flags = read32(content.data() + 4, isBig);
// Either GC-related flag makes the file unusable.
1384 if (flags & (MachOLinkingContext::objc_supports_gc |
1385 MachOLinkingContext::objc_gc_only))
1386 return llvm::make_error<GenericError>(sect.segmentName + "/" +
1388 " in file " + file.path() +
1389 " uses GC. This is not supported");
1391 if (flags & MachOLinkingContext::objc_retainReleaseForSimulator)
1392 file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator);
1394 file.setObjcConstraint(MachOLinkingContext::objc_retainRelease);
// The Swift version is the second byte of the flags word.
1396 file.setSwiftVersion((flags >> 8) & 0xFF);
1398 return llvm::Error::success();
1401 /// Converts normalized mach-o file into an lld::File and lld::Atoms.
/// A new MachOFile is created for \p path and populated by
/// normalizedObjectToAtoms(). If that call reports an error the error is
/// returned; otherwise ownership of the file is returned as a File.
1402 llvm::Expected<std::unique_ptr<lld::File>>
1403 objectToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1405 std::unique_ptr<MachOFile> file(new MachOFile(path));
1406 if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs))
1407 return std::move(ec);
// Hand the file back through the base-class pointer type.
1408 return std::unique_ptr<File>(std::move(file));
/// Builds a MachODylibFile for \p path from \p normalizedFile by calling
/// normalizedDylibToAtoms(). If that call reports an error the error is
/// returned; otherwise ownership of the file is returned as a File.
1411 llvm::Expected<std::unique_ptr<lld::File>>
1412 dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1414 // Instantiate SharedLibraryFile object.
1415 std::unique_ptr<MachODylibFile> file(new MachODylibFile(path));
1416 if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs))
1417 return std::move(ec);
// Hand the file back through the base-class pointer type.
1418 return std::unique_ptr<File>(std::move(file));
1421 } // anonymous namespace
1423 namespace normalized {
/// Returns true when the section is named either __OBJC/__image_info or
/// __DATA/__objc_imageinfo (segment name / section name).
1425 static bool isObjCImageInfo(const Section §) {
1426 return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") ||
1427 (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo");
// Populates `file` with the atoms and references described by
// `normalizedFile`. The visible stages, in order, are: atoms from sections
// (debug-info and ObjC image-info sections get special handling), atoms from
// undefined symbols, conversion of relocations to References, arch-specific
// extra references, __eh_frame references, data-in-code transition
// references, caching of file-level attributes, sorting of each atom's
// references, and debug-info parsing.
// NOTE(review): the return type, the last parameter and the statements that
// propagate `ec`/`err` are not visible in this excerpt.
1431 normalizedObjectToAtoms(MachOFile *file,
1432 const NormalizedFile &normalizedFile,
1434 LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: "
1435 << file->path() << "\n");
// True when the object file's header has MH_SUBSECTIONS_VIA_SYMBOLS set.
1436 bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0);
1438 // Create atoms from each section.
1439 for (auto § : normalizedFile.sections) {
1441 // If this is a debug-info section parse it specially.
1442 if (isDebugInfoSection(sect))
1445 // If the file contains an objc_image_info struct, then we should parse the
1446 // ObjC flags and Swift version.
1447 if (isObjCImageInfo(sect)) {
1448 if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file))
1450 // We then skip adding atoms for this section as we use the ObjCPass to
1451 // re-emit this data after it has been aggregated for all files.
1455 bool customSectionName;
1456 DefinedAtom::ContentType atomType = atomTypeFromSection(sect,
1458 if (auto ec = processSection(atomType, sect, customSectionName,
1459 normalizedFile, *file, scatterable, copyRefs))
1462 // Create atoms from undefined symbols.
1463 for (auto &sym : normalizedFile.undefinedSymbols) {
1464 // Undefined symbols with n_value != 0 are actually tentative definitions.
1465 if (sym.value == Hex64(0)) {
1466 file->addUndefinedAtom(sym.name, copyRefs);
// For a tentative definition sym.value is passed along, and the alignment
// is 2 raised to the value held above the low 8 bits of sym.desc.
1468 file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value,
1469 DefinedAtom::Alignment(1 << (sym.desc >> 8)),
1474 // Convert mach-o relocations to References
1475 std::unique_ptr<mach_o::ArchHandler> handler
1476 = ArchHandler::create(normalizedFile.arch);
1477 for (auto § : normalizedFile.sections) {
1478 if (isDebugInfoSection(sect))
1480 if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable,
1485 // Add additional arch-specific References
1486 file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void {
1487 handler->addAdditionalReferences(*atom);
1490 // Each __eh_frame section needs references to both __text (the function we're
1491 // providing unwind info for) and itself (FDE -> CIE). These aren't
1492 // represented in the relocations on some architectures, so we have to add
1493 // them back in manually there.
1494 if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler))
1497 // Process mach-o data-in-code regions array. That information is encoded in
1498 // atoms as References at each transition point.
// nextIndex is used below to peek at the entry following the current one.
// NOTE(review): the statement that advances it is not visible here.
1499 unsigned nextIndex = 0;
1500 for (const DataInCode &entry : normalizedFile.dataInCode) {
1502 const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
1504 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address ("
1505 + Twine(entry.offset)
1506 + ") is not in any section"));
1508 uint64_t offsetInSect = entry.offset - s->address;
1509 uint32_t offsetInAtom;
1510 MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
// A data-in-code range must lie entirely inside a single atom.
1512 if (offsetInAtom + entry.length > atom->size()) {
1513 return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry "
1515 + Twine(entry.offset)
1517 + Twine(entry.length)
1518 + ") crosses atom boundary."));
1520 // Add reference that marks start of data-in-code.
// The reference targets the atom itself and carries entry.kind as addend.
1521 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1522 handler->dataInCodeTransitionStart(*atom),
1523 offsetInAtom, atom, entry.kind);
1525 // Peek at next entry, if it starts where this one ends, skip ending ref.
1526 if (nextIndex < normalizedFile.dataInCode.size()) {
1527 const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
1528 if (nextEntry.offset == (entry.offset + entry.length))
1532 // If data goes to end of function, skip ending ref.
1533 if ((offsetInAtom + entry.length) == atom->size())
1536 // Add reference that marks end of data-in-code.
1537 atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(),
1538 handler->dataInCodeTransitionEnd(*atom),
1539 offsetInAtom+entry.length, atom, 0);
1542 // Cache some attributes on the file for use later.
1543 file->setFlags(normalizedFile.flags);
1544 file->setArch(normalizedFile.arch);
1545 file->setOS(normalizedFile.os);
1546 file->setMinVersion(normalizedFile.minOSverson);
1547 file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind);
1549 // Sort references in each atom to their canonical order.
// NOTE(review): each atom is reinterpret_cast to SimpleDefinedAtom here;
// this presumably relies on every defined atom in the file being one.
1550 for (const DefinedAtom* defAtom : file->defined()) {
1551 reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();
1554 if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs))
1557 return llvm::Error::success();
// Copies the dylib identity (install name, compatibility version, current
// version) from `normalizedFile` onto `file`, registers its exported
// symbols, and registers every dependent dylib whose kind is
// LC_REEXPORT_DYLIB as a re-export. The visible code always returns success.
// NOTE(review): the return type, the last parameter and the branch that
// separates the export-trie loop from the global-symbol loop are not
// visible in this excerpt.
1561 normalizedDylibToAtoms(MachODylibFile *file,
1562 const NormalizedFile &normalizedFile,
1564 file->setInstallName(normalizedFile.installName);
1565 file->setCompatVersion(normalizedFile.compatVersion);
1566 file->setCurrentVersion(normalizedFile.currentVersion);
1568 // Tell MachODylibFile object about all symbols it exports.
1569 if (!normalizedFile.exportInfo.empty()) {
1570 // If exports trie exists, use it instead of traditional symbol table.
1571 for (const Export &exp : normalizedFile.exportInfo) {
1572 bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION);
1573 // StringRefs from export iterator are ephemeral, so force copy.
1574 file->addExportedSymbol(exp.name, weakDef, true);
// Symbol-table path: every global symbol is registered as an export, and
// the caller's copyRefs choice is passed through.
1577 for (auto &sym : normalizedFile.globalSymbols) {
1578 assert((sym.scope & N_EXT) && "only expect external symbols here");
1579 bool weakDef = (sym.desc & N_WEAK_DEF);
1580 file->addExportedSymbol(sym.name, weakDef, copyRefs);
1583 // Tell MachODylibFile object about all dylibs it re-exports.
1584 for (const DependentDylib &dep : normalizedFile.dependentDylibs) {
1585 if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB)
1586 file->addReExportedDylib(dep.path);
1588 return llvm::Error::success();
/// Reverse lookup in sectsToAtomType: finds a table entry whose atom type is
/// \p atomType and reports its segment name, section name and section type
/// through the reference parameters.
///
/// \p relocsToDefinedCanBeImplicit is set to true only for
/// DefinedAtom::typeCFI. For DefinedAtom::typeCode the section attributes
/// are set to S_ATTR_PURE_INSTRUCTIONS. Reaching the end of the table
/// without a match is treated as unreachable.
/// NOTE(review): the statements that skip a non-matching entry, return after
/// a match, and give sectionAttrs its default value are not visible in this
/// excerpt.
1591 void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType,
1592 StringRef &segmentName,
1593 StringRef §ionName,
1594 SectionType §ionType,
1595 SectionAttr §ionAttrs,
1596 bool &relocsToDefinedCanBeImplicit) {
// The loop stops at the first entry whose atom type is typeUnknown.
1598 for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ;
1599 p->atomType != DefinedAtom::typeUnknown; ++p) {
1600 if (p->atomType != atomType)
1602 // Wild carded entries are ignored for reverse lookups.
1603 if (p->segmentName.empty() || p->sectionName.empty())
1605 segmentName = p->segmentName;
1606 sectionName = p->sectionName;
1607 sectionType = p->sectionType;
1609 relocsToDefinedCanBeImplicit = false;
1610 if (atomType == DefinedAtom::typeCode)
1611 sectionAttrs = S_ATTR_PURE_INSTRUCTIONS;
1612 if (atomType == DefinedAtom::typeCFI)
1613 relocsToDefinedCanBeImplicit = true;
1616 llvm_unreachable("content type not yet supported");
/// Dispatches on normalizedFile.fileType: one group of file types is handed
/// to dylibToAtoms() and another to objectToAtoms(). Any file type not
/// handled by the switch is treated as unreachable.
/// NOTE(review): the case labels are not visible in this excerpt, so which
/// file types map to which converter cannot be stated from here.
1619 llvm::Expected<std::unique_ptr<lld::File>>
1620 normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
1622 switch (normalizedFile.fileType) {
1625 return dylibToAtoms(normalizedFile, path, copyRefs);
1627 return objectToAtoms(normalizedFile, path, copyRefs);
1629 llvm_unreachable("unhandled MachO file type!");
1633 } // namespace normalized
1634 } // namespace mach_o