1 //===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \file For mach-o object files, this implementation converts normalized
12 /// mach-o in memory to mach-o binary on disk.
24 #include "MachONormalizedFile.h"
25 #include "MachONormalizedFileBinaryUtils.h"
26 #include "lld/Core/Error.h"
27 #include "lld/Core/LLVM.h"
28 #include "llvm/ADT/ilist.h"
29 #include "llvm/ADT/ilist_node.h"
30 #include "llvm/ADT/SmallString.h"
31 #include "llvm/ADT/SmallVector.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/Support/Casting.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/Errc.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/FileOutputBuffer.h"
38 #include "llvm/Support/Format.h"
39 #include "llvm/Support/Host.h"
40 #include "llvm/Support/MachO.h"
41 #include "llvm/Support/MemoryBuffer.h"
42 #include "llvm/Support/raw_ostream.h"
46 #include <system_error>
48 using namespace llvm::MachO;
52 namespace normalized {
54 struct TrieNode; // Forward declaration.
/// One edge of the trie: pairs a label substring with the node it leads to.
/// Deriving from llvm::ilist_node lets edges be chained into an llvm::ilist.
// NOTE(review): the constructor initializes _subString, but its declaration
// is not visible in this listing (numbering skips 58-59) — confirm.
56 struct TrieEdge : public llvm::ilist_node<TrieEdge> {
// Records the edge label \p s and the child \p node; nothing else is done.
57 TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {}
// Node reached by following this edge (raw pointer; ownership is not
// established by the visible code).
60 struct TrieNode *_child;
63 } // namespace normalized
69 using lld::mach_o::normalized::TrieEdge;
/// ilist traits specialization for TrieEdge. It embeds the list sentinel in
/// the traits object and turns node deletion into a no-op.
// NOTE(review): the `template <>` line and any access specifiers are not
// visible in this listing (numbering skips 70, 73, 75, ...).
71 struct ilist_traits<TrieEdge>
72 : public ilist_default_traits<TrieEdge> {
// Storage for the sentinel; `mutable` so the const accessors below can hand
// out a non-const pointer to it.
74 mutable ilist_half_node<TrieEdge> Sentinel;
// Returns the embedded sentinel viewed as a TrieEdge; nothing is allocated.
76 TrieEdge *createSentinel() const {
77 return static_cast<TrieEdge*>(&Sentinel);
// The sentinel lives inside this object, so there is nothing to destroy.
79 void destroySentinel(TrieEdge *) const {}
// Head management: both hooks simply return the embedded sentinel.
81 TrieEdge *provideInitialHead() const { return createSentinel(); }
82 TrieEdge *ensureHead(TrieEdge*) const { return createSentinel(); }
83 static void noteHead(TrieEdge*, TrieEdge*) {}
// Deliberately empty: removing an edge from the list does not free it.
84 void deleteNode(TrieEdge *N) {}
// Declared only; no definition is visible in this listing.
87 void createNode(const TrieEdge &);
94 namespace normalized {
// List type used for a node's outgoing edges.
97 typedef llvm::ilist<TrieEdge> TrieEdgeList;
// NOTE(review): the `struct TrieNode` header and the first line of its
// constructor are not visible here (numbering skips 98-99); the lines below
// are the rest of that constructor and part of the member list.
// Constructor initializer list: stores the string \p s and zeroes the
// numeric fields; a new node starts without export info.
100 : _cummulativeString(s), _address(0), _flags(0), _other(0),
101 _trieOffset(0), _hasExportInfo(false) {}
102 ~TrieNode() = default;
// Adds one Export entry; \p allocator and \p allNodes are presumably used to
// create and track new nodes — the definition is not visible, confirm there.
104 void addSymbol(const Export &entry, BumpPtrAllocator &allocator,
105 std::vector<TrieNode *> &allNodes);
// Takes the running offset by reference; the meaning of the bool result is
// not visible in this listing.
106 bool updateOffset(uint32_t &offset);
// Appends this node to \p out (definition not visible here).
107 void appendToByteBuffer(ByteBuffer &out);
// NOTE(review): identifier is spelled "cummulative" (sic); it is initialized
// from the constructor argument.
110 StringRef _cummulativeString;
// Outgoing edges of this node.
111 TrieEdgeList _children;
115 StringRef _importedName;
// Initialized to 0 by the constructor.
116 uint32_t _trieOffset;
120 /// Utility class for writing a mach-o binary file given an in-memory
/// NormalizedFile (the constructor argument). The constructor computes the
/// offsets and sizes stored in the members below; the write*() methods use
/// them when emitting the file.
// NOTE(review): this listing skips numbers inside the class (121, 123, 126,
// 128-129, 137-138, ...), so access specifiers and several members used
// elsewhere in the file (_is64, _swap, _buffer, _segInfo, _ec, _seg1addr,
// size()) are not visible here.
122 class MachOFileLayout {
124 /// All layout computation is done in the constructor.
125 MachOFileLayout(const NormalizedFile &file);
127 /// Returns the final file size as computed in the constructor.
130 // Returns size of the mach_header and load commands.
131 size_t headerAndLoadCommandsSize() const;
133 /// Writes the normalized file as a binary mach-o file to the specified
134 /// path. This does not have a stream interface because the generated
135 /// file may need the 'x' bit set.
136 llvm::Error writeBinary(StringRef path);
// Returns the byte size of the load commands for a final linked image and
// reports the number of commands through \p count.
139 uint32_t loadCommandsSize(uint32_t &count);
140 void buildFileOffsets();
// write*() methods: each emits one region of the output file.
141 void writeMachHeader();
142 llvm::Error writeLoadCommands();
143 void writeSectionContent();
144 void writeRelocations();
145 void writeSymbolTable();
146 void writeRebaseInfo();
147 void writeBindingInfo();
148 void writeLazyBindingInfo();
149 void writeExportInfo();
150 void writeFunctionStartsInfo();
151 void writeDataInCodeInfo();
152 void writeLinkEditContent();
// build*()/compute*() methods: layout-time preparation (definitions for most
// of these are outside this listing).
153 void buildLinkEditInfo();
154 void buildRebaseInfo();
155 void buildBindInfo();
156 void buildLazyBindInfo();
157 void buildExportTrie();
158 void computeFunctionStartsSize();
159 void computeDataInCodeSize();
160 void computeSymbolTableSizes();
161 void buildSectionRelocations();
// Writes \p symbols at \p symOffset / \p strOffset and advances both.
162 void appendSymbols(const std::vector<Symbol> &symbols,
163 uint32_t &symOffset, uint32_t &strOffset);
// NOTE(review): "§" in the next two declarations looks like a mis-decoded
// "&sect" (the definitions use a parameter named `sect`) — restore from the
// pristine source.
164 uint32_t indirectSymbolIndex(const Section §, uint32_t &index);
165 uint32_t indirectSymbolElementSize(const Section §);
167 // For use as template parameter to load command methods.
// Selects the 64-bit segment/section structs and LC_SEGMENT_64.
168 struct MachO64Trait {
169 typedef llvm::MachO::segment_command_64 command;
170 typedef llvm::MachO::section_64 section;
171 enum { LC = llvm::MachO::LC_SEGMENT_64 };
174 // For use as template parameter to load command methods.
// Selects the 32-bit segment/section structs and LC_SEGMENT.
175 struct MachO32Trait {
176 typedef llvm::MachO::segment_command command;
177 typedef llvm::MachO::section section;
178 enum { LC = llvm::MachO::LC_SEGMENT };
// Object files: one segment command that carries every section.
181 template <typename T>
182 llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc);
// Final linked images: one segment command per segment.
183 template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc);
// Rounds \p value up to the target pointer size (8 or 4 bytes).
185 uint32_t pointerAlign(uint32_t value);
// Path written into LC_LOAD_DYLINKER.
186 static StringRef dyldPath();
// Per-segment side data computed during layout (other fields are not
// visible in this listing).
188 struct SegExtraInfo {
// Sections assigned to the segment.
191 std::vector<const Section*> sections;
193 typedef std::map<const Segment*, SegExtraInfo> SegMap;
// Per-section side data computed during layout (fields not visible here).
194 struct SectionExtraInfo {
197 typedef std::map<const Section*, SectionExtraInfo> SectionMap;
// The file being written; held by reference.
199 const NormalizedFile &_file;
204 const bool _bigEndianArch;
// File offsets, counts and sizes computed by the constructor.
206 uint32_t _startOfLoadCommands;
207 uint32_t _countOfLoadCommands;
208 uint32_t _endOfLoadCommands;
209 uint32_t _startOfRelocations;
210 uint32_t _startOfFunctionStarts;
211 uint32_t _startOfDataInCode;
212 uint32_t _startOfSymbols;
213 uint32_t _startOfIndirectSymbols;
214 uint32_t _startOfSymbolStrings;
215 uint32_t _endOfSymbolStrings;
216 uint32_t _symbolTableLocalsStartIndex;
217 uint32_t _symbolTableGlobalsStartIndex;
218 uint32_t _symbolTableUndefinesStartIndex;
219 uint32_t _symbolStringPoolSize;
220 uint32_t _symbolTableSize;
221 uint32_t _functionStartsSize;
222 uint32_t _dataInCodeSize;
223 uint32_t _indirectSymbolTableCount;
224 // Used in object file creation only
225 uint32_t _startOfSectionsContent;
226 uint32_t _endOfSectionsContent;
227 // Used in final linked image only
228 uint32_t _startOfLinkEdit;
229 uint32_t _startOfRebaseInfo;
230 uint32_t _endOfRebaseInfo;
231 uint32_t _startOfBindingInfo;
232 uint32_t _endOfBindingInfo;
233 uint32_t _startOfLazyBindingInfo;
234 uint32_t _endOfLazyBindingInfo;
235 uint32_t _startOfExportTrie;
236 uint32_t _endOfExportTrie;
237 uint32_t _endOfLinkEdit;
238 uint64_t _addressOfLinkEdit;
// Section -> extra info (e.g. the file offset assigned during layout).
240 SectionMap _sectInfo;
// Encoded link-edit payloads; their sizes drive the link-edit layout.
241 ByteBuffer _rebaseInfo;
242 ByteBuffer _bindingInfo;
243 ByteBuffer _lazyBindingInfo;
// NOTE(review): no visible code sizes or writes this buffer;
// writeLoadCommands emits weak_bind_off/size as 0.
244 ByteBuffer _weakBindingInfo;
245 ByteBuffer _exportTrie;
248 size_t headerAndLoadCommandsSize(const NormalizedFile &file) {
249 MachOFileLayout layout(file);
250 return layout.headerAndLoadCommandsSize();
/// Returns the fixed dynamic-linker path. It is used both when sizing and
/// when writing the LC_LOAD_DYLINKER command for main executables.
253 StringRef MachOFileLayout::dyldPath() {
254 return "/usr/lib/dyld";
257 uint32_t MachOFileLayout::pointerAlign(uint32_t value) {
258 return llvm::alignTo(value, _is64 ? 8 : 4);
/// Returns the file offset at which the load commands end. The header starts
/// at offset 0, so this is also the size of the header plus load commands.
262 size_t MachOFileLayout::headerAndLoadCommandsSize() const {
263 return _endOfLoadCommands;
/// Computes the complete on-disk layout for \p file.
///
/// For MH_OBJECT files the offsets of section content, relocations, function
/// starts, data-in-code, symbols, indirect symbols and symbol strings are
/// assigned directly here. In the second half (commented "Final linked
/// images") the load-command size comes from loadCommandsSize() and the
/// link-edit pieces are laid out back to back starting at _startOfLinkEdit.
// NOTE(review): the listing numbers skip values inside this function (267,
// 279, 302, 339, 345-347, ...), so some initializers, statements and braces
// are not visible here.
266 MachOFileLayout::MachOFileLayout(const NormalizedFile &file)
268 _is64(MachOLinkingContext::is64Bit(file.arch)),
269 _swap(!MachOLinkingContext::isHostEndian(file.arch)),
270 _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)),
// INT64_MAX is the "not yet assigned" marker tested in buildFileOffsets().
271 _seg1addr(INT64_MAX) {
// Load commands begin right after the mach header, whose size depends on
// whether the target is 64-bit.
272 _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header);
273 const size_t segCommandBaseSize =
274 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command));
275 const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section));
276 if (file.fileType == llvm::MachO::MH_OBJECT) {
277 // object files have just one segment load command containing all sections
278 _endOfLoadCommands = _startOfLoadCommands
280 + file.sections.size() * sectsSize
281 + sizeof(symtab_command);
// Starts at 2 (presumably the segment command plus LC_SYMTAB); each optional
// command below adds its size and bumps the count.
282 _countOfLoadCommands = 2;
283 if (file.hasMinVersionLoadCommand) {
284 _endOfLoadCommands += sizeof(version_min_command);
285 _countOfLoadCommands++;
287 if (!_file.functionStarts.empty()) {
288 _endOfLoadCommands += sizeof(linkedit_data_command);
289 _countOfLoadCommands++;
291 if (_file.generateDataInCodeLoadCommand) {
292 _endOfLoadCommands += sizeof(linkedit_data_command);
293 _countOfLoadCommands++;
295 // Assign file offsets to each section.
296 _startOfSectionsContent = _endOfLoadCommands;
297 unsigned relocCount = 0;
298 uint64_t offset = _startOfSectionsContent;
// NOTE(review): "§" below looks like a mis-decoded "&sect" (the loop body
// uses `sect`); restore from the pristine source.
299 for (const Section § : file.sections) {
// Zero-fill sections occupy no file space and get file offset 0.
300 if (isZeroFillSection(sect.type))
301 _sectInfo[§].fileOffset = 0;
// Otherwise place the section at the next offset aligned to its alignment.
303 offset = llvm::alignTo(offset, sect.alignment);
304 _sectInfo[§].fileOffset = offset;
305 offset += sect.content.size();
307 relocCount += sect.relocations.size();
309 _endOfSectionsContent = offset;
// These set the sizes consumed by the offset arithmetic below.
311 computeSymbolTableSizes();
312 computeFunctionStartsSize();
313 computeDataInCodeSize();
315 // Align start of relocations.
316 _startOfRelocations = pointerAlign(_endOfSectionsContent);
// 8 bytes are reserved per relocation record.
// NOTE(review): other code in this file strides by
// sizeof(any_relocation_info); the two must agree.
317 _startOfFunctionStarts = _startOfRelocations + relocCount * 8;
318 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
319 _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
320 // Add Indirect symbol table.
321 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
322 // Align start of symbol table and symbol strings.
323 _startOfSymbolStrings = _startOfIndirectSymbols
324 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
325 _endOfSymbolStrings = _startOfSymbolStrings
326 + pointerAlign(_symbolStringPoolSize);
327 _endOfLinkEdit = _endOfSymbolStrings;
328 DEBUG_WITH_TYPE("MachOFileLayout",
329 llvm::dbgs() << "MachOFileLayout()\n"
330 << " startOfLoadCommands=" << _startOfLoadCommands << "\n"
331 << " countOfLoadCommands=" << _countOfLoadCommands << "\n"
332 << " endOfLoadCommands=" << _endOfLoadCommands << "\n"
333 << " startOfRelocations=" << _startOfRelocations << "\n"
334 << " startOfSymbols=" << _startOfSymbols << "\n"
335 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
336 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
337 << " startOfSectionsContent=" << _startOfSectionsContent << "\n"
338 << " endOfSectionsContent=" << _endOfSectionsContent << "\n");
340 // Final linked images have one load command per segment.
341 _endOfLoadCommands = _startOfLoadCommands
342 + loadCommandsSize(_countOfLoadCommands);
344 // Assign section file offsets.
// NOTE(review): the statements that follow this comment in the original
// (numbers 345-347) are not visible; _startOfLinkEdit is assigned in
// buildFileOffsets().
348 // As laid out by the assignments below, LINKEDIT of final linked images
349 // has in order: rebase info, binding info, lazy binding info, export trie,
350 // function starts, data-in-code, symbol table, indirect symbol table,
// symbol table strings. (No weak-binding region is laid out here.)
351 _startOfRebaseInfo = _startOfLinkEdit;
352 _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size();
353 _startOfBindingInfo = _endOfRebaseInfo;
354 _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size();
355 _startOfLazyBindingInfo = _endOfBindingInfo;
356 _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size();
357 _startOfExportTrie = _endOfLazyBindingInfo;
358 _endOfExportTrie = _startOfExportTrie + _exportTrie.size();
359 _startOfFunctionStarts = _endOfExportTrie;
360 _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize;
361 _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
362 _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
363 _startOfSymbolStrings = _startOfIndirectSymbols
364 + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
365 _endOfSymbolStrings = _startOfSymbolStrings
366 + pointerAlign(_symbolStringPoolSize);
367 _endOfLinkEdit = _endOfSymbolStrings;
368 DEBUG_WITH_TYPE("MachOFileLayout",
369 llvm::dbgs() << "MachOFileLayout()\n"
370 << " startOfLoadCommands=" << _startOfLoadCommands << "\n"
371 << " countOfLoadCommands=" << _countOfLoadCommands << "\n"
372 << " endOfLoadCommands=" << _endOfLoadCommands << "\n"
373 << " startOfLinkEdit=" << _startOfLinkEdit << "\n"
374 << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n"
375 << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n"
376 << " startOfBindingInfo=" << _startOfBindingInfo << "\n"
377 << " endOfBindingInfo=" << _endOfBindingInfo << "\n"
378 << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n"
379 << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n"
380 << " startOfExportTrie=" << _startOfExportTrie << "\n"
381 << " endOfExportTrie=" << _endOfExportTrie << "\n"
382 << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n"
383 << " startOfDataInCode=" << _startOfDataInCode << "\n"
384 << " startOfSymbols=" << _startOfSymbols << "\n"
385 << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
386 << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
387 << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n");
/// Computes the total byte size of the load commands for a final linked
/// image and reports the number of commands through \p count.
/// Each size term here mirrors the cmdsize expression that
/// writeLoadCommands() uses for the same command.
// NOTE(review): the declaration of `size`, most `count` updates and the
// return statement are not visible in this listing (numbering skips 392-394,
// 408-411, 470-474, ...).
391 uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count) {
395 const size_t segCommandSize =
396 (_is64 ? sizeof(segment_command_64) : sizeof(segment_command));
397 const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section));
399 // Add LC_SEGMENT for each segment.
400 size += _file.segments.size() * segCommandSize;
401 count += _file.segments.size();
402 // Add section record for each section.
403 size += _file.sections.size() * sectionSize;
405 // If creating a dylib, add LC_ID_DYLIB.
406 if (_file.fileType == llvm::MachO::MH_DYLIB) {
// Only the path (plus NUL) is pointer-aligned here; the struct size is added
// unaligned.
407 size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1);
// Size of the dyld_info_command (written as LC_DYLD_INFO_ONLY).
412 size += sizeof(dyld_info_command);
// Size of LC_SYMTAB.
416 size += sizeof(symtab_command);
// LC_DYSYMTAB is omitted for MH_PRELOAD files.
420 if (_file.fileType != llvm::MachO::MH_PRELOAD) {
421 size += sizeof(dysymtab_command);
425 // If main executable add LC_LOAD_DYLINKER
426 if (_file.fileType == llvm::MachO::MH_EXECUTE) {
// Here the struct and the path (plus NUL) are aligned together.
427 size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1);
431 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS,
432 // LC_VERSION_MIN_TVOS
433 if (_file.hasMinVersionLoadCommand) {
434 size += sizeof(version_min_command);
438 // Add LC_SOURCE_VERSION
439 size += sizeof(source_version_command);
442 // If main executable add LC_MAIN
443 if (_file.fileType == llvm::MachO::MH_EXECUTE) {
444 size += sizeof(entry_point_command);
448 // Add LC_LOAD_DYLIB for each dependent dylib.
449 for (const DependentDylib &dep : _file.dependentDylibs) {
450 size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1);
// One rpath_command per entry in _file.rpaths.
455 for (const StringRef &path : _file.rpaths) {
456 size += pointerAlign(sizeof(rpath_command) + path.size() + 1);
460 // Add LC_FUNCTION_STARTS if needed
// NOTE(review): this tests functionStarts.empty(), while writeLoadCommands()
// tests _functionStartsSize != 0; the two conditions must agree.
461 if (!_file.functionStarts.empty()) {
462 size += sizeof(linkedit_data_command);
466 // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries.
467 // FIXME: Zero-length entries exist only to match ld64. Should we change this?
468 if (_file.generateDataInCodeLoadCommand) {
469 size += sizeof(linkedit_data_command);
/// Tests whether the address ranges [address, address+size) of two segments
/// intersect.
// NOTE(review): the return statements are not visible in this listing
// (numbering skips 478, 480-482). Presumably each guard returns false and
// the fall-through returns true — confirm.
476 static bool overlaps(const Segment &s1, const Segment &s2) {
// s2 starts at or after the end of s1.
477 if (s2.address >= s1.address+s1.size)
// s1 starts at or after the end of s2.
479 if (s1.address >= s2.address+s2.size)
/// Tests whether the address ranges of two sections intersect; a section's
/// extent is taken from content.size().
// NOTE(review): the return statements are not visible in this listing
// (numbering skips 486, 488-490). Presumably each guard returns false and
// the fall-through returns true — confirm.
484 static bool overlaps(const Section &s1, const Section &s2) {
// s2 starts at or after the end of s1.
485 if (s2.address >= s1.address+s1.content.size())
// s1 starts at or after the end of s2.
487 if (s1.address >= s2.address+s2.content.size())
/// Validates the segment/section address layout, assigns each section to a
/// segment, and gives every segment and section its file offset. Problems
/// are recorded in _ec as executable_format_error. The offset reached after
/// the last segment becomes _startOfLinkEdit.
// NOTE(review): several lines are not visible in this listing (numbering
// skips 496-497, 500-504, 518-519, 521-524, 534-539, 559, 577, ...),
// including the guards around the overlap checks and the early exits.
492 void MachOFileLayout::buildFileOffsets() {
493 // Verify no segments overlap
// Every pair of segments is compared.
494 for (const Segment &sg1 : _file.segments) {
495 for (const Segment &sg2 : _file.segments) {
498 if (overlaps(sg1,sg2)) {
499 _ec = make_error_code(llvm::errc::executable_format_error);
505 // Verify no sections overlap
506 for (const Section &s1 : _file.sections) {
507 for (const Section &s2 : _file.sections) {
510 if (overlaps(s1,s2)) {
511 _ec = make_error_code(llvm::errc::executable_format_error);
517 // Build side table of extra info about segments and sections.
520 for (const Segment &sg : _file.segments) {
525 // Assign sections to segments.
526 for (const Section &s : _file.sections) {
528 bool foundSegment = false;
529 for (const Segment &sg : _file.segments) {
// A section belongs to the segment with the same name whose address range
// fully contains it.
530 if (sg.name.equals(s.segmentName)) {
531 if ((s.address >= sg.address)
532 && (s.address+s.content.size() <= sg.address+sg.size)) {
533 _segInfo[&sg].sections.push_back(&s);
// Presumably reached when no segment accepted the section (the guarding
// condition is not visible here).
540 _ec = make_error_code(llvm::errc::executable_format_error);
545 // Assign file offsets.
546 uint32_t fileOffset = 0;
547 DEBUG_WITH_TYPE("MachOFileLayout",
548 llvm::dbgs() << "buildFileOffsets()\n");
549 for (const Segment &sg : _file.segments) {
550 _segInfo[&sg].fileOffset = fileOffset;
// Record the address of the first segment with non-zero init_access;
// writeLoadCommands() subtracts it from entryAddress for LC_MAIN.
551 if ((_seg1addr == INT64_MAX) && sg.init_access)
552 _seg1addr = sg.address;
553 DEBUG_WITH_TYPE("MachOFileLayout",
554 llvm::dbgs() << " segment=" << sg.name
555 << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n");
557 uint32_t segFileSize = 0;
558 // A segment that is not zero-fill must use at least one page of disk space.
560 segFileSize = _file.pageSize;
561 for (const Section *s : _segInfo[&sg].sections) {
// A section's file offset keeps the same displacement from the segment start
// as its address does; zero-fill sections contribute no file bytes.
562 uint32_t sectOffset = s->address - sg.address;
563 uint32_t sectFileSize =
564 isZeroFillSection(s->type) ? 0 : s->content.size();
565 segFileSize = std::max(segFileSize, sectOffset + sectFileSize);
567 _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset;
// NOTE(review): this prints the segment's starting `fileOffset`, not the
// section offset just stored in _sectInfo[s].fileOffset — likely unintended.
568 DEBUG_WITH_TYPE("MachOFileLayout",
569 llvm::dbgs() << " section=" << s->sectionName
570 << ", fileOffset=" << fileOffset << "\n");
573 // round up all segments to page aligned, except __LINKEDIT
574 if (!sg.name.equals("__LINKEDIT")) {
575 _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize);
576 fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize);
// The end address of the segment becomes the candidate address for
// __LINKEDIT.
578 _addressOfLinkEdit = sg.address + sg.size;
// Link-edit data starts at the offset reached after the segments.
580 _startOfLinkEdit = fileOffset;
/// Returns the total file size. The symbol string pool is the last region
/// the constructor lays out, so its end offset is the end of the file.
583 size_t MachOFileLayout::size() const {
584 return _endOfSymbolStrings;
/// Fills in the mach header at the start of _buffer from the normalized file
/// and the load-command figures computed during layout.
// NOTE(review): the declaration of `version` and the lines after the flags
// assignment are not visible in this listing (numbering skips 595, 599-600,
// 609-611).
587 void MachOFileLayout::writeMachHeader() {
588 auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch);
589 // dynamic x86 executables on newer OS version should also set the
590 // CPU_SUBTYPE_LIB64 mask in the CPU subtype.
591 // FIXME: Check that this is a dynamic executable, not a static one.
592 if (_file.fileType == llvm::MachO::MH_EXECUTE &&
593 cpusubtype == CPU_SUBTYPE_X86_64_ALL &&
594 _file.os == MachOLinkingContext::OS::macOSX) {
// Going by the variable name, parsePackedVersion() returns true on failure;
// the mask is applied only when the minimum OS version is at least 10.5.
596 bool failed = MachOLinkingContext::parsePackedVersion("10.5", version);
597 if (!failed && _file.minOSverson >= version)
598 cpusubtype |= CPU_SUBTYPE_LIB64;
// The 32-bit header struct is used for both widths here; presumably the
// 64-bit header shares these leading fields — confirm against mach_header_64.
601 mach_header *mh = reinterpret_cast<mach_header*>(_buffer);
602 mh->magic = _is64 ? llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC;
603 mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch);
604 mh->cpusubtype = cpusubtype;
605 mh->filetype = _file.fileType;
606 mh->ncmds = _countOfLoadCommands;
607 mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands;
608 mh->flags = _file.flags;
/// Hands out the starting index in the indirect symbol table for \p sect and
/// advances the running index by the section's number of indirect symbols.
/// The result is stored in the section record's reserved1 field by callers.
// NOTE(review): "§" in the signature looks like a mis-decoded "&sect" (the
// body uses `sect`). The second parameter line and both return statements
// are not visible in this listing (numbering skips 614, 616, 619-620).
613 uint32_t MachOFileLayout::indirectSymbolIndex(const Section §,
// Sections without indirect symbols do not consume any indices.
615 if (sect.indirectSymbols.empty())
// Remember the current index before reserving this section's range.
617 uint32_t result = index;
618 index += sect.indirectSymbols.size();
/// Computes the per-entry size of \p sect for sections of type
/// S_SYMBOL_STUBS: content size divided by the number of indirect symbols.
/// Callers store the result in the section record's reserved2 field.
// NOTE(review): "§" in the signature looks like a mis-decoded "&sect". The
// values returned by the two guards are not visible in this listing
// (numbering skips 624 and 626).
622 uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) {
// Guard: also avoids a division by zero below.
623 if (sect.indirectSymbols.empty())
// Guard: only symbol-stub sections reach the division.
625 if (sect.type != S_SYMBOL_STUBS)
627 return sect.content.size() / sect.indirectSymbols.size();
/// Writes the single segment load command used by object files at \p lc,
/// followed by one section record per section in _file.sections.
/// \tparam T MachO64Trait or MachO32Trait, selecting the struct widths.
/// \param lc cursor into the load-command area (taken by reference).
/// \returns a default-constructed llvm::Error on the visible path.
// NOTE(review): lines such as the `cmd`/`vmaddr` assignments, byte swapping,
// the `sout` increment and the final update of `lc` are not visible in this
// listing (numbering skips 633, 638, 646-648, 666-670).
630 template <typename T>
631 llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) {
632 typename T::command* seg = reinterpret_cast<typename T::command*>(lc);
// The command size covers the segment header plus all trailing section
// records.
634 seg->cmdsize = sizeof(typename T::command)
635 + _file.sections.size() * sizeof(typename T::section);
636 uint8_t *next = lc + seg->cmdsize;
// The object-file segment is unnamed: segname is all zero bytes.
637 memset(seg->segname, 0, 16);
// vmsize is taken from the end of the last section.
// NOTE(review): back() is called unconditionally; an empty section list
// would be undefined behavior unless callers rule it out.
639 seg->vmsize = _file.sections.back().address
640 + _file.sections.back().content.size();
641 seg->fileoff = _endOfLoadCommands;
642 seg->filesize = seg->vmsize;
643 seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
644 seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE;
645 seg->nsects = _file.sections.size();
// Section records start immediately after the segment command header.
649 typename T::section *sout = reinterpret_cast<typename T::section*>
650 (lc+sizeof(typename T::command));
651 uint32_t relOffset = _startOfRelocations;
652 uint32_t indirectSymRunningIndex = 0;
653 for (const Section &sin : _file.sections) {
654 setString16(sin.sectionName, sout->sectname);
655 setString16(sin.segmentName, sout->segname);
656 sout->addr = sin.address;
657 sout->size = sin.content.size();
658 sout->offset = _sectInfo[&sin].fileOffset;
// The record stores log2 of the alignment.
659 sout->align = llvm::Log2_32(sin.alignment);
// Sections without relocations get reloff 0; otherwise the running offset.
660 sout->reloff = sin.relocations.empty() ? 0 : relOffset;
661 sout->nreloc = sin.relocations.size();
662 sout->flags = sin.type | sin.attributes;
663 sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex);
664 sout->reserved2 = indirectSymbolElementSize(sin);
665 relOffset += sin.relocations.size() * sizeof(any_relocation_info);
671 return llvm::Error();
/// Writes one segment load command per segment of a final linked image at
/// \p lc, each followed by the section records assigned to that segment.
/// __LINKEDIT is handled on its own path and gets no section records.
/// \tparam T MachO64Trait or MachO32Trait, selecting the struct widths.
/// \param lc cursor into the load-command area; advanced as commands are
///        written.
/// \returns a default-constructed llvm::Error on the visible path.
// NOTE(review): several lines (the `cmd` assignments, byte swapping, the
// zero-fill offset value, reloff/nreloc and the `sect` increment) are not
// visible in this listing (numbering skips 684, 694-700, 703, 715-717,
// 726-727, 730-731, 735-738).
674 template <typename T>
675 llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) {
// The running indirect-symbol index is shared across all segments.
676 uint32_t indirectSymRunningIndex = 0;
677 for (const Segment &seg : _file.segments) {
678 // Link edit has no sections and a custom range of address, so handle it
// on its own path, apart from the ordinary segments.
680 SegExtraInfo &segInfo = _segInfo[&seg];
681 if (seg.name.equals("__LINKEDIT")) {
682 size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit;
683 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc);
685 cmd->cmdsize = sizeof(typename T::command);
686 uint8_t *next = lc + cmd->cmdsize;
687 setString16("__LINKEDIT", cmd->segname);
688 cmd->vmaddr = _addressOfLinkEdit;
// vmsize is rounded up to a page; filesize is the exact byte count.
689 cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize);
690 cmd->fileoff = _startOfLinkEdit;
691 cmd->filesize = linkeditSize;
692 cmd->initprot = seg.init_access;
693 cmd->maxprot = seg.max_access;
701 // Write segment command with trailing sections.
702 typename T::command* cmd = reinterpret_cast<typename T::command*>(lc);
704 cmd->cmdsize = sizeof(typename T::command)
705 + segInfo.sections.size() * sizeof(typename T::section);
706 uint8_t *next = lc + cmd->cmdsize;
707 setString16(seg.name, cmd->segname);
708 cmd->vmaddr = seg.address;
709 cmd->vmsize = seg.size;
710 cmd->fileoff = segInfo.fileOffset;
711 cmd->filesize = segInfo.fileSize;
712 cmd->initprot = seg.init_access;
713 cmd->maxprot = seg.max_access;
714 cmd->nsects = segInfo.sections.size();
// Section records start immediately after the segment command header.
718 typename T::section *sect = reinterpret_cast<typename T::section*>
719 (lc+sizeof(typename T::command));
720 for (const Section *section : segInfo.sections) {
721 setString16(section->sectionName, sect->sectname);
722 setString16(section->segmentName, sect->segname);
723 sect->addr = section->address;
724 sect->size = section->content.size();
// Zero-fill sections take a different offset (that assignment is not visible
// here); others keep their displacement within the segment.
725 if (isZeroFillSection(section->type))
728 sect->offset = section->address - seg.address + segInfo.fileOffset;
729 sect->align = llvm::Log2_32(section->alignment);
732 sect->flags = section->type | section->attributes;
733 sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex);
734 sect->reserved2 = indirectSymbolElementSize(*section);
// Move the cursor past the command just written.
739 lc = reinterpret_cast<uint8_t*>(next);
741 return llvm::Error();
/// Writes a version_min_command at \p lc when the file asks for one, then
/// advances \p lc past it. The command kind depends on the target OS; for an
/// unknown OS the kind recorded in the file (minOSVersionKind) is used.
// NOTE(review): the remaining parameters (call sites pass `_swap` and `lc`),
// the early return, the switch header, `break`s, byte swapping and any other
// cases are not visible in this listing (numbering skips 745-746, 748, 750,
// 755-756, 762, 769-772).
744 static void writeVersionMinLoadCommand(const NormalizedFile &_file,
// Guard for files that do not request the command.
747 if (!_file.hasMinVersionLoadCommand)
749 version_min_command *vm = reinterpret_cast<version_min_command*>(lc);
751 case MachOLinkingContext::OS::unknown:
752 vm->cmd = _file.minOSVersionKind;
753 vm->cmdsize = sizeof(version_min_command);
754 vm->version = _file.minOSverson;
757 case MachOLinkingContext::OS::macOSX:
758 vm->cmd = LC_VERSION_MIN_MACOSX;
759 vm->cmdsize = sizeof(version_min_command);
760 vm->version = _file.minOSverson;
761 vm->sdk = _file.sdkVersion;
// iOS and the iOS simulator share LC_VERSION_MIN_IPHONEOS.
763 case MachOLinkingContext::OS::iOS:
764 case MachOLinkingContext::OS::iOS_simulator:
765 vm->cmd = LC_VERSION_MIN_IPHONEOS;
766 vm->cmdsize = sizeof(version_min_command);
767 vm->version = _file.minOSverson;
768 vm->sdk = _file.sdkVersion;
773 lc += sizeof(version_min_command);
/// Emits every load command into _buffer, starting at _startOfLoadCommands.
/// Object files (MH_OBJECT) get a single segment command, LC_SYMTAB and the
/// optional version-min / function-starts / data-in-code commands. Final
/// linked images get per-segment commands followed by the dyld, symbol,
/// dylib, rpath and link-edit data commands.
/// The cmdsize expressions used here mirror those in loadCommandsSize().
/// \returns a default-constructed llvm::Error on the visible path; errors
/// from the segment writers are presumably propagated (those lines are not
/// visible here).
// NOTE(review): many lines (the `_is64` selection, `cmd`/`cmdsize`
// assignments, byte swapping, cursor advances after variable-size commands
// and the `else` between the two halves) are not visible in this listing,
// as the skipped numbers show.
776 llvm::Error MachOFileLayout::writeLoadCommands() {
// `lc` is the write cursor; each command advances it by its own size.
777 uint8_t *lc = &_buffer[_startOfLoadCommands];
778 if (_file.fileType == llvm::MachO::MH_OBJECT) {
779 // Object files have one unnamed segment which holds all sections.
781 if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc))
784 if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc))
787 // Add LC_SYMTAB with symbol table info
788 symtab_command* st = reinterpret_cast<symtab_command*>(lc);
790 st->cmdsize = sizeof(symtab_command);
791 st->symoff = _startOfSymbols;
792 st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size()
793 + _file.undefinedSymbols.size();
794 st->stroff = _startOfSymbolStrings;
795 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
798 lc += sizeof(symtab_command);
800 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS,
801 // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS
802 writeVersionMinLoadCommand(_file, _swap, lc);
804 // Add LC_FUNCTION_STARTS if needed.
// NOTE(review): the layout code reserves space when
// !_file.functionStarts.empty(); this tests _functionStartsSize instead —
// the two must agree.
805 if (_functionStartsSize != 0) {
806 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
807 dl->cmd = LC_FUNCTION_STARTS;
808 dl->cmdsize = sizeof(linkedit_data_command);
809 dl->dataoff = _startOfFunctionStarts;
810 dl->datasize = _functionStartsSize;
813 lc += sizeof(linkedit_data_command);
816 // Add LC_DATA_IN_CODE if requested.
817 if (_file.generateDataInCodeLoadCommand) {
818 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
819 dl->cmd = LC_DATA_IN_CODE;
820 dl->cmdsize = sizeof(linkedit_data_command);
821 dl->dataoff = _startOfDataInCode;
822 dl->datasize = _dataInCodeSize;
825 lc += sizeof(linkedit_data_command);
828 // Final linked images have sections under segments.
830 if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc))
833 if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc))
837 // Add LC_ID_DYLIB command for dynamic libraries.
838 if (_file.fileType == llvm::MachO::MH_DYLIB) {
839 dylib_command *dc = reinterpret_cast<dylib_command*>(lc);
840 StringRef path = _file.installName;
841 uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1);
842 dc->cmd = LC_ID_DYLIB;
// The name is stored right after the struct; this field is its offset.
844 dc->dylib.name = sizeof(dylib_command); // offset
845 // needs to be some constant value different than the one in LC_LOAD_DYLIB
846 dc->dylib.timestamp = 1;
847 dc->dylib.current_version = _file.currentVersion;
848 dc->dylib.compatibility_version = _file.compatVersion;
// Copy the install name after the struct and NUL-terminate it.
851 memcpy(lc + sizeof(dylib_command), path.begin(), path.size());
852 lc[sizeof(dylib_command) + path.size()] = '\0';
856 // Add LC_DYLD_INFO_ONLY.
857 dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc);
858 di->cmd = LC_DYLD_INFO_ONLY;
859 di->cmdsize = sizeof(dyld_info_command);
// An empty blob is recorded with offset 0 and size 0.
860 di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0;
861 di->rebase_size = _rebaseInfo.size();
862 di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0;
863 di->bind_size = _bindingInfo.size();
// Weak binding info is always recorded as absent.
864 di->weak_bind_off = 0;
865 di->weak_bind_size = 0;
866 di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0;
867 di->lazy_bind_size = _lazyBindingInfo.size();
868 di->export_off = _exportTrie.size() ? _startOfExportTrie : 0;
869 di->export_size = _exportTrie.size();
872 lc += sizeof(dyld_info_command);
874 // Add LC_SYMTAB with symbol table info.
875 symtab_command* st = reinterpret_cast<symtab_command*>(lc);
877 st->cmdsize = sizeof(symtab_command);
878 st->symoff = _startOfSymbols;
879 st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size()
880 + _file.undefinedSymbols.size();
881 st->stroff = _startOfSymbolStrings;
882 st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
885 lc += sizeof(symtab_command);
// LC_DYSYMTAB is emitted for everything except MH_PRELOAD, matching
// loadCommandsSize().
888 if (_file.fileType != llvm::MachO::MH_PRELOAD) {
889 dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc);
890 dst->cmd = LC_DYSYMTAB;
891 dst->cmdsize = sizeof(dysymtab_command);
892 dst->ilocalsym = _symbolTableLocalsStartIndex;
893 dst->nlocalsym = _file.localSymbols.size();
894 dst->iextdefsym = _symbolTableGlobalsStartIndex;
895 dst->nextdefsym = _file.globalSymbols.size();
896 dst->iundefsym = _symbolTableUndefinesStartIndex;
897 dst->nundefsym = _file.undefinedSymbols.size();
902 dst->extrefsymoff = 0;
903 dst->nextrefsyms = 0;
904 dst->indirectsymoff = _startOfIndirectSymbols;
905 dst->nindirectsyms = _indirectSymbolTableCount;
912 lc += sizeof(dysymtab_command);
915 // If main executable, add LC_LOAD_DYLINKER
916 if (_file.fileType == llvm::MachO::MH_EXECUTE) {
917 // Build LC_LOAD_DYLINKER load command.
918 uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1);
919 dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc);
920 dl->cmd = LC_LOAD_DYLINKER;
922 dl->name = sizeof(dylinker_command); // offset
// Copy the dyld path after the struct and NUL-terminate it.
925 memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size());
926 lc[sizeof(dylinker_command)+dyldPath().size()] = '\0';
930 // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS,
931 // LC_VERSION_MIN_TVOS
932 writeVersionMinLoadCommand(_file, _swap, lc);
934 // Add LC_SOURCE_VERSION
936 // Note, using a temporary here to appease UB as we may not be aligned
937 // enough for a struct containing a uint64_t when emitting a 32-bit binary
938 source_version_command sv;
939 sv.cmd = LC_SOURCE_VERSION;
940 sv.cmdsize = sizeof(source_version_command);
941 sv.version = _file.sourceVersion;
944 memcpy(lc, &sv, sizeof(source_version_command));
945 lc += sizeof(source_version_command);
948 // If main executable, add LC_MAIN.
949 if (_file.fileType == llvm::MachO::MH_EXECUTE) {
950 // Build LC_MAIN load command.
951 // Note, using a temporary here to appease UB as we may not be aligned
952 // enough for a struct containing a uint64_t when emitting a 32-bit binary
953 entry_point_command ep;
955 ep.cmdsize = sizeof(entry_point_command);
// entryoff is the entry address relative to _seg1addr, which
// buildFileOffsets() takes from the first segment with non-zero init_access.
956 ep.entryoff = _file.entryAddress - _seg1addr;
957 ep.stacksize = _file.stackSize;
960 memcpy(lc, &ep, sizeof(entry_point_command));
961 lc += sizeof(entry_point_command);
964 // Add LC_LOAD_DYLIB commands
965 for (const DependentDylib &dep : _file.dependentDylibs) {
966 dylib_command* dc = reinterpret_cast<dylib_command*>(lc);
967 uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1);
970 dc->dylib.name = sizeof(dylib_command); // offset
971 // needs to be some constant value different than the one in LC_ID_DYLIB
972 dc->dylib.timestamp = 2;
973 dc->dylib.current_version = dep.currentVersion;
974 dc->dylib.compatibility_version = dep.compatVersion;
977 memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size());
978 lc[sizeof(dylib_command)+dep.path.size()] = '\0';
// One rpath_command per entry in _file.rpaths; the path follows the struct.
983 for (const StringRef &path : _file.rpaths) {
984 rpath_command *rpc = reinterpret_cast<rpath_command *>(lc);
985 uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1);
988 rpc->path = sizeof(rpath_command); // offset
991 memcpy(lc+sizeof(rpath_command), path.begin(), path.size());
992 lc[sizeof(rpath_command)+path.size()] = '\0';
996 // Add LC_FUNCTION_STARTS if needed.
997 if (_functionStartsSize != 0) {
998 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
999 dl->cmd = LC_FUNCTION_STARTS;
1000 dl->cmdsize = sizeof(linkedit_data_command);
1001 dl->dataoff = _startOfFunctionStarts;
1002 dl->datasize = _functionStartsSize;
1005 lc += sizeof(linkedit_data_command);
1008 // Add LC_DATA_IN_CODE if requested.
1009 if (_file.generateDataInCodeLoadCommand) {
1010 linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
1011 dl->cmd = LC_DATA_IN_CODE;
1012 dl->cmdsize = sizeof(linkedit_data_command);
1013 dl->dataoff = _startOfDataInCode;
1014 dl->datasize = _dataInCodeSize;
1017 lc += sizeof(linkedit_data_command);
1020 return llvm::Error();
/// Copies the raw bytes of every section into _buffer at the file offset
/// recorded for it in _sectInfo. Zero-fill and empty sections are tested for
/// first.
// NOTE(review): the statements under the two guards are not visible in this
// listing (numbering skips 1027 and 1029); presumably `continue;`.
1023 void MachOFileLayout::writeSectionContent() {
1024 for (const Section &s : _file.sections) {
1025 // Copy all section content to output buffer.
1026 if (isZeroFillSection(s.type))
// The empty check also keeps &s.content[0] below from indexing an empty
// container.
1028 if (s.content.empty())
1030 uint32_t offset = _sectInfo[&s].fileOffset;
1031 uint8_t *p = &_buffer[offset];
1032 memcpy(p, &s.content[0], s.content.size());
// NOTE(review): `p` is not read again in the visible lines, so this
// increment looks like a dead store.
1033 p += s.content.size();
1037 void MachOFileLayout::writeRelocations() {
1038 uint32_t relOffset = _startOfRelocations;
1039 for (Section sect : _file.sections) {
1040 for (Relocation r : sect.relocations) {
1041 any_relocation_info* rb = reinterpret_cast<any_relocation_info*>(
1042 &_buffer[relOffset]);
1043 *rb = packRelocation(r, _swap, _bigEndianArch);
1044 relOffset += sizeof(any_relocation_info);
/// Writes one symbol-table entry and one NUL-terminated name per element of
/// \p symbols. Entries go to \p symOffset and names to \p strOffset in
/// _buffer; both offsets are advanced so consecutive calls append.
// NOTE(review): two entry layouts are visible below (nlist_64 and nlist).
// The condition choosing between them (presumably `_is64`) and any byte
// swapping are not visible in this listing (numbering skips 1052, 1059-1060,
// 1062, 1069-1070, 1072).
1049 void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols,
1050 uint32_t &symOffset, uint32_t &strOffset) {
1051 for (const Symbol &sym : symbols) {
// 64-bit entry layout.
1053 nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]);
// n_strx is relative to the start of the string pool, not to the file.
1054 nb->n_strx = strOffset - _startOfSymbolStrings;
1055 nb->n_type = sym.type | sym.scope;
1056 nb->n_sect = sym.sect;
1057 nb->n_desc = sym.desc;
1058 nb->n_value = sym.value;
1061 symOffset += sizeof(nlist_64);
// 32-bit entry layout.
1063 nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]);
1064 nb->n_strx = strOffset - _startOfSymbolStrings;
1065 nb->n_type = sym.type | sym.scope;
1066 nb->n_sect = sym.sect;
1067 nb->n_desc = sym.desc;
1068 nb->n_value = sym.value;
1071 symOffset += sizeof(nlist);
// Append the symbol name to the string pool.
1073 memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size());
1074 strOffset += sym.name.size();
1075 _buffer[strOffset++] ='\0'; // Strings in table have nul terminator.
/// Copies _functionStartsSize bytes from _file.functionStarts into the output
/// buffer at _startOfFunctionStarts.
1079 void MachOFileLayout::writeFunctionStartsInfo() {
// A zero size is tested for before copying.
// NOTE(review): the guarded statement is not visible here -- presumably an
// early return; confirm.
1080 if (!_functionStartsSize)
1082 memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(),
1083 _functionStartsSize);
/// Writes one data_in_code_entry record per element of _file.dataInCode,
/// with the running offset starting at _startOfDataInCode.
1086 void MachOFileLayout::writeDataInCodeInfo() {
1087 uint32_t offset = _startOfDataInCode;
1088 for (const DataInCode &entry : _file.dataInCode) {
// NOTE(review): the operand of this cast is not visible here -- presumably
// the address of _buffer[offset]; confirm.
1089 data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>(
// Copy the three fields of the normalized entry into the record.
1091 dst->offset = entry.offset;
1092 dst->length = entry.length;
1093 dst->kind = entry.kind;
// Step to the position of the next record.
1096 offset += sizeof(data_in_code_entry);
/// Writes the symbol table, the symbol string pool and the indirect symbol
/// table into the output buffer.
1100 void MachOFileLayout::writeSymbolTable() {
1101 // Write symbol table and symbol strings in parallel.
1102 uint32_t symOffset = _startOfSymbols;
1103 uint32_t strOffset = _startOfSymbolStrings;
1104 _buffer[strOffset++] = '\0'; // Reserve n_strx offset of zero to mean no name.
// Symbols are emitted in three groups: locals, then globals, then undefines.
1105 appendSymbols(_file.localSymbols, symOffset, strOffset);
1106 appendSymbols(_file.globalSymbols, symOffset, strOffset);
1107 appendSymbols(_file.undefinedSymbols, symOffset, strOffset);
1108 // Write indirect symbol table array.
1109 uint32_t *indirects = reinterpret_cast<uint32_t*>
1110 (&_buffer[_startOfIndirectSymbols]);
1111 if (_file.fileType == llvm::MachO::MH_OBJECT) {
1112 // Object files have sections in same order as input normalized file.
// The loop variable is a const reference named 'section'; the declarator had
// been corrupted into a single non-ASCII character and is restored here.
1113 for (const Section &section : _file.sections) {
1114 for (uint32_t index : section.indirectSymbols) {
// Each index is stored either byte-swapped or as-is.
// NOTE(review): the condition choosing between the two stores is not
// visible here -- presumably _swap; confirm.
1116 *indirects++ = llvm::sys::getSwappedBytes(index);
1118 *indirects++ = index;
1122 // Final linked images must sort sections from normalized file.
1123 for (const Segment &seg : _file.segments) {
// Sections are visited in the per-segment order recorded in _segInfo.
1124 SegExtraInfo &segInfo = _segInfo[&seg];
1125 for (const Section *section : segInfo.sections) {
1126 for (uint32_t index : section->indirectSymbols) {
1128 *indirects++ = llvm::sys::getSwappedBytes(index);
1130 *indirects++ = index;
/// Copies the bytes held by _rebaseInfo into the output buffer at
/// _startOfRebaseInfo.
1137 void MachOFileLayout::writeRebaseInfo() {
1138 memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size());
/// Copies the bytes held by _bindingInfo into the output buffer at
/// _startOfBindingInfo.
1141 void MachOFileLayout::writeBindingInfo() {
1142 memcpy(&_buffer[_startOfBindingInfo],
1143 _bindingInfo.bytes(), _bindingInfo.size());
/// Copies the bytes held by _lazyBindingInfo into the output buffer at
/// _startOfLazyBindingInfo.
1146 void MachOFileLayout::writeLazyBindingInfo() {
1147 memcpy(&_buffer[_startOfLazyBindingInfo],
1148 _lazyBindingInfo.bytes(), _lazyBindingInfo.size());
/// Copies the bytes held by _exportTrie into the output buffer at
/// _startOfExportTrie.
1151 void MachOFileLayout::writeExportInfo() {
1152 memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size());
/// Runs the build/compute steps for the link-edit data. The calls visible
/// here build the lazy binding info and compute the symbol table,
/// function-starts and data-in-code sizes.
// NOTE(review): further calls in this function are not visible here --
// presumably the other build*() steps (rebase, bind, export trie); confirm.
1155 void MachOFileLayout::buildLinkEditInfo() {
1158 buildLazyBindInfo();
1160 computeSymbolTableSizes();
1161 computeFunctionStartsSize();
1162 computeDataInCodeSize();
// NOTE(review): the body of this function is not visible here, so its
// behavior cannot be described from this point; going by the name it prepares
// per-section relocation data -- confirm.
1165 void MachOFileLayout::buildSectionRelocations() {
/// Encodes _file.rebasingInfo as a rebase opcode stream in _rebaseInfo.
/// Each location produces its own opcode sequence; the stream is ended with
/// REBASE_OPCODE_DONE and then aligned to 8 bytes (when _is64) or 4 bytes.
1169 void MachOFileLayout::buildRebaseInfo() {
1170 // TODO: compress rebasing info.
1171 for (const RebaseLocation& entry : _file.rebasingInfo) {
// The rebase kind is OR'ed into the set-type opcode byte.
1172 _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind);
// NOTE(review): the second operand of this opcode byte is not visible here --
// presumably the segment index; confirm.
1173 _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
// The offset within the segment follows as a uleb128 value.
1175 _rebaseInfo.append_uleb128(entry.segOffset);
// Rebase exactly one location.
// NOTE(review): this opcode is appended with append_uleb128 while the other
// opcodes use append_byte; the two produce the same output only when the
// value is below 0x80 -- confirm that holds for this constant.
1176 _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1);
1178 _rebaseInfo.append_byte(REBASE_OPCODE_DONE);
1179 _rebaseInfo.align(_is64 ? 8 : 4);
/// Encodes _file.bindingInfo as a bind opcode stream in _bindingInfo.
/// The stream is ended with BIND_OPCODE_DONE and then aligned to 8 bytes
/// (when _is64) or 4 bytes.
1182 void MachOFileLayout::buildBindInfo() {
1183 // TODO: compress bind info.
// Tracks the addend of the previous entry so the addend opcode is emitted
// only when the value changes.
1184 uint64_t lastAddend = 0;
1185 for (const BindLocation& entry : _file.bindingInfo) {
// The bind kind is OR'ed into the set-type opcode byte.
1186 _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind);
// NOTE(review): the second operand of this opcode byte is not visible here --
// presumably the segment index; confirm.
1187 _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
1189 _bindingInfo.append_uleb128(entry.segOffset);
// Positive ordinals use the dylib-ordinal opcode; the special-ordinal opcode
// below handles the remaining case.
// NOTE(review): only the low four bits of the ordinal are kept by the 0xF
// mask, so an ordinal above 15 would be truncated -- confirm larger ordinals
// cannot reach this point.
1190 if (entry.ordinal > 0)
1191 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
1192 (entry.ordinal & 0xF));
1194 _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
1195 (entry.ordinal & 0xF));
// Symbol name follows its opcode.
1196 _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
1197 _bindingInfo.append_string(entry.symbolName);
1198 if (entry.addend != lastAddend) {
1199 _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB);
1200 _bindingInfo.append_sleb128(entry.addend);
1201 lastAddend = entry.addend;
1203 _bindingInfo.append_byte(BIND_OPCODE_DO_BIND);
1205 _bindingInfo.append_byte(BIND_OPCODE_DONE);
1206 _bindingInfo.align(_is64 ? 8 : 4);
/// Encodes _file.lazyBindingInfo as a bind opcode stream in _lazyBindingInfo.
/// Each entry's DO_BIND is followed directly by a DONE opcode; a further DONE
/// is appended afterwards and the stream is aligned to 8 bytes (when _is64)
/// or 4 bytes.
1209 void MachOFileLayout::buildLazyBindInfo() {
1210 for (const BindLocation& entry : _file.lazyBindingInfo) {
1211 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind);
// NOTE(review): the second operand of this opcode byte is not visible here --
// presumably the segment index; confirm.
1212 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
// NOTE(review): unlike buildBindInfo(), the offset goes through
// append_uleb128Fixed with an argument of 5 -- presumably a fixed encoded
// width in bytes; confirm against ByteBuffer.
1214 _lazyBindingInfo.append_uleb128Fixed(entry.segOffset, 5);
// Positive ordinals use the dylib-ordinal opcode; the special-ordinal opcode
// below handles the remaining case. Only the low four bits are encoded.
1215 if (entry.ordinal > 0)
1216 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM |
1217 (entry.ordinal & 0xF));
1219 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM |
1220 (entry.ordinal & 0xF));
1221 _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM);
1222 _lazyBindingInfo.append_string(entry.symbolName);
1223 _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND);
1224 _lazyBindingInfo.append_byte(BIND_OPCODE_DONE);
1226 _lazyBindingInfo.append_byte(BIND_OPCODE_DONE);
1227 _lazyBindingInfo.align(_is64 ? 8 : 4);
/// Inserts an exported symbol into the trie rooted at this node.
///
/// The part of the name not yet covered by this node's cumulative string is
/// matched against the existing child edges: a fully matching edge is
/// descended, a partially matching edge is split with a new intermediate
/// node, and otherwise a new edge and node are created for the symbol.
///
/// \param entry     Export to add; its name, offset, flags, kind, otherOffset
///                  and otherName are read.
/// \param allocator Allocator for the new nodes, edges and copied strings.
/// \param allNodes  Every node created here is appended to this vector.
1230 void TrieNode::addSymbol(const Export& entry,
1231 BumpPtrAllocator &allocator,
1232 std::vector<TrieNode*> &allNodes) {
// Remainder of the symbol name below this node.
1233 StringRef partialStr = entry.name.drop_front(_cummulativeString.size());
1234 for (TrieEdge &edge : _children) {
1235 StringRef edgeStr = edge._subString;
1236 if (partialStr.startswith(edgeStr)) {
1237 // Already have matching edge, go down that path.
1238 edge._child->addSymbol(entry, allocator, allNodes);
1241 // See if string has common prefix with existing edge.
// n counts down from edgeStr.size()-1 to 1, so longer prefixes are tried first.
1242 for (int n=edgeStr.size()-1; n > 0; --n) {
1243 if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) {
1244 // Splice in new node: was A -> C, now A -> B -> C
// B's cumulative string is C's with the unmatched tail of the edge removed.
1245 StringRef bNodeStr = edge._child->_cummulativeString;
1246 bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator);
1247 auto *bNode = new (allocator) TrieNode(bNodeStr);
1248 allNodes.push_back(bNode);
1249 TrieNode* cNode = edge._child;
// Split the old edge label at position n into the A->B and B->C labels.
1250 StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator);
1251 StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator);
1252 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs()
1253 << "splice in TrieNode('" << bNodeStr
1254 << "') between edge '"
1255 << abEdgeStr << "' and edge='"
1256 << bcEdgeStr<< "'\n");
// The existing edge object is reused as A->B; a new edge carries B->C.
1257 TrieEdge& abEdge = edge;
1258 abEdge._subString = abEdgeStr;
1259 abEdge._child = bNode;
1260 auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode);
1261 bNode->_children.insert(bNode->_children.end(), bcEdge);
// Continue the insertion from the new intermediate node.
1262 bNode->addSymbol(entry, allocator, allNodes);
// Re-export and stub-and-resolver entries are asserted to have a non-zero
// otherOffset.
1267 if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1268 assert(entry.otherOffset != 0);
1270 if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
1271 assert(entry.otherOffset != 0);
1273 // No commonality with any existing child, make a new edge.
// The new node's cumulative string is a copy of the full symbol name.
1274 auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator));
1275 auto *newEdge = new (allocator) TrieEdge(partialStr, newNode);
1276 _children.insert(_children.end(), newEdge);
1277 DEBUG_WITH_TYPE("trie-builder", llvm::dbgs()
1278 << "new TrieNode('" << entry.name << "') with edge '"
1279 << partialStr << "' from node='"
1280 << _cummulativeString << "'\n");
// Record the export information on the new node.
1281 newNode->_address = entry.offset;
1282 newNode->_flags = entry.flags | entry.kind;
1283 newNode->_other = entry.otherOffset;
// The imported name is stored only for re-exports that supply one.
1284 if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty())
1285 newNode->_importedName = entry.otherName.copy(allocator);
1286 newNode->_hasExportInfo = true;
1287 allNodes.push_back(newNode);
/// Computes the encoded size of this node (export info plus child edges) and
/// records \p offset as the node's position in the trie.
///
/// \param offset On input, the position proposed for this node.
/// \returns Per the comment at the end of the body, true if _trieOffset
///          changed.
// NOTE(review): the statements that advance 'offset' and return the result
// are not visible here -- presumably 'offset' grows by the node size; confirm.
1290 bool TrieNode::updateOffset(uint32_t& offset) {
1291 uint32_t nodeSize = 1; // Length when no export info
1292 if (_hasExportInfo) {
// Re-export form: flags, ordinal and the nul-terminated imported name.
1293 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1294 nodeSize = llvm::getULEB128Size(_flags);
1295 nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal.
1296 nodeSize += _importedName.size();
1297 ++nodeSize; // Trailing zero in imported name.
// Regular form: flags and address, plus _other for stub-and-resolver.
1299 nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address);
1300 if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER)
1301 nodeSize += llvm::getULEB128Size(_other);
1303 // Overall node size so far is uleb128 of export info + actual export info.
1304 nodeSize += llvm::getULEB128Size(nodeSize);
1306 // Compute size of all child edges.
1307 ++nodeSize; // Byte for number of children.
// Each edge contributes its nul-terminated label and the uleb128 of the
// child's currently recorded offset.
1308 for (TrieEdge &edge : _children) {
1309 nodeSize += edge._subString.size() + 1 // String length.
1310 + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len.
1312 // On input, 'offset' is new preferred location for this node.
1313 bool result = (_trieOffset != offset);
1314 // Store new location in node object for use by parents.
1315 _trieOffset = offset;
1316 // Update offset for next iteration.
1318 // Return true if _trieOffset was changed.
/// Serializes this node into \p out: a size byte, the export info (if any),
/// the child count, and for each child edge its label and the child's trie
/// offset.
///
/// \param out Buffer that receives the encoded node.
// NOTE(review): the export-info size is written with append_byte after
// assert(nodeSize < 256), whereas updateOffset() accounts for it with
// getULEB128Size(nodeSize). The two agree only while the size is below 128 --
// confirm larger nodes cannot occur.
1322 void TrieNode::appendToByteBuffer(ByteBuffer &out) {
1323 if (_hasExportInfo) {
1324 if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
1325 if (!_importedName.empty()) {
1326 // nodes with re-export info: size, flags, ordinal, import-name
// The +1 accounts for one byte beyond the name's characters.
1327 uint32_t nodeSize = llvm::getULEB128Size(_flags)
1328 + llvm::getULEB128Size(_other)
1329 + _importedName.size() + 1;
1330 assert(nodeSize < 256);
1331 out.append_byte(nodeSize);
1332 out.append_uleb128(_flags);
1333 out.append_uleb128(_other);
1334 out.append_string(_importedName);
1336 // nodes without re-export info: size, flags, ordinal, empty-string
1337 uint32_t nodeSize = llvm::getULEB128Size(_flags)
1338 + llvm::getULEB128Size(_other) + 1;
1339 assert(nodeSize < 256);
1340 out.append_byte(nodeSize);
1341 out.append_uleb128(_flags);
1342 out.append_uleb128(_other);
// NOTE(review): the write of the empty string counted by the +1 above is not
// visible here; confirm it is emitted.
1345 } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
1346 // Nodes with export info: size, flags, address, other
1347 uint32_t nodeSize = llvm::getULEB128Size(_flags)
1348 + llvm::getULEB128Size(_address)
1349 + llvm::getULEB128Size(_other);
1350 assert(nodeSize < 256);
1351 out.append_byte(nodeSize);
1352 out.append_uleb128(_flags);
1353 out.append_uleb128(_address);
1354 out.append_uleb128(_other);
1356 // Nodes with export info: size, flags, address
1357 uint32_t nodeSize = llvm::getULEB128Size(_flags)
1358 + llvm::getULEB128Size(_address);
1359 assert(nodeSize < 256);
1360 out.append_byte(nodeSize);
1361 out.append_uleb128(_flags);
1362 out.append_uleb128(_address);
1365 // Node with no export info.
1366 uint32_t nodeSize = 0;
1367 out.append_byte(nodeSize);
1369 // Add number of children.
// The count is written as a single byte, hence the bound asserted here.
1370 assert(_children.size() < 256);
1371 out.append_byte(_children.size());
1372 // Append each child edge substring and node offset.
1373 for (TrieEdge &edge : _children) {
1374 out.append_string(edge._subString);
1375 out.append_uleb128(edge._child->_trieOffset);
/// Builds the export trie for _file.exportInfo and serializes it into
/// _exportTrie, aligned to 8 bytes (when _is64) or 4 bytes.
1379 void MachOFileLayout::buildExportTrie() {
// An empty export list is tested for first.
// NOTE(review): the guarded statement is not visible here -- presumably an
// early return; confirm.
1380 if (_file.exportInfo.empty())
1383 // For all temporary strings and objects used building trie.
1384 BumpPtrAllocator allocator;
1386 // Build trie of all exported symbols.
// The root node has an empty cumulative string and is the first entry in
// allNodes.
1387 auto *rootNode = new (allocator) TrieNode(StringRef());
1388 std::vector<TrieNode*> allNodes;
1389 allNodes.reserve(_file.exportInfo.size()*2);
1390 allNodes.push_back(rootNode);
1391 for (const Export& entry : _file.exportInfo) {
1392 rootNode->addSymbol(entry, allocator, allNodes);
1395 // Assign each node in the vector an offset in the trie stream, iterating
1396 // until all uleb128 sizes have stabilized.
// NOTE(review): the enclosing repeat-until-stable loop and its flag are not
// visible here; only one pass over the nodes is shown.
1399 uint32_t offset = 0;
1401 for (TrieNode* node : allNodes) {
1402 if (node->updateOffset(offset))
1407 // Serialize trie to ByteBuffer.
1408 for (TrieNode* node : allNodes) {
1409 node->appendToByteBuffer(_exportTrie);
1411 _exportTrie.align(_is64 ? 8 : 4);
/// Computes the sizes and start indexes that describe the symbol table:
/// total entry bytes, string pool bytes, the first index of each symbol
/// range, and the number of indirect symbol table entries.
1414 void MachOFileLayout::computeSymbolTableSizes() {
1415 // MachO symbol tables have three ranges: locals, globals, and undefines
// Entry size depends on whether the 64-bit nlist layout is in use.
1416 const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist));
1417 _symbolTableSize = nlistSize * (_file.localSymbols.size()
1418 + _file.globalSymbols.size()
1419 + _file.undefinedSymbols.size());
1420 _symbolStringPoolSize = 1; // Always reserve 1-byte for the empty string.
// Every symbol name adds its length plus one byte to the pool.
1421 for (const Symbol &sym : _file.localSymbols) {
1422 _symbolStringPoolSize += (sym.name.size()+1);
1424 for (const Symbol &sym : _file.globalSymbols) {
1425 _symbolStringPoolSize += (sym.name.size()+1);
1427 for (const Symbol &sym : _file.undefinedSymbols) {
1428 _symbolStringPoolSize += (sym.name.size()+1);
// The ranges are laid out back to back: locals, then globals, then undefines.
1430 _symbolTableLocalsStartIndex = 0;
1431 _symbolTableGlobalsStartIndex = _file.localSymbols.size();
1432 _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex
1433 + _file.globalSymbols.size();
1435 _indirectSymbolTableCount = 0;
// The loop variable is a const reference named 'sect'; the declarator had
// been corrupted into a single non-ASCII character and is restored here.
1436 for (const Section &sect : _file.sections) {
1437 _indirectSymbolTableCount += sect.indirectSymbols.size();
/// Sets _functionStartsSize to the number of elements in
/// _file.functionStarts.
1441 void MachOFileLayout::computeFunctionStartsSize() {
1442 _functionStartsSize = _file.functionStarts.size();
/// Sets _dataInCodeSize to the byte size of one data_in_code_entry per
/// element of _file.dataInCode.
1445 void MachOFileLayout::computeDataInCodeSize() {
1446 _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry);
/// Writes the link-edit data into the output buffer. Which pieces are written
/// depends on whether the file type is MH_OBJECT.
// NOTE(review): several calls in both branches are not visible here --
// presumably the relocation, symbol table, rebase, bind and export writers;
// confirm.
1449 void MachOFileLayout::writeLinkEditContent() {
1450 if (_file.fileType == llvm::MachO::MH_OBJECT) {
// Calls visible for object files.
1452 writeFunctionStartsInfo();
1453 writeDataInCodeInfo();
// Calls visible for other file types.
1458 writeLazyBindingInfo();
1459 // TODO: add weak binding info
1461 writeFunctionStartsInfo();
1462 writeDataInCodeInfo();
/// Creates the output file at \p path and fills it with the load commands,
/// section content and link-edit content of this layout.
///
/// \param path Location of the file to create.
/// \returns An error converted from _ec or from the output-buffer creation
///          failure; otherwise a default-constructed llvm::Error.
1467 llvm::Error MachOFileLayout::writeBinary(StringRef path) {
1468 // Check for pending error from constructor.
// NOTE(review): the test guarding this return is not visible here --
// presumably a check of _ec; confirm.
1470 return llvm::errorCodeToError(_ec);
1471 // Create FileOutputBuffer with calculated size.
// Any file type other than MH_OBJECT gets the F_executable flag.
1473 if (_file.fileType != llvm::MachO::MH_OBJECT)
1474 flags = llvm::FileOutputBuffer::F_executable;
1475 ErrorOr<std::unique_ptr<llvm::FileOutputBuffer>> fobOrErr =
1476 llvm::FileOutputBuffer::create(path, size(), flags);
1477 if (std::error_code ec = fobOrErr.getError())
1478 return llvm::errorCodeToError(ec);
1479 std::unique_ptr<llvm::FileOutputBuffer> &fob = *fobOrErr;
// All write*() helpers store through _buffer.
1481 _buffer = fob->getBufferStart();
// NOTE(review): the statement run when writeLoadCommands() reports an error
// is not visible here -- presumably it returns that error; likewise no commit
// of the output buffer is visible. Confirm both.
1483 if (auto ec = writeLoadCommands())
1485 writeSectionContent();
1486 writeLinkEditContent();
1489 return llvm::Error();
1492 /// Takes in-memory normalized view and writes a mach-o object file.
///
/// Constructs a MachOFileLayout for \p file and delegates to its
/// writeBinary().
///
/// \param file Normalized in-memory representation to serialize.
/// \param path Location of the file to write.
/// \returns The llvm::Error produced by MachOFileLayout::writeBinary().
1493 llvm::Error writeBinary(const NormalizedFile &file, StringRef path) {
1494 MachOFileLayout layout(file);
1495 return layout.writeBinary(path);
1498 } // namespace normalized
1499 } // namespace mach_o