1 //===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -------------*- C++ -*-===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file A pass to convert MachO's __compact_unwind sections into the final
11 /// __unwind_info format used during runtime. See
12 /// mach-o/compact_unwind_encoding.h for more details on the formats involved.
14 //===----------------------------------------------------------------------===//
16 #include "ArchHandler.h"
18 #include "MachONormalizedFileBinaryUtils.h"
19 #include "MachOPasses.h"
20 #include "lld/Core/DefinedAtom.h"
21 #include "lld/Core/File.h"
22 #include "lld/Core/LLVM.h"
23 #include "lld/Core/Reference.h"
24 #include "lld/Core/Simple.h"
25 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/Format.h"
29 #define DEBUG_TYPE "macho-compact-unwind"
35 struct CompactUnwindEntry {
36 const Atom *rangeStart;
37 const Atom *personalityFunction;
38 const Atom *lsdaLocation;
43 // There are 3 types of compact unwind entry, distinguished by the encoding
44 // value: 0 indicates a function with no unwind info;
45 // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to
46 // __eh_frame, and that the ehFrame entry will be valid; any other value is a
47 // real compact unwind entry -- personalityFunction will be set and
48 // lsdaLocation may be.
51 CompactUnwindEntry(const DefinedAtom *function)
52 : rangeStart(function), personalityFunction(nullptr),
53 lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()),
57 : rangeStart(nullptr), personalityFunction(nullptr),
58 lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {}
61 struct UnwindInfoPage {
62 ArrayRef<CompactUnwindEntry> entries;
66 class UnwindInfoAtom : public SimpleDefinedAtom {
68 UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig,
69 std::vector<const Atom *> &personalities,
70 std::vector<uint32_t> &commonEncodings,
71 std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs)
72 : SimpleDefinedAtom(file), _archHandler(archHandler),
73 _commonEncodingsOffset(7 * sizeof(uint32_t)),
74 _personalityArrayOffset(_commonEncodingsOffset +
75 commonEncodings.size() * sizeof(uint32_t)),
76 _topLevelIndexOffset(_personalityArrayOffset +
77 personalities.size() * sizeof(uint32_t)),
78 _lsdaIndexOffset(_topLevelIndexOffset +
79 3 * (pages.size() + 1) * sizeof(uint32_t)),
80 _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)),
83 addHeader(commonEncodings.size(), personalities.size(), pages.size());
84 addCommonEncodings(commonEncodings);
85 addPersonalityFunctions(personalities);
86 addTopLevelIndexes(pages);
87 addLSDAIndexes(pages, numLSDAs);
88 addSecondLevelPages(pages);
91 ~UnwindInfoAtom() override = default;
93 ContentType contentType() const override {
94 return DefinedAtom::typeProcessedUnwindInfo;
/// Reports the alignment of the generated unwind-info atom; always the
/// constant 4 (NOTE(review): presumably bytes, matching the uint32_t fields
/// written into the section -- confirm against DefinedAtom::Alignment).
97 Alignment alignment() const override { return 4; }
/// Size of the atom: the number of bytes currently held in the _contents
/// buffer that the add*() helpers grow while laying out the section.
99 uint64_t size() const override { return _contents.size(); }
101 ContentPermissions permissions() const override {
102 return DefinedAtom::permR__;
/// Returns a non-owning view of the _contents byte buffer; the view refers to
/// the vector's storage, so it is only valid while _contents is not resized.
105 ArrayRef<uint8_t> rawContent() const override { return _contents; }
107 void addHeader(uint32_t numCommon, uint32_t numPersonalities,
109 using normalized::write32;
111 uint32_t headerSize = 7 * sizeof(uint32_t);
112 _contents.resize(headerSize);
114 uint8_t *headerEntries = _contents.data();
116 write32(headerEntries, 1, _isBig);
117 // commonEncodingsArraySectionOffset
118 write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig);
119 // commonEncodingsArrayCount
120 write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig);
121 // personalityArraySectionOffset
122 write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset,
124 // personalityArrayCount
125 write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig);
126 // indexSectionOffset
127 write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig);
129 write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig);
132 /// Add the list of common encodings to the section; this is simply an array
133 /// of uint32_t compact values. Size has already been specified in the header.
134 void addCommonEncodings(std::vector<uint32_t> &commonEncodings) {
135 using normalized::write32;
137 _contents.resize(_commonEncodingsOffset +
138 commonEncodings.size() * sizeof(uint32_t));
139 uint8_t *commonEncodingsArea =
140 reinterpret_cast<uint8_t *>(_contents.data() + _commonEncodingsOffset);
142 for (uint32_t encoding : commonEncodings) {
143 write32(commonEncodingsArea, encoding, _isBig);
144 commonEncodingsArea += sizeof(uint32_t);
148 void addPersonalityFunctions(std::vector<const Atom *> personalities) {
149 _contents.resize(_personalityArrayOffset +
150 personalities.size() * sizeof(uint32_t));
152 for (unsigned i = 0; i < personalities.size(); ++i)
153 addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t),
157 void addTopLevelIndexes(std::vector<UnwindInfoPage> &pages) {
158 using normalized::write32;
160 uint32_t numIndexes = pages.size() + 1;
161 _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t));
163 uint32_t pageLoc = _firstPageOffset;
165 // The most difficult job here is calculating the LSDAs; everything else
166 // follows fairly naturally, but we can't state where the first
167 uint8_t *indexData = &_contents[_topLevelIndexOffset];
168 uint32_t numLSDAs = 0;
169 for (unsigned i = 0; i < pages.size(); ++i) {
171 addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t),
172 pages[i].entries[0].rangeStart);
173 // secondLevelPagesSectionOffset
174 write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig);
175 write32(indexData + (3 * i + 2) * sizeof(uint32_t),
176 _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig);
178 for (auto &entry : pages[i].entries)
179 if (entry.lsdaLocation)
183 // Finally, write out the final sentinel index
184 auto &finalEntry = pages[pages.size() - 1].entries.back();
185 addImageReference(_topLevelIndexOffset +
186 3 * pages.size() * sizeof(uint32_t),
187 finalEntry.rangeStart, finalEntry.rangeLength);
188 // secondLevelPagesSectionOffset => 0
189 write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t),
190 _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig);
193 void addLSDAIndexes(std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) {
194 _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t));
196 uint32_t curOffset = _lsdaIndexOffset;
197 for (auto &page : pages) {
198 for (auto &entry : page.entries) {
199 if (!entry.lsdaLocation)
202 addImageReference(curOffset, entry.rangeStart);
203 addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation);
204 curOffset += 2 * sizeof(uint32_t);
209 void addSecondLevelPages(std::vector<UnwindInfoPage> &pages) {
210 for (auto &page : pages) {
211 addRegularSecondLevelPage(page);
215 void addRegularSecondLevelPage(const UnwindInfoPage &page) {
216 uint32_t curPageOffset = _contents.size();
217 const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t);
218 uint32_t curPageSize =
219 headerSize + 2 * page.entries.size() * sizeof(uint32_t);
220 _contents.resize(curPageOffset + curPageSize);
222 using normalized::write32;
223 using normalized::write16;
225 write32(&_contents[curPageOffset], 2, _isBig);
226 // offset of 1st entry
227 write16(&_contents[curPageOffset + 4], headerSize, _isBig);
228 write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig);
230 uint32_t pagePos = curPageOffset + headerSize;
231 for (auto &entry : page.entries) {
232 addImageReference(pagePos, entry.rangeStart);
234 write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding,
236 if ((entry.encoding & 0x0f000000U) ==
237 _archHandler.dwarfCompactUnwindType())
238 addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame);
240 pagePos += 2 * sizeof(uint32_t);
244 void addEhFrameReference(uint32_t offset, const Atom *dest,
245 Reference::Addend addend = 0) {
246 addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
247 _archHandler.unwindRefToEhFrameKind(), offset, dest, addend);
250 void addImageReference(uint32_t offset, const Atom *dest,
251 Reference::Addend addend = 0) {
252 addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
253 _archHandler.imageOffsetKind(), offset, dest, addend);
256 void addImageReferenceIndirect(uint32_t offset, const Atom *dest) {
257 addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
258 _archHandler.imageOffsetKindIndirect(), offset, dest, 0);
262 mach_o::ArchHandler &_archHandler;
263 std::vector<uint8_t> _contents;
264 uint32_t _commonEncodingsOffset;
265 uint32_t _personalityArrayOffset;
266 uint32_t _topLevelIndexOffset;
267 uint32_t _lsdaIndexOffset;
268 uint32_t _firstPageOffset;
272 /// Pass for converting __compact_unwind atoms into the final __unwind_info section.
274 class CompactUnwindPass : public Pass {
276 CompactUnwindPass(const MachOLinkingContext &context)
277 : _ctx(context), _archHandler(_ctx.archHandler()),
278 _file(*_ctx.make_file<MachOFile>("<mach-o Compact Unwind Pass>")),
279 _isBig(MachOLinkingContext::isBigEndian(_ctx.arch())) {
280 _file.setOrdinal(_ctx.getNextOrdinalAndIncrement());
284 llvm::Error perform(SimpleFile &mergedFile) override {
285 DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n");
287 std::map<const Atom *, CompactUnwindEntry> unwindLocs;
288 std::map<const Atom *, const Atom *> dwarfFrames;
289 std::vector<const Atom *> personalities;
290 uint32_t numLSDAs = 0;
292 // First collect all __compact_unwind and __eh_frame entries, addressable by
293 // the function referred to.
294 collectCompactUnwindEntries(mergedFile, unwindLocs, personalities,
297 collectDwarfFrameEntries(mergedFile, dwarfFrames);
299 // Skip rest of pass if no unwind info.
300 if (unwindLocs.empty() && dwarfFrames.empty())
301 return llvm::Error::success();
303 // FIXME: if there are more than 4 personality functions then we need to
304 // defer to DWARF info for the ones we don't put in the list. They should
305 // also probably be sorted by frequency.
306 assert(personalities.size() <= 4);
308 // TODO: Find common encodings for use by compressed pages.
309 std::vector<uint32_t> commonEncodings;
311 // Now sort the entries by final address and fixup the compact encoding to
312 // its final form (i.e. set personality function bits & create DWARF
313 // references where needed).
314 std::vector<CompactUnwindEntry> unwindInfos = createUnwindInfoEntries(
315 mergedFile, unwindLocs, personalities, dwarfFrames);
317 // Remove any unused eh-frame atoms.
318 pruneUnusedEHFrames(mergedFile, unwindInfos, unwindLocs, dwarfFrames);
320 // Finally, we can start creating pages based on these entries.
322 DEBUG(llvm::dbgs() << " Splitting entries into pages\n");
323 // FIXME: we split the entries into pages naively: lots of 4k pages followed
324 // by a small one. ld64 tried to minimize space and align them to real 4k
325 // boundaries. That might be worth doing, or perhaps we could perform some
326 // minor balancing for expected number of lookups.
327 std::vector<UnwindInfoPage> pages;
328 auto remainingInfos = llvm::makeArrayRef(unwindInfos);
330 pages.push_back(UnwindInfoPage());
332 // FIXME: we only create regular pages at the moment. These can hold up to
333 // 1021 entries according to the documentation.
334 unsigned entriesInPage = std::min(1021U, (unsigned)remainingInfos.size());
336 pages.back().entries = remainingInfos.slice(0, entriesInPage);
337 remainingInfos = remainingInfos.slice(entriesInPage);
340 << " Page from " << pages.back().entries[0].rangeStart->name()
341 << " to " << pages.back().entries.back().rangeStart->name() << " + "
342 << llvm::format("0x%x", pages.back().entries.back().rangeLength)
343 << " has " << entriesInPage << " entries\n");
344 } while (!remainingInfos.empty());
346 auto *unwind = new (_file.allocator())
347 UnwindInfoAtom(_archHandler, _file, _isBig, personalities,
348 commonEncodings, pages, numLSDAs);
349 mergedFile.addAtom(*unwind);
351 // Finally, remove all __compact_unwind atoms now that we've processed them.
352 mergedFile.removeDefinedAtomsIf([](const DefinedAtom *atom) {
353 return atom->contentType() == DefinedAtom::typeCompactUnwindInfo;
356 return llvm::Error::success();
359 void collectCompactUnwindEntries(
360 const SimpleFile &mergedFile,
361 std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
362 std::vector<const Atom *> &personalities, uint32_t &numLSDAs) {
363 DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n");
365 for (const DefinedAtom *atom : mergedFile.defined()) {
366 if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo)
369 auto unwindEntry = extractCompactUnwindEntry(atom);
370 unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry));
372 DEBUG(llvm::dbgs() << " Entry for " << unwindEntry.rangeStart->name()
374 << llvm::format("0x%08x", unwindEntry.encoding));
375 if (unwindEntry.personalityFunction)
376 DEBUG(llvm::dbgs() << ", personality="
377 << unwindEntry.personalityFunction->name()
378 << ", lsdaLoc=" << unwindEntry.lsdaLocation->name());
379 DEBUG(llvm::dbgs() << '\n');
381 // Count number of LSDAs we see, since we need to know how big the index
382 // will be while laying out the section.
383 if (unwindEntry.lsdaLocation)
386 // Gather the personality functions now, so that they're in deterministic
387 // order (derived from the DefinedAtom order).
388 if (unwindEntry.personalityFunction) {
389 auto pFunc = std::find(personalities.begin(), personalities.end(),
390 unwindEntry.personalityFunction);
391 if (pFunc == personalities.end())
392 personalities.push_back(unwindEntry.personalityFunction);
397 CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) {
398 CompactUnwindEntry entry;
400 for (const Reference *ref : *atom) {
401 switch (ref->offsetInAtom()) {
403 // FIXME: there could legitimately be functions with multiple encoding
404 // entries. However, nothing produces them at the moment.
405 assert(ref->addend() == 0 && "unexpected offset into function");
406 entry.rangeStart = ref->target();
409 assert(ref->addend() == 0 && "unexpected offset into personality fn");
410 entry.personalityFunction = ref->target();
413 assert(ref->addend() == 0 && "unexpected offset into LSDA atom");
414 entry.lsdaLocation = ref->target();
419 if (atom->rawContent().size() < 4 * sizeof(uint32_t))
422 using normalized::read32;
424 read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig);
426 read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig);
431 collectDwarfFrameEntries(const SimpleFile &mergedFile,
432 std::map<const Atom *, const Atom *> &dwarfFrames) {
433 for (const DefinedAtom *ehFrameAtom : mergedFile.defined()) {
434 if (ehFrameAtom->contentType() != DefinedAtom::typeCFI)
436 if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom))
439 if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom))
440 dwarfFrames[function] = ehFrameAtom;
444 /// Every atom defined in __TEXT,__text needs an entry in the final
445 /// __unwind_info section (in order). These come from two sources:
446 /// + Input __compact_unwind sections where possible (after adding the
447 /// personality function offset which is only known now).
448 /// + A synthesised reference to __eh_frame if there's no __compact_unwind
449 /// or too many personality functions to be accommodated.
450 std::vector<CompactUnwindEntry> createUnwindInfoEntries(
451 const SimpleFile &mergedFile,
452 const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
453 const std::vector<const Atom *> &personalities,
454 const std::map<const Atom *, const Atom *> &dwarfFrames) {
455 std::vector<CompactUnwindEntry> unwindInfos;
457 DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n");
458 // The final order in the __unwind_info section must be derived from the
459 // order of typeCode atoms, since that's how they'll be put into the object
460 // file eventually (yuck!).
461 for (const DefinedAtom *atom : mergedFile.defined()) {
462 if (atom->contentType() != DefinedAtom::typeCode)
465 unwindInfos.push_back(finalizeUnwindInfoEntryForAtom(
466 atom, unwindLocs, personalities, dwarfFrames));
468 DEBUG(llvm::dbgs() << " Entry for " << atom->name()
469 << ", final encoding="
470 << llvm::format("0x%08x", unwindInfos.back().encoding)
477 /// Remove unused EH frames.
479 /// An EH frame is considered unused if there is a corresponding compact
480 /// unwind atom that doesn't require the EH frame.
481 void pruneUnusedEHFrames(
482 SimpleFile &mergedFile,
483 const std::vector<CompactUnwindEntry> &unwindInfos,
484 const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
485 const std::map<const Atom *, const Atom *> &dwarfFrames) {
487 // Worklist of all 'used' FDEs.
488 std::vector<const DefinedAtom *> usedDwarfWorklist;
490 // We have to check two conditions when building the worklist:
491 // (1) EH frames used by compact unwind entries.
492 for (auto &entry : unwindInfos)
494 usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.ehFrame));
496 // (2) EH frames that reference functions with no corresponding compact
498 for (auto &entry : dwarfFrames)
499 if (!unwindLocs.count(entry.first))
500 usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.second));
502 // Add all transitively referenced CFI atoms by processing the worklist.
503 std::set<const Atom *> usedDwarfFrames;
504 while (!usedDwarfWorklist.empty()) {
505 const DefinedAtom *cfiAtom = usedDwarfWorklist.back();
506 usedDwarfWorklist.pop_back();
507 usedDwarfFrames.insert(cfiAtom);
508 for (const auto *ref : *cfiAtom) {
509 const DefinedAtom *cfiTarget = dyn_cast<DefinedAtom>(ref->target());
510 if (cfiTarget->contentType() == DefinedAtom::typeCFI)
511 usedDwarfWorklist.push_back(cfiTarget);
515 // Finally, delete all unreferenced CFI atoms.
516 mergedFile.removeDefinedAtomsIf([&](const DefinedAtom *atom) {
517 if ((atom->contentType() == DefinedAtom::typeCFI) &&
518 !usedDwarfFrames.count(atom))
524 CompactUnwindEntry finalizeUnwindInfoEntryForAtom(
525 const DefinedAtom *function,
526 const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
527 const std::vector<const Atom *> &personalities,
528 const std::map<const Atom *, const Atom *> &dwarfFrames) {
529 auto unwindLoc = unwindLocs.find(function);
531 CompactUnwindEntry entry;
532 if (unwindLoc == unwindLocs.end()) {
533 // Default entry has correct encoding (0 => no unwind), but we need to
534 // synthesise the function.
535 entry.rangeStart = function;
536 entry.rangeLength = function->size();
538 entry = unwindLoc->second;
541 // If there's no __compact_unwind entry, or it explicitly says to use
542 // __eh_frame, we need to try and fill in the correct DWARF atom.
543 if (entry.encoding == _archHandler.dwarfCompactUnwindType() ||
544 entry.encoding == 0) {
545 auto dwarfFrame = dwarfFrames.find(function);
546 if (dwarfFrame != dwarfFrames.end()) {
547 entry.encoding = _archHandler.dwarfCompactUnwindType();
548 entry.ehFrame = dwarfFrame->second;
552 auto personality = std::find(personalities.begin(), personalities.end(),
553 entry.personalityFunction);
554 uint32_t personalityIdx = personality == personalities.end()
556 : personality - personalities.begin() + 1;
558 // FIXME: We should also use DWARF when there isn't enough room for the
559 // personality function in the compact encoding.
560 assert(personalityIdx < 4 && "too many personality functions");
562 entry.encoding |= personalityIdx << 28;
564 if (entry.lsdaLocation)
565 entry.encoding |= 1U << 30;
570 const MachOLinkingContext &_ctx;
571 mach_o::ArchHandler &_archHandler;
576 void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) {
577 assert(ctx.needsCompactUnwindPass());
578 pm.add(llvm::make_unique<CompactUnwindPass>(ctx));
581 } // end namespace mach_o
582 } // end namespace lld