1 //===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -------------*- C++ -*-===//
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 /// \file A pass to convert MachO's __compact_unwind sections into the final
11 /// __unwind_info format used during runtime. See
12 /// mach-o/compact_unwind_encoding.h for more details on the formats involved.
14 //===----------------------------------------------------------------------===//
16 #include "ArchHandler.h"
18 #include "MachONormalizedFileBinaryUtils.h"
19 #include "MachOPasses.h"
20 #include "lld/Common/LLVM.h"
21 #include "lld/Core/DefinedAtom.h"
22 #include "lld/Core/File.h"
23 #include "lld/Core/Reference.h"
24 #include "lld/Core/Simple.h"
25 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/Support/Debug.h"
27 #include "llvm/Support/Format.h"
29 #define DEBUG_TYPE "macho-compact-unwind"
35 struct CompactUnwindEntry {
36 const Atom *rangeStart;
37 const Atom *personalityFunction;
38 const Atom *lsdaLocation;
43 // There are 3 types of compact unwind entry, distinguished by the encoding
44 // value: 0 indicates a function with no unwind info;
45 // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to
46 // __eh_frame, and that the ehFrame entry will be valid; any other value is a
47 // real compact unwind entry -- personalityFunction will be set and
48 // lsdaLocation may be.
51 CompactUnwindEntry(const DefinedAtom *function)
52 : rangeStart(function), personalityFunction(nullptr),
53 lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()),
57 : rangeStart(nullptr), personalityFunction(nullptr),
58 lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {}
61 struct UnwindInfoPage {
62 ArrayRef<CompactUnwindEntry> entries;
66 class UnwindInfoAtom : public SimpleDefinedAtom {
68 UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig,
69 std::vector<const Atom *> &personalities,
70 std::vector<uint32_t> &commonEncodings,
71 std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs)
72 : SimpleDefinedAtom(file), _archHandler(archHandler),
73 _commonEncodingsOffset(7 * sizeof(uint32_t)),
74 _personalityArrayOffset(_commonEncodingsOffset +
75 commonEncodings.size() * sizeof(uint32_t)),
76 _topLevelIndexOffset(_personalityArrayOffset +
77 personalities.size() * sizeof(uint32_t)),
78 _lsdaIndexOffset(_topLevelIndexOffset +
79 3 * (pages.size() + 1) * sizeof(uint32_t)),
80 _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)),
83 addHeader(commonEncodings.size(), personalities.size(), pages.size());
84 addCommonEncodings(commonEncodings);
85 addPersonalityFunctions(personalities);
86 addTopLevelIndexes(pages);
87 addLSDAIndexes(pages, numLSDAs);
88 addSecondLevelPages(pages);
91 ~UnwindInfoAtom() override = default;
93 ContentType contentType() const override {
94 return DefinedAtom::typeProcessedUnwindInfo;
97 Alignment alignment() const override { return 4; }
99 uint64_t size() const override { return _contents.size(); }
101 ContentPermissions permissions() const override {
102 return DefinedAtom::permR__;
105 ArrayRef<uint8_t> rawContent() const override { return _contents; }
107 void addHeader(uint32_t numCommon, uint32_t numPersonalities,
109 using normalized::write32;
111 uint32_t headerSize = 7 * sizeof(uint32_t);
112 _contents.resize(headerSize);
114 uint8_t *headerEntries = _contents.data();
116 write32(headerEntries, 1, _isBig);
117 // commonEncodingsArraySectionOffset
118 write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig);
119 // commonEncodingsArrayCount
120 write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig);
121 // personalityArraySectionOffset
122 write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset,
124 // personalityArrayCount
125 write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig);
126 // indexSectionOffset
127 write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig);
129 write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig);
132 /// Add the list of common encodings to the section; this is simply an array
133 /// of uint32_t compact values. Size has already been specified in the header.
134 void addCommonEncodings(std::vector<uint32_t> &commonEncodings) {
135 using normalized::write32;
137 _contents.resize(_commonEncodingsOffset +
138 commonEncodings.size() * sizeof(uint32_t));
139 uint8_t *commonEncodingsArea =
140 reinterpret_cast<uint8_t *>(_contents.data() + _commonEncodingsOffset);
142 for (uint32_t encoding : commonEncodings) {
143 write32(commonEncodingsArea, encoding, _isBig);
144 commonEncodingsArea += sizeof(uint32_t);
148 void addPersonalityFunctions(std::vector<const Atom *> personalities) {
149 _contents.resize(_personalityArrayOffset +
150 personalities.size() * sizeof(uint32_t));
152 for (unsigned i = 0; i < personalities.size(); ++i)
153 addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t),
157 void addTopLevelIndexes(std::vector<UnwindInfoPage> &pages) {
158 using normalized::write32;
160 uint32_t numIndexes = pages.size() + 1;
161 _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t));
163 uint32_t pageLoc = _firstPageOffset;
165 // The most difficult job here is calculating the LSDAs; everything else
166 // follows fairly naturally, but we can't state where the first LSDA index entry of a page lives until the LSDAs of all earlier pages are counted.
167 uint8_t *indexData = &_contents[_topLevelIndexOffset];
168 uint32_t numLSDAs = 0;
169 for (unsigned i = 0; i < pages.size(); ++i) {
171 addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t),
172 pages[i].entries[0].rangeStart);
173 // secondLevelPagesSectionOffset
174 write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig);
175 write32(indexData + (3 * i + 2) * sizeof(uint32_t),
176 _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig);
178 for (auto &entry : pages[i].entries)
179 if (entry.lsdaLocation)
183 // Finally, write out the final sentinel index
184 auto &finalEntry = pages[pages.size() - 1].entries.back();
185 addImageReference(_topLevelIndexOffset +
186 3 * pages.size() * sizeof(uint32_t),
187 finalEntry.rangeStart, finalEntry.rangeLength);
188 // secondLevelPagesSectionOffset => 0 (slot left zeroed); the write below fills lsdaIndexArraySectionOffset
189 write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t),
190 _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig);
193 void addLSDAIndexes(std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) {
194 _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t));
196 uint32_t curOffset = _lsdaIndexOffset;
197 for (auto &page : pages) {
198 for (auto &entry : page.entries) {
199 if (!entry.lsdaLocation)
202 addImageReference(curOffset, entry.rangeStart);
203 addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation);
204 curOffset += 2 * sizeof(uint32_t);
209 void addSecondLevelPages(std::vector<UnwindInfoPage> &pages) {
210 for (auto &page : pages) {
211 addRegularSecondLevelPage(page);
215 void addRegularSecondLevelPage(const UnwindInfoPage &page) {
216 uint32_t curPageOffset = _contents.size();
217 const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t);
218 uint32_t curPageSize =
219 headerSize + 2 * page.entries.size() * sizeof(uint32_t);
220 _contents.resize(curPageOffset + curPageSize);
222 using normalized::write32;
223 using normalized::write16;
225 write32(&_contents[curPageOffset], 2, _isBig);
226 // offset of 1st entry
227 write16(&_contents[curPageOffset + 4], headerSize, _isBig);
228 write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig);
230 uint32_t pagePos = curPageOffset + headerSize;
231 for (auto &entry : page.entries) {
232 addImageReference(pagePos, entry.rangeStart);
234 write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding,
236 if ((entry.encoding & 0x0f000000U) ==
237 _archHandler.dwarfCompactUnwindType())
238 addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame);
240 pagePos += 2 * sizeof(uint32_t);
244 void addEhFrameReference(uint32_t offset, const Atom *dest,
245 Reference::Addend addend = 0) {
246 addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
247 _archHandler.unwindRefToEhFrameKind(), offset, dest, addend);
250 void addImageReference(uint32_t offset, const Atom *dest,
251 Reference::Addend addend = 0) {
252 addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
253 _archHandler.imageOffsetKind(), offset, dest, addend);
256 void addImageReferenceIndirect(uint32_t offset, const Atom *dest) {
257 addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(),
258 _archHandler.imageOffsetKindIndirect(), offset, dest, 0);
262 mach_o::ArchHandler &_archHandler;
263 std::vector<uint8_t> _contents;
264 uint32_t _commonEncodingsOffset;
265 uint32_t _personalityArrayOffset;
266 uint32_t _topLevelIndexOffset;
267 uint32_t _lsdaIndexOffset;
268 uint32_t _firstPageOffset;
272 /// Pass for converting __compact_unwind atoms into the final __unwind_info atom.
274 class CompactUnwindPass : public Pass {
276 CompactUnwindPass(const MachOLinkingContext &context)
277 : _ctx(context), _archHandler(_ctx.archHandler()),
278 _file(*_ctx.make_file<MachOFile>("<mach-o Compact Unwind Pass>")),
279 _isBig(MachOLinkingContext::isBigEndian(_ctx.arch())) {
280 _file.setOrdinal(_ctx.getNextOrdinalAndIncrement());
284 llvm::Error perform(SimpleFile &mergedFile) override {
285 LLVM_DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n");
287 std::map<const Atom *, CompactUnwindEntry> unwindLocs;
288 std::map<const Atom *, const Atom *> dwarfFrames;
289 std::vector<const Atom *> personalities;
290 uint32_t numLSDAs = 0;
292 // First collect all __compact_unwind and __eh_frame entries, addressable by
293 // the function referred to.
294 collectCompactUnwindEntries(mergedFile, unwindLocs, personalities,
297 collectDwarfFrameEntries(mergedFile, dwarfFrames);
299 // Skip rest of pass if no unwind info.
300 if (unwindLocs.empty() && dwarfFrames.empty())
301 return llvm::Error::success();
303 // FIXME: if there are more than 4 personality functions then we need to
304 // defer to DWARF info for the ones we don't put in the list. They should
305 // also probably be sorted by frequency.
306 assert(personalities.size() <= 4);
308 // TODO: Find common encodings for use by compressed pages.
309 std::vector<uint32_t> commonEncodings;
311 // Now sort the entries by final address and fixup the compact encoding to
312 // its final form (i.e. set personality function bits & create DWARF
313 // references where needed).
314 std::vector<CompactUnwindEntry> unwindInfos = createUnwindInfoEntries(
315 mergedFile, unwindLocs, personalities, dwarfFrames);
317 // Remove any unused eh-frame atoms.
318 pruneUnusedEHFrames(mergedFile, unwindInfos, unwindLocs, dwarfFrames);
320 // Finally, we can start creating pages based on these entries.
322 LLVM_DEBUG(llvm::dbgs() << " Splitting entries into pages\n");
323 // FIXME: we split the entries into pages naively: lots of 4k pages followed
324 // by a small one. ld64 tried to minimize space and align them to real 4k
325 // boundaries. That might be worth doing, or perhaps we could perform some
326 // minor balancing for expected number of lookups.
327 std::vector<UnwindInfoPage> pages;
328 auto remainingInfos = llvm::makeArrayRef(unwindInfos);
330 pages.push_back(UnwindInfoPage());
332 // FIXME: we only create regular pages at the moment. These can hold up to
333 // 1021 entries according to the documentation.
334 unsigned entriesInPage = std::min(1021U, (unsigned)remainingInfos.size());
336 pages.back().entries = remainingInfos.slice(0, entriesInPage);
337 remainingInfos = remainingInfos.slice(entriesInPage);
339 LLVM_DEBUG(llvm::dbgs()
341 << pages.back().entries[0].rangeStart->name() << " to "
342 << pages.back().entries.back().rangeStart->name() << " + "
343 << llvm::format("0x%x",
344 pages.back().entries.back().rangeLength)
345 << " has " << entriesInPage << " entries\n");
346 } while (!remainingInfos.empty());
348 auto *unwind = new (_file.allocator())
349 UnwindInfoAtom(_archHandler, _file, _isBig, personalities,
350 commonEncodings, pages, numLSDAs);
351 mergedFile.addAtom(*unwind);
353 // Finally, remove all __compact_unwind atoms now that we've processed them.
354 mergedFile.removeDefinedAtomsIf([](const DefinedAtom *atom) {
355 return atom->contentType() == DefinedAtom::typeCompactUnwindInfo;
358 return llvm::Error::success();
361 void collectCompactUnwindEntries(
362 const SimpleFile &mergedFile,
363 std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
364 std::vector<const Atom *> &personalities, uint32_t &numLSDAs) {
365 LLVM_DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n");
367 for (const DefinedAtom *atom : mergedFile.defined()) {
368 if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo)
371 auto unwindEntry = extractCompactUnwindEntry(atom);
372 unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry));
374 LLVM_DEBUG(llvm::dbgs() << " Entry for "
375 << unwindEntry.rangeStart->name() << ", encoding="
376 << llvm::format("0x%08x", unwindEntry.encoding));
377 if (unwindEntry.personalityFunction)
378 LLVM_DEBUG(llvm::dbgs()
380 << unwindEntry.personalityFunction->name()
381 << ", lsdaLoc=" << unwindEntry.lsdaLocation->name());
382 LLVM_DEBUG(llvm::dbgs() << '\n');
384 // Count number of LSDAs we see, since we need to know how big the index
385 // will be while laying out the section.
386 if (unwindEntry.lsdaLocation)
389 // Gather the personality functions now, so that they're in deterministic
390 // order (derived from the DefinedAtom order).
391 if (unwindEntry.personalityFunction) {
392 auto pFunc = std::find(personalities.begin(), personalities.end(),
393 unwindEntry.personalityFunction);
394 if (pFunc == personalities.end())
395 personalities.push_back(unwindEntry.personalityFunction);
400 CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) {
401 CompactUnwindEntry entry;
403 for (const Reference *ref : *atom) {
404 switch (ref->offsetInAtom()) {
406 // FIXME: there could legitimately be functions with multiple encoding
407 // entries. However, nothing produces them at the moment.
408 assert(ref->addend() == 0 && "unexpected offset into function");
409 entry.rangeStart = ref->target();
412 assert(ref->addend() == 0 && "unexpected offset into personality fn");
413 entry.personalityFunction = ref->target();
416 assert(ref->addend() == 0 && "unexpected offset into LSDA atom");
417 entry.lsdaLocation = ref->target();
422 if (atom->rawContent().size() < 4 * sizeof(uint32_t))
425 using normalized::read32;
427 read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig);
429 read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig);
434 collectDwarfFrameEntries(const SimpleFile &mergedFile,
435 std::map<const Atom *, const Atom *> &dwarfFrames) {
436 for (const DefinedAtom *ehFrameAtom : mergedFile.defined()) {
437 if (ehFrameAtom->contentType() != DefinedAtom::typeCFI)
439 if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom))
442 if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom))
443 dwarfFrames[function] = ehFrameAtom;
447 /// Every atom defined in __TEXT,__text needs an entry in the final
448 /// __unwind_info section (in order). These come from two sources:
449 /// + Input __compact_unwind sections where possible (after adding the
450 /// personality function offset which is only known now).
451 /// + A synthesised reference to __eh_frame if there's no __compact_unwind
452 /// or too many personality functions to be accommodated.
453 std::vector<CompactUnwindEntry> createUnwindInfoEntries(
454 const SimpleFile &mergedFile,
455 const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
456 const std::vector<const Atom *> &personalities,
457 const std::map<const Atom *, const Atom *> &dwarfFrames) {
458 std::vector<CompactUnwindEntry> unwindInfos;
460 LLVM_DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n");
461 // The final order in the __unwind_info section must be derived from the
462 // order of typeCode atoms, since that's how they'll be put into the object
463 // file eventually (yuck!).
464 for (const DefinedAtom *atom : mergedFile.defined()) {
465 if (atom->contentType() != DefinedAtom::typeCode)
468 unwindInfos.push_back(finalizeUnwindInfoEntryForAtom(
469 atom, unwindLocs, personalities, dwarfFrames));
471 LLVM_DEBUG(llvm::dbgs()
472 << " Entry for " << atom->name() << ", final encoding="
473 << llvm::format("0x%08x", unwindInfos.back().encoding)
480 /// Remove unused EH frames.
482 /// An EH frame is considered unused if there is a corresponding compact
483 /// unwind atom that doesn't require the EH frame.
484 void pruneUnusedEHFrames(
485 SimpleFile &mergedFile,
486 const std::vector<CompactUnwindEntry> &unwindInfos,
487 const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
488 const std::map<const Atom *, const Atom *> &dwarfFrames) {
490 // Worklist of all 'used' FDEs.
491 std::vector<const DefinedAtom *> usedDwarfWorklist;
493 // We have to check two conditions when building the worklist:
494 // (1) EH frames used by compact unwind entries.
495 for (auto &entry : unwindInfos)
497 usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.ehFrame));
499 // (2) EH frames that reference functions with no corresponding compact
501 for (auto &entry : dwarfFrames)
502 if (!unwindLocs.count(entry.first))
503 usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.second));
505 // Add all transitively referenced CFI atoms by processing the worklist.
506 std::set<const Atom *> usedDwarfFrames;
507 while (!usedDwarfWorklist.empty()) {
508 const DefinedAtom *cfiAtom = usedDwarfWorklist.back();
509 usedDwarfWorklist.pop_back();
510 usedDwarfFrames.insert(cfiAtom);
511 for (const auto *ref : *cfiAtom) {
512 const DefinedAtom *cfiTarget = dyn_cast<DefinedAtom>(ref->target());
513 if (cfiTarget->contentType() == DefinedAtom::typeCFI)
514 usedDwarfWorklist.push_back(cfiTarget);
518 // Finally, delete all unreferenced CFI atoms.
519 mergedFile.removeDefinedAtomsIf([&](const DefinedAtom *atom) {
520 if ((atom->contentType() == DefinedAtom::typeCFI) &&
521 !usedDwarfFrames.count(atom))
527 CompactUnwindEntry finalizeUnwindInfoEntryForAtom(
528 const DefinedAtom *function,
529 const std::map<const Atom *, CompactUnwindEntry> &unwindLocs,
530 const std::vector<const Atom *> &personalities,
531 const std::map<const Atom *, const Atom *> &dwarfFrames) {
532 auto unwindLoc = unwindLocs.find(function);
534 CompactUnwindEntry entry;
535 if (unwindLoc == unwindLocs.end()) {
536 // Default entry has correct encoding (0 => no unwind), but we need to
537 // synthesise the function.
538 entry.rangeStart = function;
539 entry.rangeLength = function->size();
541 entry = unwindLoc->second;
544 // If there's no __compact_unwind entry, or it explicitly says to use
545 // __eh_frame, we need to try and fill in the correct DWARF atom.
546 if (entry.encoding == _archHandler.dwarfCompactUnwindType() ||
547 entry.encoding == 0) {
548 auto dwarfFrame = dwarfFrames.find(function);
549 if (dwarfFrame != dwarfFrames.end()) {
550 entry.encoding = _archHandler.dwarfCompactUnwindType();
551 entry.ehFrame = dwarfFrame->second;
555 auto personality = std::find(personalities.begin(), personalities.end(),
556 entry.personalityFunction);
557 uint32_t personalityIdx = personality == personalities.end()
559 : personality - personalities.begin() + 1;
561 // FIXME: We should also use DWARF when there isn't enough room for the
562 // personality function in the compact encoding.
563 assert(personalityIdx < 4 && "too many personality functions");
565 entry.encoding |= personalityIdx << 28;
567 if (entry.lsdaLocation)
568 entry.encoding |= 1U << 30;
573 const MachOLinkingContext &_ctx;
574 mach_o::ArchHandler &_archHandler;
579 void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) {
580 assert(ctx.needsCompactUnwindPass());
581 pm.add(llvm::make_unique<CompactUnwindPass>(ctx));
584 } // end namespace mach_o
585 } // end namespace lld