1 //===- ICF.cpp ------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "ConcatOutputSection.h"
12 #include "InputSection.h"
13 #include "SymbolTable.h"
15 #include "UnwindInfoSection.h"
17 #include "lld/Common/CommonLinkerContext.h"
18 #include "llvm/Support/LEB128.h"
19 #include "llvm/Support/Parallel.h"
20 #include "llvm/Support/TimeProfiler.h"
21 #include "llvm/Support/xxhash.h"
27 using namespace lld::macho;
29 static constexpr bool verboseDiagnostics = false;
33 ICF(std::vector<ConcatInputSection *> &inputs);
36 using EqualsFn = bool (ICF::*)(const ConcatInputSection *,
37 const ConcatInputSection *);
38 void segregate(size_t begin, size_t end, EqualsFn);
39 size_t findBoundary(size_t begin, size_t end);
40 void forEachClassRange(size_t begin, size_t end,
41 llvm::function_ref<void(size_t, size_t)> func);
42 void forEachClass(llvm::function_ref<void(size_t, size_t)> func);
44 bool equalsConstant(const ConcatInputSection *ia,
45 const ConcatInputSection *ib);
46 bool equalsVariable(const ConcatInputSection *ia,
47 const ConcatInputSection *ib);
49 // ICF needs a copy of the inputs vector because its equivalence-class
50 // segregation algorithm destroys the proper sequence.
51 std::vector<ConcatInputSection *> icfInputs;
54 std::atomic<bool> icfRepeat{false};
55 std::atomic<uint64_t> equalsConstantCount{0};
56 std::atomic<uint64_t> equalsVariableCount{0};
59 ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
60 icfInputs.assign(inputs.begin(), inputs.end());
63 // ICF = Identical Code Folding
65 // We only fold __TEXT,__text, so this is really "code" folding, and not
66 // "COMDAT" folding. String and scalar constant literals are deduplicated
69 // Summary of segments & sections:
71 // The __TEXT segment is readonly at the MMU. Some sections are already
72 // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
73 // synthetic and inherently free of duplicates (__TEXT,__stubs &
74 // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
75 // because doing so induces many test failures.
77 // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
78 // thus ineligible for ICF.
80 // The __DATA_CONST segment is read/write at the MMU, but is logically const to
81 // the application after dyld applies fixups to pointer data. We currently
82 // fold only the __DATA_CONST,__cfstring section.
84 // The __DATA segment is read/write at the MMU, and as application-writeable
85 // data, none of its sections are eligible for ICF.
87 // Please see the large block comment in lld/ELF/ICF.cpp for an explanation
88 // of the segregation algorithm.
90 // FIXME(gkm): implement keep-unique attributes
91 // FIXME(gkm): implement address-significance tables for MachO object files
93 // Compare "non-moving" parts of two ConcatInputSections, namely everything
94 // except references to other ConcatInputSections.
95 bool ICF::equalsConstant(const ConcatInputSection *ia,
96 const ConcatInputSection *ib) {
97 if (verboseDiagnostics)
98 ++equalsConstantCount;
99 // We can only fold within the same OutputSection.
100 if (ia->parent != ib->parent)
102 if (ia->data.size() != ib->data.size())
104 if (ia->data != ib->data)
106 if (ia->relocs.size() != ib->relocs.size())
108 auto f = [](const Reloc &ra, const Reloc &rb) {
109 if (ra.type != rb.type)
111 if (ra.pcrel != rb.pcrel)
113 if (ra.length != rb.length)
115 if (ra.offset != rb.offset)
117 if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
120 InputSection *isecA, *isecB;
124 if (ra.referent.is<Symbol *>()) {
125 const auto *sa = ra.referent.get<Symbol *>();
126 const auto *sb = rb.referent.get<Symbol *>();
127 if (sa->kind() != sb->kind())
129 // ICF runs before Undefineds are treated (and potentially converted into
131 if (isa<DylibSymbol>(sa) || isa<Undefined>(sa))
132 return sa == sb && ra.addend == rb.addend;
133 assert(isa<Defined>(sa));
134 const auto *da = cast<Defined>(sa);
135 const auto *db = cast<Defined>(sb);
136 if (!da->isec || !db->isec) {
137 assert(da->isAbsolute() && db->isAbsolute());
138 return da->value + ra.addend == db->value + rb.addend;
145 isecA = ra.referent.get<InputSection *>();
146 isecB = rb.referent.get<InputSection *>();
149 if (isecA->parent != isecB->parent)
151 // Sections with identical parents should be of the same kind.
152 assert(isecA->kind() == isecB->kind());
153 // We will compare ConcatInputSection contents in equalsVariable.
154 if (isa<ConcatInputSection>(isecA))
155 return ra.addend == rb.addend;
156 // Else we have two literal sections. References to them are equal iff their
157 // offsets in the output section are equal.
158 if (ra.referent.is<Symbol *>())
159 // For symbol relocs, we compare the contents at the symbol address. We
160 // don't do `getOffset(value + addend)` because value + addend may not be
161 // a valid offset in the literal section.
162 return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&
163 ra.addend == rb.addend;
165 assert(valueA == 0 && valueB == 0);
166 // For section relocs, we compare the content at the section offset.
167 return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
170 return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
174 // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
175 // handled by equalsConstant().
176 bool ICF::equalsVariable(const ConcatInputSection *ia,
177 const ConcatInputSection *ib) {
178 if (verboseDiagnostics)
179 ++equalsVariableCount;
180 assert(ia->relocs.size() == ib->relocs.size());
181 auto f = [this](const Reloc &ra, const Reloc &rb) {
182 // We already filtered out mismatching values/addends in equalsConstant.
183 if (ra.referent == rb.referent)
185 const ConcatInputSection *isecA, *isecB;
186 if (ra.referent.is<Symbol *>()) {
187 // Matching DylibSymbols are already filtered out by the
188 // identical-referent check above. Non-matching DylibSymbols were filtered
189 // out in equalsConstant(). So we can safely cast to Defined here.
190 const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
191 const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
192 if (da->isAbsolute())
194 isecA = dyn_cast<ConcatInputSection>(da->isec);
196 return true; // literal sections were checked in equalsConstant.
197 isecB = cast<ConcatInputSection>(db->isec);
199 const auto *sa = ra.referent.get<InputSection *>();
200 const auto *sb = rb.referent.get<InputSection *>();
201 isecA = dyn_cast<ConcatInputSection>(sa);
204 isecB = cast<ConcatInputSection>(sb);
206 return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
208 if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
211 // If there are symbols with associated unwind info, check that the unwind
212 // info matches. For simplicity, we only handle the case where there are only
213 // symbols at offset zero within the section (which is typically the case with
214 // .subsections_via_symbols.)
215 auto hasUnwind = [](Defined *d) { return d->unwindEntry != nullptr; };
216 auto itA = std::find_if(ia->symbols.begin(), ia->symbols.end(), hasUnwind);
217 auto itB = std::find_if(ib->symbols.begin(), ib->symbols.end(), hasUnwind);
218 if (itA == ia->symbols.end())
219 return itB == ib->symbols.end();
220 if (itB == ib->symbols.end())
222 const Defined *da = *itA;
223 const Defined *db = *itB;
224 if (da->unwindEntry->icfEqClass[icfPass % 2] !=
225 db->unwindEntry->icfEqClass[icfPass % 2] ||
226 da->value != 0 || db->value != 0)
228 auto isZero = [](Defined *d) { return d->value == 0; };
229 return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
231 std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
235 // Find the first InputSection after BEGIN whose equivalence class differs
236 size_t ICF::findBoundary(size_t begin, size_t end) {
237 uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
238 for (size_t i = begin + 1; i < end; ++i)
239 if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
244 // Invoke FUNC on subranges with matching equivalence class
245 void ICF::forEachClassRange(size_t begin, size_t end,
246 llvm::function_ref<void(size_t, size_t)> func) {
247 while (begin < end) {
248 size_t mid = findBoundary(begin, end);
254 // Split icfInputs into shards, then parallelize invocation of FUNC on subranges
255 // with matching equivalence class
256 void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
257 // Only use threads when the benefits outweigh the overhead.
258 const size_t threadingThreshold = 1024;
259 if (icfInputs.size() < threadingThreshold) {
260 forEachClassRange(0, icfInputs.size(), func);
265 // Shard into non-overlapping intervals, and call FUNC in parallel. The
266 // sharding must be completed before any calls to FUNC are made so that FUNC
267 // can modify the InputSection in its shard without causing data races.
268 const size_t shards = 256;
269 size_t step = icfInputs.size() / shards;
270 size_t boundaries[shards + 1];
272 boundaries[shards] = icfInputs.size();
273 parallelFor(1, shards, [&](size_t i) {
274 boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
276 parallelFor(1, shards + 1, [&](size_t i) {
277 if (boundaries[i - 1] < boundaries[i]) {
278 forEachClassRange(boundaries[i - 1], boundaries[i], func);
285 // Into each origin-section hash, combine all reloc referent section hashes.
286 for (icfPass = 0; icfPass < 2; ++icfPass) {
287 parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
288 uint32_t hash = isec->icfEqClass[icfPass % 2];
289 for (const Reloc &r : isec->relocs) {
290 if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
291 if (auto *defined = dyn_cast<Defined>(sym)) {
293 if (auto referentIsec =
294 dyn_cast<ConcatInputSection>(defined->isec))
295 hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
297 hash += defined->isec->kind() +
298 defined->isec->getOffset(defined->value);
300 hash += defined->value;
303 // ICF runs before Undefined diags
304 assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym));
308 // Set MSB to 1 to avoid collisions with non-hashed classes.
309 isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);
314 icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
315 return a->icfEqClass[0] < b->icfEqClass[0];
317 forEachClass([&](size_t begin, size_t end) {
318 segregate(begin, end, &ICF::equalsConstant);
321 // Split equivalence groups by comparing relocations until convergence
324 forEachClass([&](size_t begin, size_t end) {
325 segregate(begin, end, &ICF::equalsVariable);
328 log("ICF needed " + Twine(icfPass) + " iterations");
329 if (verboseDiagnostics) {
330 log("equalsConstant() called " + Twine(equalsConstantCount) + " times");
331 log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
334 // Fold sections within equivalence classes
335 forEachClass([&](size_t begin, size_t end) {
338 ConcatInputSection *beginIsec = icfInputs[begin];
339 for (size_t i = begin + 1; i < end; ++i)
340 beginIsec->foldIdentical(icfInputs[i]);
344 // Split an equivalence class into smaller classes.
345 void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
346 while (begin < end) {
347 // Divide [begin, end) into two. Let mid be the start index of the
349 auto bound = std::stable_partition(
350 icfInputs.begin() + begin + 1, icfInputs.begin() + end,
351 [&](ConcatInputSection *isec) {
352 return (this->*equals)(icfInputs[begin], isec);
354 size_t mid = bound - icfInputs.begin();
356 // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
357 // equivalence class ID because every group ends with a unique index.
358 for (size_t i = begin; i < mid; ++i)
359 icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
361 // If we created a group, we need to iterate the main loop again.
369 void macho::markSymAsAddrSig(Symbol *s) {
370 if (auto *d = dyn_cast_or_null<Defined>(s))
372 d->isec->keepUnique = true;
375 void macho::markAddrSigSymbols() {
376 TimeTraceScope timeScope("Mark addrsig symbols");
377 for (InputFile *file : inputFiles) {
378 ObjFile *obj = dyn_cast<ObjFile>(file);
382 Section *addrSigSection = obj->addrSigSection;
385 assert(addrSigSection->subsections.size() == 1);
387 const InputSection *isec = addrSigSection->subsections[0].isec;
389 for (const Reloc &r : isec->relocs) {
390 if (auto *sym = r.referent.dyn_cast<Symbol *>())
391 markSymAsAddrSig(sym);
393 error(toString(isec) + ": unexpected section relocation");
398 void macho::foldIdenticalSections(bool onlyCfStrings) {
399 TimeTraceScope timeScope("Fold Identical Code Sections");
400 // The ICF equivalence-class segregation algorithm relies on pre-computed
401 // hashes of InputSection::data for the ConcatOutputSection::inputs and all
402 // sections referenced by their relocs. We could recursively traverse the
403 // relocs to find every referenced InputSection, but that precludes easy
404 // parallelization. Therefore, we hash every InputSection here where we have
405 // them all accessible as simple vectors.
407 // If an InputSection is ineligible for ICF, we give it a unique ID to force
408 // it into an unfoldable singleton equivalence class. Begin the unique-ID
409 // space at inputSections.size(), so that it will never intersect with
410 // equivalence-class IDs which begin at 0. Since hashes & unique IDs never
411 // coexist with equivalence-class IDs, this is not necessary, but might help
412 // someone keep the numbers straight in case we ever need to debug the
414 std::vector<ConcatInputSection *> hashable;
415 uint64_t icfUniqueID = inputSections.size();
416 for (ConcatInputSection *isec : inputSections) {
417 // FIXME: consider non-code __text sections as hashable?
419 (!onlyCfStrings || isCfStringSection(isec)) &&
420 (isCodeSection(isec) || isCfStringSection(isec) ||
421 isClassRefsSection(isec) || isGccExceptTabSection(isec)) &&
422 !isec->keepUnique && !isec->shouldOmitFromOutput() &&
423 sectionType(isec->getFlags()) == MachO::S_REGULAR;
425 hashable.push_back(isec);
426 for (Defined *d : isec->symbols)
428 hashable.push_back(d->unwindEntry);
430 // __cfstring has embedded addends that foil ICF's hashing / equality
431 // checks. (We can ignore embedded addends when doing ICF because the same
432 // information gets recorded in our Reloc structs.) We therefore create a
433 // mutable copy of the CFString and zero out the embedded addends before
434 // performing any hashing / equality checks.
435 if (isCfStringSection(isec) || isClassRefsSection(isec)) {
436 // We have to do this copying serially as the BumpPtrAllocator is not
437 // thread-safe. FIXME: Make a thread-safe allocator.
438 MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());
439 for (const Reloc &r : isec->relocs)
440 target->relocateOne(copy.data() + r.offset, r, /*va=*/0,
444 } else if (!isEhFrameSection(isec)) {
445 // EH frames are gathered as hashables from unwindEntry above; give a
446 // unique ID to everything else.
447 isec->icfEqClass[0] = ++icfUniqueID;
450 parallelForEach(hashable, [](ConcatInputSection *isec) {
451 assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
452 // Turn-on the top bit to guarantee that valid hashes have no collisions
453 // with the small-integer unique IDs for ICF-ineligible sections
454 isec->icfEqClass[0] = xxHash64(isec->data) | (1ull << 31);
456 // Now that every input section is either hashed or marked as unique, run the
457 // segregation algorithm to detect foldable subsections.