contrib/llvm/tools/lld/ELF/ICF.cpp

   1 //===- ICF.cpp ------------------------------------------------------------===//
   2 //
   3 //                             The LLVM Linker
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // ICF is short for Identical Code Folding. This is a size optimization to
  11 // identify and merge two or more read-only sections (typically functions)
  12 // that happened to have the same contents. It usually reduces output size
  13 // by a few percent.
  14 //
  15 // In ICF, two sections are considered identical if they have the same
  16 // section flags, section data, and relocations. Relocations are tricky,
  17 // because two relocations are considered the same if they have the same
  18 // relocation types, values, and if they point to the same sections *in
  19 // terms of ICF*.
  20 //
  21 // Here is an example. If foo and bar defined below are compiled to the
  22 // same machine instructions, ICF can and should merge the two, although
  23 // their relocations point to each other.
  24 //
  25 //   void foo() { bar(); }
  26 //   void bar() { foo(); }
  27 //
  28 // If you merge the two, their relocations point to the same section and
  29 // thus you know they are mergeable, but how do you know they are
  30 // mergeable in the first place? This is not an easy problem to solve.
  31 //
  32 // What we are doing in LLD is to partition sections into equivalence
  33 // classes. Sections in the same equivalence class when the algorithm
  34 // terminates are considered identical. Here are details:
  35 //
  36 // 1. First, we partition sections using their hash values as keys. Hash
  37 //    values contain section types, section contents and numbers of
  38 //    relocations. During this step, relocation targets are not taken into
  39 //    account. We just put sections that apparently differ into different
  40 //    equivalence classes.
  41 //
  42 // 2. Next, for each equivalence class, we visit sections to compare
  43 //    relocation targets. Relocation targets are considered equivalent if
  44 //    their targets are in the same equivalence class. Sections with
  45 //    different relocation targets are put into different equivalence
  46 //    clases.
  47 //
  48 // 3. If we split an equivalence class in step 2, two relocations
  49 //    previously target the same equivalence class may now target
  50 //    different equivalence classes. Therefore, we repeat step 2 until a
  51 //    convergence is obtained.
  52 //
  53 // 4. For each equivalence class C, pick an arbitrary section in C, and
  54 //    merge all the other sections in C with it.
  55 //
  56 // For small programs, this algorithm needs 3-5 iterations. For large
  57 // programs such as Chromium, it takes more than 20 iterations.
  58 //
  59 // This algorithm was mentioned as an "optimistic algorithm" in [1],
  60 // though gold implements a different algorithm than this.
  61 //
  62 // We parallelize each step so that multiple threads can work on different
  63 // equivalence classes concurrently. That gave us a large performance
  64 // boost when applying ICF on large programs. For example, MSVC link.exe
  65 // or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output
  66 // size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a
  67 // 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still
  68 // faster than MSVC or gold though.
  69 //
  70 // [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding
  71 // in the Gold Linker
  72 // http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf
  73 //
  74 //===----------------------------------------------------------------------===//
  75
  76 #include "ICF.h"
  77 #include "Config.h"
  78 #include "SymbolTable.h"
  79 #include "Symbols.h"
  80 #include "SyntheticSections.h"
  81 #include "Writer.h"
  82 #include "lld/Common/Threads.h"
  83 #include "llvm/ADT/StringExtras.h"
  84 #include "llvm/BinaryFormat/ELF.h"
  85 #include "llvm/Object/ELF.h"
  86 #include "llvm/Support/xxhash.h"
  87 #include <algorithm>
  88 #include <atomic>
  89
  90 using namespace lld;
  91 using namespace lld::elf;
  92 using namespace llvm;
  93 using namespace llvm::ELF;
  94 using namespace llvm::object;
  95
  96 namespace {
  97 template <class ELFT> class ICF {
  98 public:
  99   void run();
 100
 101 private:
 102   void segregate(size_t Begin, size_t End, bool Constant);
 103
 104   template <class RelTy>
 105   bool constantEq(const InputSection *A, ArrayRef<RelTy> RelsA,
 106                   const InputSection *B, ArrayRef<RelTy> RelsB);
 107
 108   template <class RelTy>
 109   bool variableEq(const InputSection *A, ArrayRef<RelTy> RelsA,
 110                   const InputSection *B, ArrayRef<RelTy> RelsB);
 111
 112   bool equalsConstant(const InputSection *A, const InputSection *B);
 113   bool equalsVariable(const InputSection *A, const InputSection *B);
 114
 115   size_t findBoundary(size_t Begin, size_t End);
 116
 117   void forEachClassRange(size_t Begin, size_t End,
 118                          llvm::function_ref<void(size_t, size_t)> Fn);
 119
 120   void forEachClass(llvm::function_ref<void(size_t, size_t)> Fn);
 121
 122   std::vector<InputSection *> Sections;
 123
 124   // We repeat the main loop while `Repeat` is true.
 125   std::atomic<bool> Repeat;
 126
 127   // The main loop counter.
 128   int Cnt = 0;
 129
 130   // We have two locations for equivalence classes. On the first iteration
 131   // of the main loop, Class[0] has a valid value, and Class[1] contains
 132   // garbage. We read equivalence classes from slot 0 and write to slot 1.
 133   // So, Class[0] represents the current class, and Class[1] represents
 134   // the next class. On each iteration, we switch their roles and use them
 135   // alternately.
 136   //
 137   // Why are we doing this? Recall that other threads may be working on
 138   // other equivalence classes in parallel. They may read sections that we
 139   // are updating. We cannot update equivalence classes in place because
 140   // it breaks the invariance that all possibly-identical sections must be
 141   // in the same equivalence class at any moment. In other words, the for
 142   // loop to update equivalence classes is not atomic, and that is
 143   // observable from other threads. By writing new classes to other
 144   // places, we can keep the invariance.
 145   //
 146   // Below, `Current` has the index of the current class, and `Next` has
 147   // the index of the next class. If threading is enabled, they are either
 148   // (0, 1) or (1, 0).
 149   //
 150   // Note on single-thread: if that's the case, they are always (0, 0)
 151   // because we can safely read the next class without worrying about race
 152   // conditions. Using the same location makes this algorithm converge
 153   // faster because it uses results of the same iteration earlier.
 154   int Current = 0;
 155   int Next = 0;
 156 };
 157 }
 158
 159 // Returns true if section S is subject of ICF.
 160 static bool isEligible(InputSection *S) {
 161   if (!S->Live || S->KeepUnique || !(S->Flags & SHF_ALLOC))
 162     return false;
 163
 164   // Don't merge writable sections. .data.rel.ro sections are marked as writable
 165   // but are semantically read-only.
 166   if ((S->Flags & SHF_WRITE) && S->Name != ".data.rel.ro" &&
 167       !S->Name.startswith(".data.rel.ro."))
 168     return false;
 169
 170   // SHF_LINK_ORDER sections are ICF'd as a unit with their dependent sections,
 171   // so we don't consider them for ICF individually.
 172   if (S->Flags & SHF_LINK_ORDER)
 173     return false;
 174
 175   // Don't merge synthetic sections as their Data member is not valid and empty.
 176   // The Data member needs to be valid for ICF as it is used by ICF to determine
 177   // the equality of section contents.
 178   if (isa<SyntheticSection>(S))
 179     return false;
 180
 181   // .init and .fini contains instructions that must be executed to initialize
 182   // and finalize the process. They cannot and should not be merged.
 183   if (S->Name == ".init" || S->Name == ".fini")
 184     return false;
 185
 186   // A user program may enumerate sections named with a C identifier using
 187   // __start_* and __stop_* symbols. We cannot ICF any such sections because
 188   // that could change program semantics.
 189   if (isValidCIdentifier(S->Name))
 190     return false;
 191
 192   return true;
 193 }
 194
 195 // Split an equivalence class into smaller classes.
 196 template <class ELFT>
 197 void ICF<ELFT>::segregate(size_t Begin, size_t End, bool Constant) {
 198   // This loop rearranges sections in [Begin, End) so that all sections
 199   // that are equal in terms of equals{Constant,Variable} are contiguous
 200   // in [Begin, End).
 201   //
 202   // The algorithm is quadratic in the worst case, but that is not an
 203   // issue in practice because the number of the distinct sections in
 204   // each range is usually very small.
 205
 206   while (Begin < End) {
 207     // Divide [Begin, End) into two. Let Mid be the start index of the
 208     // second group.
 209     auto Bound =
 210         std::stable_partition(Sections.begin() + Begin + 1,
 211                               Sections.begin() + End, [&](InputSection *S) {
 212                                 if (Constant)
 213                                   return equalsConstant(Sections[Begin], S);
 214                                 return equalsVariable(Sections[Begin], S);
 215                               });
 216     size_t Mid = Bound - Sections.begin();
 217
 218     // Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by
 219     // updating the sections in [Begin, Mid). We use Mid as an equivalence
 220     // class ID because every group ends with a unique index.
 221     for (size_t I = Begin; I < Mid; ++I)
 222       Sections[I]->Class[Next] = Mid;
 223
 224     // If we created a group, we need to iterate the main loop again.
 225     if (Mid != End)
 226       Repeat = true;
 227
 228     Begin = Mid;
 229   }
 230 }
 231
 232 // Compare two lists of relocations.
 233 template <class ELFT>
 234 template <class RelTy>
 235 bool ICF<ELFT>::constantEq(const InputSection *SecA, ArrayRef<RelTy> RA,
 236                            const InputSection *SecB, ArrayRef<RelTy> RB) {
 237   for (size_t I = 0; I < RA.size(); ++I) {
 238     if (RA[I].r_offset != RB[I].r_offset ||
 239         RA[I].getType(Config->IsMips64EL) != RB[I].getType(Config->IsMips64EL))
 240       return false;
 241
 242     uint64_t AddA = getAddend<ELFT>(RA[I]);
 243     uint64_t AddB = getAddend<ELFT>(RB[I]);
 244
 245     Symbol &SA = SecA->template getFile<ELFT>()->getRelocTargetSym(RA[I]);
 246     Symbol &SB = SecB->template getFile<ELFT>()->getRelocTargetSym(RB[I]);
 247     if (&SA == &SB) {
 248       if (AddA == AddB)
 249         continue;
 250       return false;
 251     }
 252
 253     auto *DA = dyn_cast<Defined>(&SA);
 254     auto *DB = dyn_cast<Defined>(&SB);
 255     if (!DA || !DB)
 256       return false;
 257
 258     // Relocations referring to absolute symbols are constant-equal if their
 259     // values are equal.
 260     if (!DA->Section && !DB->Section && DA->Value + AddA == DB->Value + AddB)
 261       continue;
 262     if (!DA->Section || !DB->Section)
 263       return false;
 264
 265     if (DA->Section->kind() != DB->Section->kind())
 266       return false;
 267
 268     // Relocations referring to InputSections are constant-equal if their
 269     // section offsets are equal.
 270     if (isa<InputSection>(DA->Section)) {
 271       if (DA->Value + AddA == DB->Value + AddB)
 272         continue;
 273       return false;
 274     }
 275
 276     // Relocations referring to MergeInputSections are constant-equal if their
 277     // offsets in the output section are equal.
 278     auto *X = dyn_cast<MergeInputSection>(DA->Section);
 279     if (!X)
 280       return false;
 281     auto *Y = cast<MergeInputSection>(DB->Section);
 282     if (X->getParent() != Y->getParent())
 283       return false;
 284
 285     uint64_t OffsetA =
 286         SA.isSection() ? X->getOffset(AddA) : X->getOffset(DA->Value) + AddA;
 287     uint64_t OffsetB =
 288         SB.isSection() ? Y->getOffset(AddB) : Y->getOffset(DB->Value) + AddB;
 289     if (OffsetA != OffsetB)
 290       return false;
 291   }
 292
 293   return true;
 294 }
 295
 296 // Compare "non-moving" part of two InputSections, namely everything
 297 // except relocation targets.
 298 template <class ELFT>
 299 bool ICF<ELFT>::equalsConstant(const InputSection *A, const InputSection *B) {
 300   if (A->NumRelocations != B->NumRelocations || A->Flags != B->Flags ||
 301       A->getSize() != B->getSize() || A->Data != B->Data)
 302     return false;
 303
 304   // If two sections have different output sections, we cannot merge them.
 305   // FIXME: This doesn't do the right thing in the case where there is a linker
 306   // script. We probably need to move output section assignment before ICF to
 307   // get the correct behaviour here.
 308   if (getOutputSectionName(A) != getOutputSectionName(B))
 309     return false;
 310
 311   if (A->AreRelocsRela)
 312     return constantEq(A, A->template relas<ELFT>(), B,
 313                       B->template relas<ELFT>());
 314   return constantEq(A, A->template rels<ELFT>(), B, B->template rels<ELFT>());
 315 }
 316
 317 // Compare two lists of relocations. Returns true if all pairs of
 318 // relocations point to the same section in terms of ICF.
 319 template <class ELFT>
 320 template <class RelTy>
 321 bool ICF<ELFT>::variableEq(const InputSection *SecA, ArrayRef<RelTy> RA,
 322                            const InputSection *SecB, ArrayRef<RelTy> RB) {
 323   assert(RA.size() == RB.size());
 324
 325   for (size_t I = 0; I < RA.size(); ++I) {
 326     // The two sections must be identical.
 327     Symbol &SA = SecA->template getFile<ELFT>()->getRelocTargetSym(RA[I]);
 328     Symbol &SB = SecB->template getFile<ELFT>()->getRelocTargetSym(RB[I]);
 329     if (&SA == &SB)
 330       continue;
 331
 332     auto *DA = cast<Defined>(&SA);
 333     auto *DB = cast<Defined>(&SB);
 334
 335     // We already dealt with absolute and non-InputSection symbols in
 336     // constantEq, and for InputSections we have already checked everything
 337     // except the equivalence class.
 338     if (!DA->Section)
 339       continue;
 340     auto *X = dyn_cast<InputSection>(DA->Section);
 341     if (!X)
 342       continue;
 343     auto *Y = cast<InputSection>(DB->Section);
 344
 345     // Ineligible sections are in the special equivalence class 0.
 346     // They can never be the same in terms of the equivalence class.
 347     if (X->Class[Current] == 0)
 348       return false;
 349     if (X->Class[Current] != Y->Class[Current])
 350       return false;
 351   };
 352
 353   return true;
 354 }
 355
 356 // Compare "moving" part of two InputSections, namely relocation targets.
 357 template <class ELFT>
 358 bool ICF<ELFT>::equalsVariable(const InputSection *A, const InputSection *B) {
 359   if (A->AreRelocsRela)
 360     return variableEq(A, A->template relas<ELFT>(), B,
 361                       B->template relas<ELFT>());
 362   return variableEq(A, A->template rels<ELFT>(), B, B->template rels<ELFT>());
 363 }
 364
 365 template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t Begin, size_t End) {
 366   uint32_t Class = Sections[Begin]->Class[Current];
 367   for (size_t I = Begin + 1; I < End; ++I)
 368     if (Class != Sections[I]->Class[Current])
 369       return I;
 370   return End;
 371 }
 372
 373 // Sections in the same equivalence class are contiguous in Sections
 374 // vector. Therefore, Sections vector can be considered as contiguous
 375 // groups of sections, grouped by the class.
 376 //
 377 // This function calls Fn on every group within [Begin, End).
 378 template <class ELFT>
 379 void ICF<ELFT>::forEachClassRange(size_t Begin, size_t End,
 380                                   llvm::function_ref<void(size_t, size_t)> Fn) {
 381   while (Begin < End) {
 382     size_t Mid = findBoundary(Begin, End);
 383     Fn(Begin, Mid);
 384     Begin = Mid;
 385   }
 386 }
 387
 388 // Call Fn on each equivalence class.
 389 template <class ELFT>
 390 void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> Fn) {
 391   // If threading is disabled or the number of sections are
 392   // too small to use threading, call Fn sequentially.
 393   if (!ThreadsEnabled || Sections.size() < 1024) {
 394     forEachClassRange(0, Sections.size(), Fn);
 395     ++Cnt;
 396     return;
 397   }
 398
 399   Current = Cnt % 2;
 400   Next = (Cnt + 1) % 2;
 401
 402   // Shard into non-overlapping intervals, and call Fn in parallel.
 403   // The sharding must be completed before any calls to Fn are made
 404   // so that Fn can modify the Chunks in its shard without causing data
 405   // races.
 406   const size_t NumShards = 256;
 407   size_t Step = Sections.size() / NumShards;
 408   size_t Boundaries[NumShards + 1];
 409   Boundaries[0] = 0;
 410   Boundaries[NumShards] = Sections.size();
 411
 412   parallelForEachN(1, NumShards, [&](size_t I) {
 413     Boundaries[I] = findBoundary((I - 1) * Step, Sections.size());
 414   });
 415
 416   parallelForEachN(1, NumShards + 1, [&](size_t I) {
 417     if (Boundaries[I - 1] < Boundaries[I])
 418       forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn);
 419   });
 420   ++Cnt;
 421 }
 422
 423 static void print(const Twine &S) {
 424   if (Config->PrintIcfSections)
 425     message(S);
 426 }
 427
 428 // The main function of ICF.
 429 template <class ELFT> void ICF<ELFT>::run() {
 430   // Collect sections to merge.
 431   for (InputSectionBase *Sec : InputSections)
 432     if (auto *S = dyn_cast<InputSection>(Sec))
 433       if (isEligible(S))
 434         Sections.push_back(S);
 435
 436   // Initially, we use hash values to partition sections.
 437   parallelForEach(Sections, [&](InputSection *S) {
 438     // Set MSB to 1 to avoid collisions with non-hash IDs.
 439     S->Class[0] = xxHash64(S->Data) | (1U << 31);
 440   });
 441
 442   // From now on, sections in Sections vector are ordered so that sections
 443   // in the same equivalence class are consecutive in the vector.
 444   std::stable_sort(Sections.begin(), Sections.end(),
 445                    [](InputSection *A, InputSection *B) {
 446                      return A->Class[0] < B->Class[0];
 447                    });
 448
 449   // Compare static contents and assign unique IDs for each static content.
 450   forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); });
 451
 452   // Split groups by comparing relocations until convergence is obtained.
 453   do {
 454     Repeat = false;
 455     forEachClass(
 456         [&](size_t Begin, size_t End) { segregate(Begin, End, false); });
 457   } while (Repeat);
 458
 459   log("ICF needed " + Twine(Cnt) + " iterations");
 460
 461   // Merge sections by the equivalence class.
 462   forEachClassRange(0, Sections.size(), [&](size_t Begin, size_t End) {
 463     if (End - Begin == 1)
 464       return;
 465     print("selected section " + toString(Sections[Begin]));
 466     for (size_t I = Begin + 1; I < End; ++I) {
 467       print("  removing identical section " + toString(Sections[I]));
 468       Sections[Begin]->replace(Sections[I]);
 469
 470       // At this point we know sections merged are fully identical and hence
 471       // we want to remove duplicate implicit dependencies such as link order
 472       // and relocation sections.
 473       for (InputSection *IS : Sections[I]->DependentSections)
 474         IS->Live = false;
 475     }
 476   });
 477 }
 478
 479 // ICF entry point function.
 480 template <class ELFT> void elf::doIcf() { ICF<ELFT>().run(); }
 481
 482 template void elf::doIcf<ELF32LE>();
 483 template void elf::doIcf<ELF32BE>();
 484 template void elf::doIcf<ELF64LE>();
 485 template void elf::doIcf<ELF64BE>();