1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Thin Link Time Optimization library. This library is
11 // intended to be used by the linker to optimize code at link time.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
20 #include "llvm/Analysis/ProfileSummaryInfo.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/Bitcode/BitcodeWriter.h"
25 #include "llvm/Bitcode/BitcodeWriterPass.h"
26 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
27 #include "llvm/IR/DiagnosticPrinter.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/LegacyPassManager.h"
30 #include "llvm/IR/Mangler.h"
31 #include "llvm/IRReader/IRReader.h"
32 #include "llvm/LTO/LTO.h"
33 #include "llvm/Linker/Linker.h"
34 #include "llvm/MC/SubtargetFeature.h"
35 #include "llvm/Object/IRObjectFile.h"
36 #include "llvm/Support/CachePruning.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/Error.h"
39 #include "llvm/Support/Path.h"
40 #include "llvm/Support/SHA1.h"
41 #include "llvm/Support/TargetRegistry.h"
42 #include "llvm/Support/ThreadPool.h"
43 #include "llvm/Support/Threading.h"
44 #include "llvm/Support/ToolOutputFile.h"
45 #include "llvm/Support/VCSRevision.h"
46 #include "llvm/Target/TargetMachine.h"
47 #include "llvm/Transforms/IPO.h"
48 #include "llvm/Transforms/IPO/FunctionImport.h"
49 #include "llvm/Transforms/IPO/Internalize.h"
50 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
51 #include "llvm/Transforms/ObjCARC.h"
52 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
// Debug category name for this file.
58 #define DEBUG_TYPE "thinlto"
61 // Flags such as -discard-value-names, defined in LTOCodeGenerator.cpp
62 extern cl::opt<bool> LTODiscardValueNames;
63 extern cl::opt<std::string> LTORemarksFilename;
64 extern cl::opt<bool> LTOPassRemarksWithHotness;
// The "threads" option is initialized through cl::init to
// llvm::heavyweight_hardware_concurrency().
70 ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency()));
72 // Simple helper to save temporary files for debug.
// Writes TheModule as bitcode to the path formed by concatenating TempDir,
// count and Suffix; no path separator is inserted between them.
73 static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
74 unsigned count, StringRef Suffix) {
77 // User asked to save temps, let's dump the bitcode file.
78 std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
80 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
// Not being able to open the output file is treated as fatal.
82 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
83 " to save optimized bitcode\n");
// Use-list order is preserved in the emitted bitcode.
84 WriteBitcodeToFile(&TheModule, OS, /* ShouldPreserveUseListOrder */ true);
// Pick the summary that stands for the linker-visible definition of a global:
// a strong definition if there is one, otherwise the first copy in the list
// that is not available_externally.
87 static const GlobalValueSummary *
88 getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
89 // If there is any strong definition anywhere, get it.
// "Strong" here means neither available_externally nor weak-for-linker.
90 auto StrongDefForLinker = llvm::find_if(
91 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
92 auto Linkage = Summary->linkage();
93 return !GlobalValue::isAvailableExternallyLinkage(Linkage) &&
94 !GlobalValue::isWeakForLinker(Linkage);
96 if (StrongDefForLinker != GVSummaryList.end())
97 return StrongDefForLinker->get();
98 // Get the first *linker visible* definition for this global in the summary
// list, i.e. the first copy that is not available_externally.
100 auto FirstDefForLinker = llvm::find_if(
101 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
102 auto Linkage = Summary->linkage();
103 return !GlobalValue::isAvailableExternallyLinkage(Linkage);
105 // Extern templates can be emitted as available_externally, so the search
// above may come up empty.
106 if (FirstDefForLinker == GVSummaryList.end())
108 return FirstDefForLinker->get();
111 // Populate map of GUID to the prevailing copy for any multiply defined
112 // symbols. Currently assume first copy is prevailing, or any strong
113 // definition. Can be refined with Linker information in the future.
// GUIDs with a single summary get no entry in PrevailingCopy.
114 static void computePrevailingCopies(
115 const ModuleSummaryIndex &Index,
116 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) {
// A symbol is "multiply defined" when its summary list has more than one entry.
117 auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) {
118 return GVSummaryList.size() > 1;
// I.first is the GUID, I.second.SummaryList the copies recorded for it.
121 for (auto &I : Index) {
122 if (HasMultipleCopies(I.second.SummaryList))
123 PrevailingCopy[I.first] =
124 getFirstDefinitionForLinker(I.second.SummaryList);
// Build a map from buffer identifier to the corresponding MemoryBufferRef for
// all the input modules.
128 static StringMap<MemoryBufferRef>
129 generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) {
130 StringMap<MemoryBufferRef> ModuleMap;
131 for (auto &ModuleBuffer : Modules) {
// Identifiers are used as map keys, so a duplicate would overwrite an entry.
132 assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
134 "Expect unique Buffer Identifier");
135 ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer();
// Run renameModuleForThinLTO on TheModule using Index. A true return value is
// treated as failure and aborts through report_fatal_error.
140 static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) {
141 if (renameModuleForThinLTO(TheModule, Index))
142 report_fatal_error("renameModuleForThinLTO failed");
// Parse the bitcode in Buffer into a Module owned by the caller, in Context.
// Any parse error is printed to errs() and then turned into a fatal error, so
// callers do not see a failure result.
145 static std::unique_ptr<Module>
146 loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
147 bool Lazy, bool IsImporting) {
// Two loaders are available: getLazyBitcodeModule (metadata loaded lazily,
// IsImporting forwarded) and parseBitcodeFile.
// NOTE(review): presumably selected by Lazy -- confirm.
149 Expected<std::unique_ptr<Module>> ModuleOrErr =
151 ? getLazyBitcodeModule(Buffer, Context,
152 /* ShouldLazyLoadMetadata */ true, IsImporting)
153 : parseBitcodeFile(Buffer, Context);
// Report each error as a diagnostic attributed to the buffer identifier.
155 handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
156 SMDiagnostic Err = SMDiagnostic(Buffer.getBufferIdentifier(),
157 SourceMgr::DK_Error, EIB.message());
158 Err.print("ThinLTO", errs());
160 report_fatal_error("Can't load module, abort.");
162 return std::move(ModuleOrErr.get());
// Import into TheModule the functions listed in ImportList, loading the source
// modules on demand from ModuleMap. A failed import is fatal.
166 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
167 StringMap<MemoryBufferRef> &ModuleMap,
168 const FunctionImporter::ImportMapTy &ImportList) {
// Source modules are loaded lazily, flagged as importing, into the
// destination module's context.
169 auto Loader = [&](StringRef Identifier) {
170 return loadModuleFromBuffer(ModuleMap[Identifier], TheModule.getContext(),
171 /*Lazy=*/true, /*IsImporting*/ true);
174 FunctionImporter Importer(Index, Loader);
175 Expected<bool> Result = Importer.importFunctions(TheModule, ImportList);
// Report each error as a diagnostic attributed to the destination module.
177 handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) {
178 SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(),
179 SourceMgr::DK_Error, EIB.message());
180 Err.print("ThinLTO", errs());
182 report_fatal_error("importFunctions failed");
// Configure a PassManagerBuilder for TheModule and use it to populate the
// ThinLTO optimization pipeline in a legacy pass manager.
186 static void optimizeModule(Module &TheModule, TargetMachine &TM,
187 unsigned OptLevel, bool Freestanding) {
188 // Populate the PassManager
189 PassManagerBuilder PMB;
190 PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
// NOTE(review): presumably only done when Freestanding is set -- confirm.
192 PMB.LibraryInfo->disableAllFunctions();
193 PMB.Inliner = createFunctionInliningPass();
194 // FIXME: should get it from the bitcode?
195 PMB.OptLevel = OptLevel;
// Both vectorizers are enabled; the input is verified, the output is not.
196 PMB.LoopVectorize = true;
197 PMB.SLPVectorize = true;
198 PMB.VerifyInput = true;
199 PMB.VerifyOutput = false;
201 legacy::PassManager PM;
203 // Add the TTI (required to inform the vectorizer about register size, for
// instance).
205 PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
208 PMB.populateThinLTOPassManager(PM);
213 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
// TheTriple decides whether a leading underscore is stripped from each name
// before the GUID is computed.
214 static DenseSet<GlobalValue::GUID>
215 computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
216 const Triple &TheTriple) {
217 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
218 for (auto &Entry : PreservedSymbols) {
219 StringRef Name = Entry.first();
// For Mach-O, drop one leading '_' from non-empty names.
220 if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
221 Name = Name.drop_front();
222 GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
224 return GUIDPreservedSymbols;
// Set up object-file emission for TheModule into an in-memory buffer and
// return that buffer wrapped in an ObjectMemoryBuffer.
227 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
229 SmallVector<char, 128> OutputBuffer;
// OS appends everything the code generator emits to OutputBuffer.
233 raw_svector_ostream OS(OutputBuffer);
234 legacy::PassManager PM;
236 // If the bitcode files contain ARC code and were compiled with optimization,
237 // the ObjCARCContractPass must be run, so do it unconditionally here.
238 PM.add(createObjCARCContractPass());
240 // Setup the codegen now.
// A true return from addPassesToEmitFile is treated as a setup failure.
241 if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
242 /* DisableVerify */ true))
243 report_fatal_error("Failed to setup codegen");
245 // Run codegen now. The resulting binary is in OutputBuffer.
248 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
251 /// Manage caching for a single Module.
/// The entry is identified by EntryPath, a file inside the cache directory
/// whose name is derived from a SHA1 hash of everything listed in the
/// constructor. tryLoadingBuffer() and write() both treat an empty EntryPath
/// as "no cache entry".
252 class ModuleCacheEntry {
253 SmallString<128> EntryPath;
256 // Create a cache entry. This computes a unique hash for the Module considering
257 // the current list of export/import, and offers an interface to query to
258 // access the content in the cache.
260 StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID,
261 const FunctionImporter::ImportMapTy &ImportList,
262 const FunctionImporter::ExportSetTy &ExportList,
263 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
264 const GVSummaryMapTy &DefinedFunctions,
265 const DenseSet<GlobalValue::GUID> &PreservedSymbols, unsigned OptLevel,
266 bool Freestanding, const TargetMachineBuilder &TMBuilder) {
// An empty CachePath is checked first, before any hashing is attempted.
267 if (CachePath.empty())
270 if (!Index.modulePaths().count(ModuleID))
271 // The module does not have an entry, it can't have a hash at all
274 // Compute the unique hash for this entry
275 // This is based on the current compiler version, the module itself, the
276 // export list, the hash for every single module in the import list, the
277 // list of ResolvedODR for the module, and the list of preserved symbols.
279 // Include the hash for the current module
280 auto ModHash = Index.getModuleHash(ModuleID);
// An all-zero module hash is taken to mean that no hash was recorded.
282 if (all_of(ModHash, [](uint32_t V) { return V == 0; }))
283 // No hash entry, no caching!
288 // Include the parts of the LTO configuration that affect code generation.
// AddString feeds a single zero byte to the hasher for each string.
289 auto AddString = [&](StringRef Str) {
291 Hasher.update(ArrayRef<uint8_t>{0});
// AddUnsigned feeds the value to the hasher as the 4 bytes held in Data.
293 auto AddUnsigned = [&](unsigned I) {
299 Hasher.update(ArrayRef<uint8_t>{Data, 4});
302 // Start with the compiler revision
303 Hasher.update(LLVM_VERSION_STRING);
305 Hasher.update(LLVM_REVISION);
308 // Hash the optimization level and the target machine settings.
309 AddString(TMBuilder.MCpu);
310 // FIXME: Hash more of Options. For now all clients initialize Options from
311 // command-line flags (which is unsupported in production), but may set
312 // RelaxELFRelocations. The clang driver can also pass FunctionSections,
313 // DataSections and DebuggerTuning via command line flags.
314 AddUnsigned(TMBuilder.Options.RelaxELFRelocations);
315 AddUnsigned(TMBuilder.Options.FunctionSections);
316 AddUnsigned(TMBuilder.Options.DataSections);
317 AddUnsigned((unsigned)TMBuilder.Options.DebuggerTuning);
318 AddString(TMBuilder.MAttr);
// The relocation model is only hashed when one has been set.
319 if (TMBuilder.RelocModel)
320 AddUnsigned(*TMBuilder.RelocModel);
321 AddUnsigned(TMBuilder.CGOptLevel);
322 AddUnsigned(OptLevel);
323 AddUnsigned(Freestanding);
// Raw bytes of this module's own hash.
325 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
326 for (auto F : ExportList)
327 // The export list can impact the internalization, be conservative here
328 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
330 // Include the hash for every module we import functions from
331 for (auto &Entry : ImportList) {
332 auto ModHash = Index.getModuleHash(Entry.first());
333 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
336 // Include the hash for the resolved ODR.
// Both the GUID and the linkage chosen for it are hashed.
337 for (auto &Entry : ResolvedODR) {
338 Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
339 sizeof(GlobalValue::GUID)));
340 Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
341 sizeof(GlobalValue::LinkageTypes)));
344 // Include the hash for the preserved symbols.
// Only preserved symbols that this module defines are considered.
345 for (auto &Entry : PreservedSymbols) {
346 if (DefinedFunctions.count(Entry))
348 ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
351 // This choice of file name allows the cache to be pruned (see pruneCache()
352 // in include/llvm/Support/CachePruning.h).
// EntryPath becomes <CachePath>/llvmcache-<hex of the hash>.
353 sys::path::append(EntryPath, CachePath,
354 "llvmcache-" + toHex(Hasher.result()));
357 // Access the path to this entry in the cache.
358 StringRef getEntryPath() { return EntryPath; }
360 // Try loading the buffer for this cache entry.
// With an empty EntryPath no file is read and a default-constructed
// std::error_code is returned instead of a buffer.
361 ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
362 if (EntryPath.empty())
363 return std::error_code();
364 return MemoryBuffer::getFile(EntryPath);
367 // Cache the Produced object file
// The content goes to a temporary file first and is then renamed to
// EntryPath.
368 void write(const MemoryBuffer &OutputBuffer) {
369 if (EntryPath.empty())
372 // Write to a temporary to avoid race condition
373 SmallString<128> TempFilename;
376 sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
// Not getting a temporary file is fatal.
378 errs() << "Error: " << EC.message() << "\n";
379 report_fatal_error("ThinLTO: Can't get a temporary file");
382 raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
383 OS << OutputBuffer.getBuffer();
385 // Rename to final destination (hopefully race condition won't matter here)
386 EC = sys::fs::rename(TempFilename, EntryPath);
// Fallback: drop the temporary and write straight to EntryPath.
// NOTE(review): presumably only reached when the rename failed -- confirm.
388 sys::fs::remove(TempFilename);
389 raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
391 report_fatal_error(Twine("Failed to open ") + EntryPath +
392 " to save cached entry\n");
393 OS << OutputBuffer.getBuffer();
// Run the per-module ThinLTO backend on TheModule: promotion, weak resolution,
// internalization, cross-module import, optimization and finally either
// bitcode serialization (DisableCodeGen) or object code generation.
// A bitcode snapshot is handed to saveTempBitcode after each stage, using
// SaveTempsDir and count to name the file.
398 static std::unique_ptr<MemoryBuffer>
399 ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
400 StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
401 const FunctionImporter::ImportMapTy &ImportList,
402 const FunctionImporter::ExportSetTy &ExportList,
403 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
404 const GVSummaryMapTy &DefinedGlobals,
405 const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
406 bool DisableCodeGen, StringRef SaveTempsDir,
407 bool Freestanding, unsigned OptLevel, unsigned count) {
409 // "Benchmark"-like optimization: single-source case
// NOTE(review): SingleModule presumably lets the cross-module steps below be
// skipped when there is only one input -- confirm.
410 bool SingleModule = (ModuleMap.size() == 1);
413 promoteModule(TheModule, Index);
415 // Apply summary-based LinkOnce/Weak resolution decisions.
416 thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals);
418 // Save temps: after promotion.
419 saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
422 // Be friendly and don't nuke totally the module when the client didn't
423 // supply anything to preserve.
424 if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) {
425 // Apply summary-based internalization decisions.
426 thinLTOInternalizeModule(TheModule, DefinedGlobals);
429 // Save internalized bitcode
430 saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc");
433 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
435 // Save temps: after cross-module import.
436 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
439 optimizeModule(TheModule, TM, OptLevel, Freestanding);
// Save temps: after optimization.
441 saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
443 if (DisableCodeGen) {
444 // Configured to stop before CodeGen, serialize the bitcode and return.
445 SmallVector<char, 128> OutputBuffer;
447 raw_svector_ostream OS(OutputBuffer);
448 ProfileSummaryInfo PSI(TheModule);
// This local Index shadows the function parameter: it is a fresh summary
// built from the optimized module and written alongside the bitcode.
449 auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI);
450 WriteBitcodeToFile(&TheModule, OS, true, &Index);
452 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
455 return codegenModule(TheModule, TM);
458 /// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map
459 /// for caching, and in the \p Index for application during the ThinLTO
460 /// backends. This is needed for correctness for exported symbols (ensure
461 /// at least one copy kept) and a compile-time optimization (to drop duplicate
462 /// copies when possible).
463 static void resolveWeakForLinkerInIndex(
464 ModuleSummaryIndex &Index,
465 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
// Prevailing copies are computed up front for all multiply defined symbols.
468 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
469 computePrevailingCopies(Index, PrevailingCopy);
// Callback: is summary S the prevailing copy for GUID?
471 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
472 const auto &Prevailing = PrevailingCopy.find(GUID);
473 // Not in map means that there was only one copy, which must be prevailing.
474 if (Prevailing == PrevailingCopy.end())
476 return Prevailing->second == S;
// Callback: remember the new linkage per module identifier and GUID.
479 auto recordNewLinkage = [&](StringRef ModuleIdentifier,
480 GlobalValue::GUID GUID,
481 GlobalValue::LinkageTypes NewLinkage) {
482 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
485 thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage);
488 // Initialize the TargetMachine builder for a given Triple
// An MCpu that is already set is left untouched; TheTriple is always stored.
489 static void initTMBuilder(TargetMachineBuilder &TMBuilder,
490 const Triple &TheTriple) {
491 // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
492 // FIXME this looks pretty terrible...
493 if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
494 if (TheTriple.getArch() == llvm::Triple::x86_64)
495 TMBuilder.MCpu = "core2";
496 else if (TheTriple.getArch() == llvm::Triple::x86)
497 TMBuilder.MCpu = "yonah";
498 else if (TheTriple.getArch() == llvm::Triple::aarch64)
499 TMBuilder.MCpu = "cyclone";
// TheTriple is a const reference, so std::move here still results in a copy.
501 TMBuilder.TheTriple = std::move(TheTriple);
504 } // end anonymous namespace
// Register a bitcode buffer (Data), named Identifier, as an input module.
// The first module determines the target triple used to set up TMBuilder.
506 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
507 ThinLTOBuffer Buffer(Data, Identifier);
508 if (Modules.empty()) {
509 // First module added, so initialize the triple and some options
// The triple is read from the bitcode; read errors are emitted via Context.
512 ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
513 Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
515 TripleStr = *TripleOrErr;
516 Triple TheTriple(TripleStr);
517 initTMBuilder(TMBuilder, Triple(TheTriple));
// Later modules must have the same triple as the first one; this is only
// checked by an assert.
523 ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
524 Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
526 TripleStr = *TripleOrErr;
527 assert(TMBuilder.TheTriple.str() == TripleStr &&
528 "ThinLTO modules with different triple not supported");
531 Modules.push_back(Buffer);
// Add Name to the set of preserved symbols.
534 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
535 PreservedSymbols.insert(Name);
// Record Name as cross-referenced. For now this has exactly the same effect
// as preserveSymbol().
538 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
539 // FIXME: At the moment, we don't take advantage of this extra information,
540 // we're conservatively considering cross-references as preserved.
541 // CrossReferencedSymbols.insert(Name);
542 PreservedSymbols.insert(Name);
545 // TargetMachine factory
// Looks up the target registered for TheTriple and creates a TargetMachine
// from the builder's MCpu, MAttr, Options, RelocModel and CGOptLevel.
546 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
548 const Target *TheTarget =
549 TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
// A triple with no usable target is fatal; ErrMsg carries the reason.
551 report_fatal_error("Can't load target for this Triple: " + ErrMsg);
554 // Use MAttr as the default set of features.
555 SubtargetFeatures Features(MAttr);
// Then add the default subtarget features for the triple.
556 Features.getDefaultSubtargetFeatures(TheTriple);
557 std::string FeatureStr = Features.getString();
// The code model is always CodeModel::Default.
559 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
560 TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
561 CodeModel::Default, CGOptLevel));
565 * Produce the combined summary index from all the bitcode files:
// Reads the module summary of each input buffer into one combined index.
568 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
569 std::unique_ptr<ModuleSummaryIndex> CombinedIndex =
570 llvm::make_unique<ModuleSummaryIndex>();
// Each input buffer gets the next consecutive module id, starting at 0.
571 uint64_t NextModuleId = 0;
572 for (auto &ModuleBuffer : Modules) {
573 if (Error Err = readModuleSummaryIndex(ModuleBuffer.getMemBuffer(),
574 *CombinedIndex, NextModuleId++)) {
// A summary that cannot be read is logged to errs().
576 logAllUnhandledErrors(
577 std::move(Err), errs(),
578 "error: can't create module summary index for buffer: ");
582 return CombinedIndex;
586 * Perform promotion and renaming of exported internal functions.
587 * Index is updated to reflect linkage changes from weak resolution.
// Standalone promotion step for TheModule: recompute the import/export lists
// from Index, resolve weak symbols, update linkage in the index and then
// rename/promote in the module.
589 void ThinLTOCodeGenerator::promote(Module &TheModule,
590 ModuleSummaryIndex &Index) {
591 auto ModuleCount = Index.modulePaths().size();
592 auto ModuleIdentifier = TheModule.getModuleIdentifier();
594 // Collect for each module the list of function it defines (GUID -> Summary).
595 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
596 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
598 // Convert the preserved symbols set from string to GUID
599 auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
600 PreservedSymbols, Triple(TheModule.getTargetTriple()));
602 // Compute "dead" symbols, we don't want to import/export these!
603 auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
605 // Generate import/export list
606 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
607 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
608 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
609 ExportLists, &DeadSymbols);
611 // Resolve LinkOnce/Weak symbols.
// ResolvedODR is filled in but not used any further in this function.
612 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
613 resolveWeakForLinkerInIndex(Index, ResolvedODR);
// Apply the resolution to the symbols this module defines.
615 thinLTOResolveWeakForLinkerModule(
616 TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
618 // Promote the exported values in the index before promoting the module
// itself below.
// A GUID counts as exported if it is in its module's export list or preserved.
620 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
621 const auto &ExportList = ExportLists.find(ModuleIdentifier);
622 return (ExportList != ExportLists.end() &&
623 ExportList->second.count(GUID)) ||
624 GUIDPreservedSymbols.count(GUID);
626 thinLTOInternalizeAndPromoteInIndex(Index, isExported);
628 promoteModule(TheModule, Index);
632 * Perform cross-module importing for the module identified by ModuleIdentifier.
// Standalone import step: recompute the import lists from Index and import
// into TheModule what its own list asks for.
634 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
635 ModuleSummaryIndex &Index) {
// Source modules are looked up by buffer identifier in this map.
636 auto ModuleMap = generateModuleMap(Modules);
637 auto ModuleCount = Index.modulePaths().size();
639 // Collect for each module the list of function it defines (GUID -> Summary).
640 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
641 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
643 // Convert the preserved symbols set from string to GUID
644 auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
645 PreservedSymbols, Triple(TheModule.getTargetTriple()));
647 // Compute "dead" symbols, we don't want to import/export these!
648 auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
650 // Generate import/export list
651 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
652 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
653 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
654 ExportLists, &DeadSymbols);
// Only the list keyed by this module's identifier is used.
655 auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
657 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
661 * Compute the list of summaries needed for importing into module.
// Recompute the import lists from Index, then let
// llvm::gatherImportedSummariesForModule fill ModuleToSummariesForIndex for
// the module at ModulePath.
663 void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
664 StringRef ModulePath, ModuleSummaryIndex &Index,
665 std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
666 auto ModuleCount = Index.modulePaths().size();
668 // Collect for each module the list of function it defines (GUID -> Summary).
669 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
670 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
672 // Generate import/export list
673 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
674 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
675 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
// Only the import list keyed by ModulePath is passed on.
678 llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
679 ImportLists[ModulePath],
680 ModuleToSummariesForIndex);
684 * Emit the list of files needed for importing into module.
// Recompute the import lists from Index and pass the one for ModulePath to
// EmitImportsFiles together with OutputName. Failure is fatal.
686 void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
687 StringRef OutputName,
688 ModuleSummaryIndex &Index) {
689 auto ModuleCount = Index.modulePaths().size();
691 // Collect for each module the list of function it defines (GUID -> Summary).
692 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
693 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
695 // Generate import/export list
696 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
697 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
698 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
// Any error code returned by EmitImportsFiles is reported as a failure to
// open OutputName.
702 if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists[ModulePath])))
703 report_fatal_error(Twine("Failed to open ") + OutputName +
704 " to save imports lists\n");
708 * Perform internalization. Index is updated to reflect linkage changes.
// Standalone internalization step: recompute the export lists from Index,
// update linkage in the index accordingly and internalize TheModule.
710 void ThinLTOCodeGenerator::internalize(Module &TheModule,
711 ModuleSummaryIndex &Index) {
// TMBuilder.TheTriple is taken from the module; it is used below to convert
// the preserved symbol names.
712 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
713 auto ModuleCount = Index.modulePaths().size();
714 auto ModuleIdentifier = TheModule.getModuleIdentifier();
716 // Convert the preserved symbols set from string to GUID
717 auto GUIDPreservedSymbols =
718 computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
720 // Collect for each module the list of function it defines (GUID -> Summary).
721 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
722 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
724 // Compute "dead" symbols, we don't want to import/export these!
725 auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
727 // Generate import/export list
728 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
729 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
730 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
731 ExportLists, &DeadSymbols);
732 auto &ExportList = ExportLists[ModuleIdentifier];
734 // Be friendly and don't nuke totally the module when the client didn't
735 // supply anything to preserve.
736 if (ExportList.empty() && GUIDPreservedSymbols.empty())
// A GUID counts as exported if it is in its module's export list or preserved.
740 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
741 const auto &ExportList = ExportLists.find(ModuleIdentifier);
742 return (ExportList != ExportLists.end() &&
743 ExportList->second.count(GUID)) ||
744 GUIDPreservedSymbols.count(GUID);
// Update the index first, then apply to the symbols this module defines.
746 thinLTOInternalizeAndPromoteInIndex(Index, isExported);
747 thinLTOInternalizeModule(TheModule,
748 ModuleToDefinedGVSummaries[ModuleIdentifier]);
752 * Perform post-importing ThinLTO optimizations.
// Standalone optimization step: set up TMBuilder from the module's triple and
// run optimizeModule with a freshly created TargetMachine.
754 void ThinLTOCodeGenerator::optimize(Module &TheModule) {
755 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
758 optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
762 * Perform ThinLTO CodeGen.
// Standalone codegen step: set up TMBuilder from the module's triple and
// return the buffer produced by codegenModule.
764 std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
765 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
766 return codegenModule(TheModule, *TMBuilder.create());
769 /// Write out the generated object file, either from CacheEntryPath or from
770 /// OutputBuffer, preferring hard-link when possible.
771 /// Returns the path to the generated file in SavedObjectsDirectoryPath.
/// The file is named "<count>.thinlto.o"; an existing file with that name is
/// removed first. An empty \p CacheEntryPath skips the link/copy attempts.
772 static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
773 StringRef SavedObjectsDirectoryPath,
774 const MemoryBuffer &OutputBuffer) {
775 SmallString<128> OutputPath(SavedObjectsDirectoryPath);
776 llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o");
777 OutputPath.c_str(); // Ensure the string is null terminated.
778 if (sys::fs::exists(OutputPath))
779 sys::fs::remove(OutputPath);
781 // We don't return a memory buffer to the linker, just a list of files.
782 if (!CacheEntryPath.empty()) {
783 // Cache is enabled, hard-link the entry (or copy if hard-link fails).
784 auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath);
786 return OutputPath.str();
787 // Hard linking failed, try to copy.
788 Err = sys::fs::copy_file(CacheEntryPath, OutputPath);
790 return OutputPath.str();
791 // Copy failed (could be because the CacheEntry was removed from the cache
792 // in the meantime by another process), fall back and try to write down the
793 // buffer to the output.
794 errs() << "error: can't link or copy from cached entry '" << CacheEntryPath
795 << "' to '" << OutputPath << "'\n";
797 // No cache entry, just write out the buffer.
799 raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None);
// Not being able to open the output file is fatal.
801 report_fatal_error("Can't open output '" + OutputPath + "'\n");
802 OS << OutputBuffer.getBuffer();
803 return OutputPath.str();
806 // Main entry point for the ThinLTO processing
807 void ThinLTOCodeGenerator::run() {
808 // Prepare the resulting object vector
809 assert(ProducedBinaries.empty() && "The generator should not be reused");
810 if (SavedObjectsDirectoryPath.empty())
811 ProducedBinaries.resize(Modules.size());
813 sys::fs::create_directories(SavedObjectsDirectoryPath);
815 sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
817 report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
818 ProducedBinaryFiles.resize(Modules.size());
822 // Perform only parallel codegen and return.
825 for (auto &ModuleBuffer : Modules) {
826 Pool.async([&](int count) {
828 Context.setDiscardValueNames(LTODiscardValueNames);
832 loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
833 /*IsImporting*/ false);
836 auto OutputBuffer = codegen(*TheModule);
837 if (SavedObjectsDirectoryPath.empty())
838 ProducedBinaries[count] = std::move(OutputBuffer);
840 ProducedBinaryFiles[count] = writeGeneratedObject(
841 count, "", SavedObjectsDirectoryPath, *OutputBuffer);
848 // Sequential linking phase
849 auto Index = linkCombinedIndex();
851 // Save temps: index.
852 if (!SaveTempsDir.empty()) {
853 auto SaveTempPath = SaveTempsDir + "index.bc";
855 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
857 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
858 " to save optimized bitcode\n");
859 WriteIndexToFile(*Index, OS);
863 // Prepare the module map.
864 auto ModuleMap = generateModuleMap(Modules);
865 auto ModuleCount = Modules.size();
867 // Collect for each module the list of function it defines (GUID -> Summary).
868 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
869 Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
871 // Convert the preserved symbols set from string to GUID, this is needed for
872 // computing the caching hash and the internalization.
873 auto GUIDPreservedSymbols =
874 computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
876 // Compute "dead" symbols, we don't want to import/export these!
877 auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols);
879 // Collect the import/export lists for all modules from the call-graph in the
881 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
882 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
883 ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
884 ExportLists, &DeadSymbols);
886 // We use a std::map here to be able to have a defined ordering when
887 // producing a hash for the cache entry.
888 // FIXME: we should be able to compute the caching hash for the entry based
889 // on the index, and nuke this map.
890 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
892 // Resolve LinkOnce/Weak symbols, this has to be computed early because it
893 // impacts the caching.
894 resolveWeakForLinkerInIndex(*Index, ResolvedODR);
896 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
897 const auto &ExportList = ExportLists.find(ModuleIdentifier);
898 return (ExportList != ExportLists.end() &&
899 ExportList->second.count(GUID)) ||
900 GUIDPreservedSymbols.count(GUID);
903 // Use global summary-based analysis to identify symbols that can be
904 // internalized (because they aren't exported or preserved as per callback).
905 // Changes are made in the index, consumed in the ThinLTO backends.
906 thinLTOInternalizeAndPromoteInIndex(*Index, isExported);
908 // Make sure that every module has an entry in the ExportLists and
909 // ResolvedODR maps to enable threaded access to these maps below.
910 for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
911 ExportLists[DefinedGVSummaries.first()];
912 ResolvedODR[DefinedGVSummaries.first()];
915 // Compute the ordering we will process the inputs: the rough heuristic here
916 // is to sort them by size so that the largest modules get scheduled as soon as
917 // possible. This is purely a compile-time optimization.
918 std::vector<int> ModulesOrdering;
919 ModulesOrdering.resize(Modules.size());
920 std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
921 std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
922 [&](int LeftIndex, int RightIndex) {
923 auto LSize = Modules[LeftIndex].getBuffer().size();
924 auto RSize = Modules[RightIndex].getBuffer().size();
925 return LSize > RSize;
928 // Parallel optimizer + codegen
930 ThreadPool Pool(ThreadCount);
931 for (auto IndexCount : ModulesOrdering) {
932 auto &ModuleBuffer = Modules[IndexCount];
933 Pool.async([&](int count) {
934 auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
935 auto &ExportList = ExportLists[ModuleIdentifier];
937 auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
939 // The module may be cached, this helps handling it.
940 ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
941 ImportLists[ModuleIdentifier], ExportList,
942 ResolvedODR[ModuleIdentifier],
943 DefinedFunctions, GUIDPreservedSymbols,
944 OptLevel, Freestanding, TMBuilder);
945 auto CacheEntryPath = CacheEntry.getEntryPath();
948 auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
949 DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
950 << CacheEntryPath << "' for buffer " << count << " "
951 << ModuleIdentifier << "\n");
955 if (SavedObjectsDirectoryPath.empty())
956 ProducedBinaries[count] = std::move(ErrOrBuffer.get());
958 ProducedBinaryFiles[count] = writeGeneratedObject(
959 count, CacheEntryPath, SavedObjectsDirectoryPath,
966 Context.setDiscardValueNames(LTODiscardValueNames);
967 Context.enableDebugTypeODRUniquing();
968 auto DiagFileOrErr = lto::setupOptimizationRemarks(
969 Context, LTORemarksFilename, LTOPassRemarksWithHotness, count);
970 if (!DiagFileOrErr) {
971 errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
972 report_fatal_error("ThinLTO: Can't get an output file for the "
978 loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
979 /*IsImporting*/ false);
981 // Save temps: original file.
982 saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
984 auto &ImportList = ImportLists[ModuleIdentifier];
985 // Run the main process now, and generates a binary
986 auto OutputBuffer = ProcessThinLTOModule(
987 *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
988 ExportList, GUIDPreservedSymbols,
989 ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
990 DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count);
992 // Commit to the cache (if enabled)
993 CacheEntry.write(*OutputBuffer);
995 if (SavedObjectsDirectoryPath.empty()) {
996 // We need to generate a memory buffer for the linker.
997 if (!CacheEntryPath.empty()) {
998 // Cache is enabled, reload from the cache
999 // We do this to lower memory pressure: the buffer is on the heap
1000 // and releasing it frees memory that can be used for the next input
1001 // file. The final binary link will read from the VFS cache
1002 // (hopefully!) or from disk if the memory pressure wasn't too high.
1003 auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
1004 if (auto EC = ReloadedBufferOrErr.getError()) {
1005 // On error, keeping the preexisting buffer and printing a
1006 // diagnostic is more friendly than just crashing.
1007 errs() << "error: can't reload cached file '" << CacheEntryPath
1008 << "': " << EC.message() << "\n";
1010 OutputBuffer = std::move(*ReloadedBufferOrErr);
1013 ProducedBinaries[count] = std::move(OutputBuffer);
1016 ProducedBinaryFiles[count] = writeGeneratedObject(
1017 count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer);
1022 pruneCache(CacheOptions.Path, CacheOptions.Policy);
1024 // If statistics were requested, print them out now.
1025 if (llvm::AreStatisticsEnabled())
1026 llvm::PrintStatistics();
1027 reportAndResetTimings();