1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
// This file implements the Thin Link Time Optimization library. This library is
// intended to be used by the linker to optimize code at link time.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/ADT/StringExtras.h"
19 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
20 #include "llvm/Analysis/ProfileSummaryInfo.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/Bitcode/BitcodeWriter.h"
25 #include "llvm/Bitcode/BitcodeWriterPass.h"
26 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
27 #include "llvm/IR/DiagnosticPrinter.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/IR/LegacyPassManager.h"
30 #include "llvm/IR/Mangler.h"
31 #include "llvm/IRReader/IRReader.h"
32 #include "llvm/LTO/LTO.h"
33 #include "llvm/Linker/Linker.h"
34 #include "llvm/MC/SubtargetFeature.h"
35 #include "llvm/Object/IRObjectFile.h"
36 #include "llvm/Object/ModuleSummaryIndexObjectFile.h"
37 #include "llvm/Support/CachePruning.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/Error.h"
40 #include "llvm/Support/Path.h"
41 #include "llvm/Support/SHA1.h"
42 #include "llvm/Support/TargetRegistry.h"
43 #include "llvm/Support/ThreadPool.h"
44 #include "llvm/Support/Threading.h"
45 #include "llvm/Support/ToolOutputFile.h"
46 #include "llvm/Support/VCSRevision.h"
47 #include "llvm/Target/TargetMachine.h"
48 #include "llvm/Transforms/IPO.h"
49 #include "llvm/Transforms/IPO/FunctionImport.h"
50 #include "llvm/Transforms/IPO/Internalize.h"
51 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
52 #include "llvm/Transforms/ObjCARC.h"
53 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
59 #define DEBUG_TYPE "thinlto"
62 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp
63 extern cl::opt<bool> LTODiscardValueNames;
64 extern cl::opt<std::string> LTORemarksFilename;
65 extern cl::opt<bool> LTOPassRemarksWithHotness;
71 ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency()));
73 // Simple helper to save temporary files for debug.
74 static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
75 unsigned count, StringRef Suffix) {
78 // User asked to save temps, let dump the bitcode file after import.
79 std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
81 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
83 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
84 " to save optimized bitcode\n");
85 WriteBitcodeToFile(&TheModule, OS, /* ShouldPreserveUseListOrder */ true);
88 static const GlobalValueSummary *
89 getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
90 // If there is any strong definition anywhere, get it.
91 auto StrongDefForLinker = llvm::find_if(
92 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
93 auto Linkage = Summary->linkage();
94 return !GlobalValue::isAvailableExternallyLinkage(Linkage) &&
95 !GlobalValue::isWeakForLinker(Linkage);
97 if (StrongDefForLinker != GVSummaryList.end())
98 return StrongDefForLinker->get();
99 // Get the first *linker visible* definition for this global in the summary
101 auto FirstDefForLinker = llvm::find_if(
102 GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
103 auto Linkage = Summary->linkage();
104 return !GlobalValue::isAvailableExternallyLinkage(Linkage);
106 // Extern templates can be emitted as available_externally.
107 if (FirstDefForLinker == GVSummaryList.end())
109 return FirstDefForLinker->get();
112 // Populate map of GUID to the prevailing copy for any multiply defined
113 // symbols. Currently assume first copy is prevailing, or any strong
114 // definition. Can be refined with Linker information in the future.
115 static void computePrevailingCopies(
116 const ModuleSummaryIndex &Index,
117 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) {
118 auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) {
119 return GVSummaryList.size() > 1;
122 for (auto &I : Index) {
123 if (HasMultipleCopies(I.second))
124 PrevailingCopy[I.first] = getFirstDefinitionForLinker(I.second);
128 static StringMap<MemoryBufferRef>
129 generateModuleMap(const std::vector<ThinLTOBuffer> &Modules) {
130 StringMap<MemoryBufferRef> ModuleMap;
131 for (auto &ModuleBuffer : Modules) {
132 assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
134 "Expect unique Buffer Identifier");
135 ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer.getMemBuffer();
140 static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) {
141 if (renameModuleForThinLTO(TheModule, Index))
142 report_fatal_error("renameModuleForThinLTO failed");
145 static std::unique_ptr<Module>
146 loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
147 bool Lazy, bool IsImporting) {
149 Expected<std::unique_ptr<Module>> ModuleOrErr =
151 ? getLazyBitcodeModule(Buffer, Context,
152 /* ShouldLazyLoadMetadata */ true, IsImporting)
153 : parseBitcodeFile(Buffer, Context);
155 handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
156 SMDiagnostic Err = SMDiagnostic(Buffer.getBufferIdentifier(),
157 SourceMgr::DK_Error, EIB.message());
158 Err.print("ThinLTO", errs());
160 report_fatal_error("Can't load module, abort.");
162 return std::move(ModuleOrErr.get());
166 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
167 StringMap<MemoryBufferRef> &ModuleMap,
168 const FunctionImporter::ImportMapTy &ImportList) {
169 auto Loader = [&](StringRef Identifier) {
170 return loadModuleFromBuffer(ModuleMap[Identifier], TheModule.getContext(),
171 /*Lazy=*/true, /*IsImporting*/ true);
174 FunctionImporter Importer(Index, Loader);
175 Expected<bool> Result = Importer.importFunctions(TheModule, ImportList);
177 handleAllErrors(Result.takeError(), [&](ErrorInfoBase &EIB) {
178 SMDiagnostic Err = SMDiagnostic(TheModule.getModuleIdentifier(),
179 SourceMgr::DK_Error, EIB.message());
180 Err.print("ThinLTO", errs());
182 report_fatal_error("importFunctions failed");
186 static void optimizeModule(Module &TheModule, TargetMachine &TM,
187 unsigned OptLevel, bool Freestanding) {
188 // Populate the PassManager
189 PassManagerBuilder PMB;
190 PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
192 PMB.LibraryInfo->disableAllFunctions();
193 PMB.Inliner = createFunctionInliningPass();
194 // FIXME: should get it from the bitcode?
195 PMB.OptLevel = OptLevel;
196 PMB.LoopVectorize = true;
197 PMB.SLPVectorize = true;
198 PMB.VerifyInput = true;
199 PMB.VerifyOutput = false;
201 legacy::PassManager PM;
203 // Add the TTI (required to inform the vectorizer about register size for
205 PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
208 PMB.populateThinLTOPassManager(PM);
213 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
214 static DenseSet<GlobalValue::GUID>
215 computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
216 const Triple &TheTriple) {
217 DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
218 for (auto &Entry : PreservedSymbols) {
219 StringRef Name = Entry.first();
220 if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
221 Name = Name.drop_front();
222 GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
224 return GUIDPreservedSymbols;
227 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
229 SmallVector<char, 128> OutputBuffer;
233 raw_svector_ostream OS(OutputBuffer);
234 legacy::PassManager PM;
236 // If the bitcode files contain ARC code and were compiled with optimization,
237 // the ObjCARCContractPass must be run, so do it unconditionally here.
238 PM.add(createObjCARCContractPass());
240 // Setup the codegen now.
241 if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
242 /* DisableVerify */ true))
243 report_fatal_error("Failed to setup codegen");
245 // Run codegen now. resulting binary is in OutputBuffer.
248 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
251 /// Manage caching for a single Module.
252 class ModuleCacheEntry {
253 SmallString<128> EntryPath;
256 // Create a cache entry. This compute a unique hash for the Module considering
257 // the current list of export/import, and offer an interface to query to
258 // access the content in the cache.
260 StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID,
261 const FunctionImporter::ImportMapTy &ImportList,
262 const FunctionImporter::ExportSetTy &ExportList,
263 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
264 const GVSummaryMapTy &DefinedFunctions,
265 const DenseSet<GlobalValue::GUID> &PreservedSymbols, unsigned OptLevel,
266 bool Freestanding, const TargetMachineBuilder &TMBuilder) {
267 if (CachePath.empty())
270 if (!Index.modulePaths().count(ModuleID))
271 // The module does not have an entry, it can't have a hash at all
274 // Compute the unique hash for this entry
275 // This is based on the current compiler version, the module itself, the
276 // export list, the hash for every single module in the import list, the
277 // list of ResolvedODR for the module, and the list of preserved symbols.
279 // Include the hash for the current module
280 auto ModHash = Index.getModuleHash(ModuleID);
282 if (all_of(ModHash, [](uint32_t V) { return V == 0; }))
283 // No hash entry, no caching!
288 // Include the parts of the LTO configuration that affect code generation.
289 auto AddString = [&](StringRef Str) {
291 Hasher.update(ArrayRef<uint8_t>{0});
293 auto AddUnsigned = [&](unsigned I) {
299 Hasher.update(ArrayRef<uint8_t>{Data, 4});
302 // Start with the compiler revision
303 Hasher.update(LLVM_VERSION_STRING);
305 Hasher.update(LLVM_REVISION);
308 // Hash the optimization level and the target machine settings.
309 AddString(TMBuilder.MCpu);
310 // FIXME: Hash more of Options. For now all clients initialize Options from
311 // command-line flags (which is unsupported in production), but may set
312 // RelaxELFRelocations. The clang driver can also pass FunctionSections,
313 // DataSections and DebuggerTuning via command line flags.
314 AddUnsigned(TMBuilder.Options.RelaxELFRelocations);
315 AddUnsigned(TMBuilder.Options.FunctionSections);
316 AddUnsigned(TMBuilder.Options.DataSections);
317 AddUnsigned((unsigned)TMBuilder.Options.DebuggerTuning);
318 AddString(TMBuilder.MAttr);
319 if (TMBuilder.RelocModel)
320 AddUnsigned(*TMBuilder.RelocModel);
321 AddUnsigned(TMBuilder.CGOptLevel);
322 AddUnsigned(OptLevel);
323 AddUnsigned(Freestanding);
325 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
326 for (auto F : ExportList)
327 // The export list can impact the internalization, be conservative here
328 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
330 // Include the hash for every module we import functions from
331 for (auto &Entry : ImportList) {
332 auto ModHash = Index.getModuleHash(Entry.first());
333 Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
336 // Include the hash for the resolved ODR.
337 for (auto &Entry : ResolvedODR) {
338 Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
339 sizeof(GlobalValue::GUID)));
340 Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
341 sizeof(GlobalValue::LinkageTypes)));
344 // Include the hash for the preserved symbols.
345 for (auto &Entry : PreservedSymbols) {
346 if (DefinedFunctions.count(Entry))
348 ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
351 // This choice of file name allows the cache to be pruned (see pruneCache()
352 // in include/llvm/Support/CachePruning.h).
353 sys::path::append(EntryPath, CachePath,
354 "llvmcache-" + toHex(Hasher.result()));
357 // Access the path to this entry in the cache.
358 StringRef getEntryPath() { return EntryPath; }
360 // Try loading the buffer for this cache entry.
361 ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
362 if (EntryPath.empty())
363 return std::error_code();
364 return MemoryBuffer::getFile(EntryPath);
367 // Cache the Produced object file
368 void write(const MemoryBuffer &OutputBuffer) {
369 if (EntryPath.empty())
372 // Write to a temporary to avoid race condition
373 SmallString<128> TempFilename;
376 sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
378 errs() << "Error: " << EC.message() << "\n";
379 report_fatal_error("ThinLTO: Can't get a temporary file");
382 raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
383 OS << OutputBuffer.getBuffer();
385 // Rename to final destination (hopefully race condition won't matter here)
386 EC = sys::fs::rename(TempFilename, EntryPath);
388 sys::fs::remove(TempFilename);
389 raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
391 report_fatal_error(Twine("Failed to open ") + EntryPath +
392 " to save cached entry\n");
393 OS << OutputBuffer.getBuffer();
398 static std::unique_ptr<MemoryBuffer>
399 ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
400 StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
401 const FunctionImporter::ImportMapTy &ImportList,
402 const FunctionImporter::ExportSetTy &ExportList,
403 const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
404 const GVSummaryMapTy &DefinedGlobals,
405 const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
406 bool DisableCodeGen, StringRef SaveTempsDir,
407 bool Freestanding, unsigned OptLevel, unsigned count) {
409 // "Benchmark"-like optimization: single-source case
410 bool SingleModule = (ModuleMap.size() == 1);
413 promoteModule(TheModule, Index);
415 // Apply summary-based LinkOnce/Weak resolution decisions.
416 thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals);
418 // Save temps: after promotion.
419 saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
422 // Be friendly and don't nuke totally the module when the client didn't
423 // supply anything to preserve.
424 if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) {
425 // Apply summary-based internalization decisions.
426 thinLTOInternalizeModule(TheModule, DefinedGlobals);
429 // Save internalized bitcode
430 saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc");
433 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
435 // Save temps: after cross-module import.
436 saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
439 optimizeModule(TheModule, TM, OptLevel, Freestanding);
441 saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
443 if (DisableCodeGen) {
444 // Configured to stop before CodeGen, serialize the bitcode and return.
445 SmallVector<char, 128> OutputBuffer;
447 raw_svector_ostream OS(OutputBuffer);
448 ProfileSummaryInfo PSI(TheModule);
449 auto Index = buildModuleSummaryIndex(TheModule, nullptr, nullptr);
450 WriteBitcodeToFile(&TheModule, OS, true, &Index);
452 return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
455 return codegenModule(TheModule, TM);
458 /// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map
459 /// for caching, and in the \p Index for application during the ThinLTO
460 /// backends. This is needed for correctness for exported symbols (ensure
461 /// at least one copy kept) and a compile-time optimization (to drop duplicate
462 /// copies when possible).
463 static void resolveWeakForLinkerInIndex(
464 ModuleSummaryIndex &Index,
465 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
468 DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
469 computePrevailingCopies(Index, PrevailingCopy);
471 auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
472 const auto &Prevailing = PrevailingCopy.find(GUID);
473 // Not in map means that there was only one copy, which must be prevailing.
474 if (Prevailing == PrevailingCopy.end())
476 return Prevailing->second == S;
479 auto recordNewLinkage = [&](StringRef ModuleIdentifier,
480 GlobalValue::GUID GUID,
481 GlobalValue::LinkageTypes NewLinkage) {
482 ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
485 thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage);
488 // Initialize the TargetMachine builder for a given Triple
489 static void initTMBuilder(TargetMachineBuilder &TMBuilder,
490 const Triple &TheTriple) {
491 // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
492 // FIXME this looks pretty terrible...
493 if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
494 if (TheTriple.getArch() == llvm::Triple::x86_64)
495 TMBuilder.MCpu = "core2";
496 else if (TheTriple.getArch() == llvm::Triple::x86)
497 TMBuilder.MCpu = "yonah";
498 else if (TheTriple.getArch() == llvm::Triple::aarch64)
499 TMBuilder.MCpu = "cyclone";
501 TMBuilder.TheTriple = std::move(TheTriple);
504 } // end anonymous namespace
506 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
507 ThinLTOBuffer Buffer(Data, Identifier);
508 if (Modules.empty()) {
509 // First module added, so initialize the triple and some options
512 ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
513 Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
515 TripleStr = *TripleOrErr;
516 Triple TheTriple(TripleStr);
517 initTMBuilder(TMBuilder, Triple(TheTriple));
523 ErrorOr<std::string> TripleOrErr = expectedToErrorOrAndEmitErrors(
524 Context, getBitcodeTargetTriple(Buffer.getMemBuffer()));
526 TripleStr = *TripleOrErr;
527 assert(TMBuilder.TheTriple.str() == TripleStr &&
528 "ThinLTO modules with different triple not supported");
531 Modules.push_back(Buffer);
534 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
535 PreservedSymbols.insert(Name);
538 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
539 // FIXME: At the moment, we don't take advantage of this extra information,
540 // we're conservatively considering cross-references as preserved.
541 // CrossReferencedSymbols.insert(Name);
542 PreservedSymbols.insert(Name);
545 // TargetMachine factory
546 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
548 const Target *TheTarget =
549 TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
551 report_fatal_error("Can't load target for this Triple: " + ErrMsg);
554 // Use MAttr as the default set of features.
555 SubtargetFeatures Features(MAttr);
556 Features.getDefaultSubtargetFeatures(TheTriple);
557 std::string FeatureStr = Features.getString();
559 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
560 TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
561 CodeModel::Default, CGOptLevel));
565 * Produce the combined summary index from all the bitcode files:
568 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
569 std::unique_ptr<ModuleSummaryIndex> CombinedIndex;
570 uint64_t NextModuleId = 0;
571 for (auto &ModuleBuffer : Modules) {
572 Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
573 object::ModuleSummaryIndexObjectFile::create(
574 ModuleBuffer.getMemBuffer());
577 logAllUnhandledErrors(
578 ObjOrErr.takeError(), errs(),
579 "error: can't create ModuleSummaryIndexObjectFile for buffer: ");
582 auto Index = (*ObjOrErr)->takeIndex();
584 CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId);
586 CombinedIndex = std::move(Index);
589 return CombinedIndex;
593 * Perform promotion and renaming of exported internal functions.
594 * Index is updated to reflect linkage changes from weak resolution.
596 void ThinLTOCodeGenerator::promote(Module &TheModule,
597 ModuleSummaryIndex &Index) {
598 auto ModuleCount = Index.modulePaths().size();
599 auto ModuleIdentifier = TheModule.getModuleIdentifier();
601 // Collect for each module the list of function it defines (GUID -> Summary).
602 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
603 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
605 // Convert the preserved symbols set from string to GUID
606 auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
607 PreservedSymbols, Triple(TheModule.getTargetTriple()));
609 // Compute "dead" symbols, we don't want to import/export these!
610 auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
612 // Generate import/export list
613 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
614 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
615 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
616 ExportLists, &DeadSymbols);
618 // Resolve LinkOnce/Weak symbols.
619 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
620 resolveWeakForLinkerInIndex(Index, ResolvedODR);
622 thinLTOResolveWeakForLinkerModule(
623 TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
625 // Promote the exported values in the index, so that they are promoted
627 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
628 const auto &ExportList = ExportLists.find(ModuleIdentifier);
629 return (ExportList != ExportLists.end() &&
630 ExportList->second.count(GUID)) ||
631 GUIDPreservedSymbols.count(GUID);
633 thinLTOInternalizeAndPromoteInIndex(Index, isExported);
635 promoteModule(TheModule, Index);
639 * Perform cross-module importing for the module identified by ModuleIdentifier.
641 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
642 ModuleSummaryIndex &Index) {
643 auto ModuleMap = generateModuleMap(Modules);
644 auto ModuleCount = Index.modulePaths().size();
646 // Collect for each module the list of function it defines (GUID -> Summary).
647 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
648 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
650 // Convert the preserved symbols set from string to GUID
651 auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
652 PreservedSymbols, Triple(TheModule.getTargetTriple()));
654 // Compute "dead" symbols, we don't want to import/export these!
655 auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
657 // Generate import/export list
658 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
659 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
660 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
661 ExportLists, &DeadSymbols);
662 auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
664 crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
668 * Compute the list of summaries needed for importing into module.
670 void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
671 StringRef ModulePath, ModuleSummaryIndex &Index,
672 std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
673 auto ModuleCount = Index.modulePaths().size();
675 // Collect for each module the list of function it defines (GUID -> Summary).
676 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
677 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
679 // Generate import/export list
680 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
681 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
682 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
685 llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
686 ImportLists[ModulePath],
687 ModuleToSummariesForIndex);
691 * Emit the list of files needed for importing into module.
693 void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
694 StringRef OutputName,
695 ModuleSummaryIndex &Index) {
696 auto ModuleCount = Index.modulePaths().size();
698 // Collect for each module the list of function it defines (GUID -> Summary).
699 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
700 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
702 // Generate import/export list
703 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
704 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
705 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
709 if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists[ModulePath])))
710 report_fatal_error(Twine("Failed to open ") + OutputName +
711 " to save imports lists\n");
715 * Perform internalization. Index is updated to reflect linkage changes.
717 void ThinLTOCodeGenerator::internalize(Module &TheModule,
718 ModuleSummaryIndex &Index) {
719 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
720 auto ModuleCount = Index.modulePaths().size();
721 auto ModuleIdentifier = TheModule.getModuleIdentifier();
723 // Convert the preserved symbols set from string to GUID
724 auto GUIDPreservedSymbols =
725 computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
727 // Collect for each module the list of function it defines (GUID -> Summary).
728 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
729 Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
731 // Compute "dead" symbols, we don't want to import/export these!
732 auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
734 // Generate import/export list
735 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
736 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
737 ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
738 ExportLists, &DeadSymbols);
739 auto &ExportList = ExportLists[ModuleIdentifier];
741 // Be friendly and don't nuke totally the module when the client didn't
742 // supply anything to preserve.
743 if (ExportList.empty() && GUIDPreservedSymbols.empty())
747 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
748 const auto &ExportList = ExportLists.find(ModuleIdentifier);
749 return (ExportList != ExportLists.end() &&
750 ExportList->second.count(GUID)) ||
751 GUIDPreservedSymbols.count(GUID);
753 thinLTOInternalizeAndPromoteInIndex(Index, isExported);
754 thinLTOInternalizeModule(TheModule,
755 ModuleToDefinedGVSummaries[ModuleIdentifier]);
759 * Perform post-importing ThinLTO optimizations.
761 void ThinLTOCodeGenerator::optimize(Module &TheModule) {
762 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
765 optimizeModule(TheModule, *TMBuilder.create(), OptLevel, Freestanding);
769 * Perform ThinLTO CodeGen.
771 std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
772 initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
773 return codegenModule(TheModule, *TMBuilder.create());
776 /// Write out the generated object file, either from CacheEntryPath or from
777 /// OutputBuffer, preferring hard-link when possible.
778 /// Returns the path to the generated file in SavedObjectsDirectoryPath.
779 static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
780 StringRef SavedObjectsDirectoryPath,
781 const MemoryBuffer &OutputBuffer) {
782 SmallString<128> OutputPath(SavedObjectsDirectoryPath);
783 llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o");
784 OutputPath.c_str(); // Ensure the string is null terminated.
785 if (sys::fs::exists(OutputPath))
786 sys::fs::remove(OutputPath);
788 // We don't return a memory buffer to the linker, just a list of files.
789 if (!CacheEntryPath.empty()) {
790 // Cache is enabled, hard-link the entry (or copy if hard-link fails).
791 auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath);
793 return OutputPath.str();
794 // Hard linking failed, try to copy.
795 Err = sys::fs::copy_file(CacheEntryPath, OutputPath);
797 return OutputPath.str();
798 // Copy failed (could be because the CacheEntry was removed from the cache
799 // in the meantime by another process), fall back and try to write down the
800 // buffer to the output.
801 errs() << "error: can't link or copy from cached entry '" << CacheEntryPath
802 << "' to '" << OutputPath << "'\n";
804 // No cache entry, just write out the buffer.
806 raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None);
808 report_fatal_error("Can't open output '" + OutputPath + "'\n");
809 OS << OutputBuffer.getBuffer();
810 return OutputPath.str();
813 // Main entry point for the ThinLTO processing
814 void ThinLTOCodeGenerator::run() {
815 // Prepare the resulting object vector
816 assert(ProducedBinaries.empty() && "The generator should not be reused");
817 if (SavedObjectsDirectoryPath.empty())
818 ProducedBinaries.resize(Modules.size());
820 sys::fs::create_directories(SavedObjectsDirectoryPath);
822 sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
824 report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
825 ProducedBinaryFiles.resize(Modules.size());
829 // Perform only parallel codegen and return.
832 for (auto &ModuleBuffer : Modules) {
833 Pool.async([&](int count) {
835 Context.setDiscardValueNames(LTODiscardValueNames);
839 loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
840 /*IsImporting*/ false);
843 auto OutputBuffer = codegen(*TheModule);
844 if (SavedObjectsDirectoryPath.empty())
845 ProducedBinaries[count] = std::move(OutputBuffer);
847 ProducedBinaryFiles[count] = writeGeneratedObject(
848 count, "", SavedObjectsDirectoryPath, *OutputBuffer);
855 // Sequential linking phase
856 auto Index = linkCombinedIndex();
858 // Save temps: index.
859 if (!SaveTempsDir.empty()) {
860 auto SaveTempPath = SaveTempsDir + "index.bc";
862 raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
864 report_fatal_error(Twine("Failed to open ") + SaveTempPath +
865 " to save optimized bitcode\n");
866 WriteIndexToFile(*Index, OS);
870 // Prepare the module map.
871 auto ModuleMap = generateModuleMap(Modules);
872 auto ModuleCount = Modules.size();
874 // Collect for each module the list of function it defines (GUID -> Summary).
875 StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
876 Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
878 // Convert the preserved symbols set from string to GUID, this is needed for
879 // computing the caching hash and the internalization.
880 auto GUIDPreservedSymbols =
881 computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
883 // Compute "dead" symbols, we don't want to import/export these!
884 auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols);
886 // Collect the import/export lists for all modules from the call-graph in the
888 StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
889 StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
890 ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
891 ExportLists, &DeadSymbols);
893 // We use a std::map here to be able to have a defined ordering when
894 // producing a hash for the cache entry.
895 // FIXME: we should be able to compute the caching hash for the entry based
896 // on the index, and nuke this map.
897 StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
899 // Resolve LinkOnce/Weak symbols, this has to be computed early because it
900 // impacts the caching.
901 resolveWeakForLinkerInIndex(*Index, ResolvedODR);
903 auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
904 const auto &ExportList = ExportLists.find(ModuleIdentifier);
905 return (ExportList != ExportLists.end() &&
906 ExportList->second.count(GUID)) ||
907 GUIDPreservedSymbols.count(GUID);
910 // Use global summary-based analysis to identify symbols that can be
911 // internalized (because they aren't exported or preserved as per callback).
912 // Changes are made in the index, consumed in the ThinLTO backends.
913 thinLTOInternalizeAndPromoteInIndex(*Index, isExported);
915 // Make sure that every module has an entry in the ExportLists and
916 // ResolvedODR maps to enable threaded access to these maps below.
917 for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
918 ExportLists[DefinedGVSummaries.first()];
919 ResolvedODR[DefinedGVSummaries.first()];
922 // Compute the ordering we will process the inputs: the rough heuristic here
923 // is to sort them per size so that the largest modules get scheduled as soon
924 // as possible. This is purely a compile-time optimization.
925 std::vector<int> ModulesOrdering;
926 ModulesOrdering.resize(Modules.size());
927 std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
928 std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
929 [&](int LeftIndex, int RightIndex) {
930 auto LSize = Modules[LeftIndex].getBuffer().size();
931 auto RSize = Modules[RightIndex].getBuffer().size();
932 return LSize > RSize;
935 // Parallel optimizer + codegen
937 ThreadPool Pool(ThreadCount);
938 for (auto IndexCount : ModulesOrdering) {
939 auto &ModuleBuffer = Modules[IndexCount];
940 Pool.async([&](int count) {
941 auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
942 auto &ExportList = ExportLists[ModuleIdentifier];
944 auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
946 // The module may be cached, this helps handling it.
947 ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
948 ImportLists[ModuleIdentifier], ExportList,
949 ResolvedODR[ModuleIdentifier],
950 DefinedFunctions, GUIDPreservedSymbols,
951 OptLevel, Freestanding, TMBuilder);
952 auto CacheEntryPath = CacheEntry.getEntryPath();
955 auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
956 DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
957 << CacheEntryPath << "' for buffer " << count << " "
958 << ModuleIdentifier << "\n");
962 if (SavedObjectsDirectoryPath.empty())
963 ProducedBinaries[count] = std::move(ErrOrBuffer.get());
965 ProducedBinaryFiles[count] = writeGeneratedObject(
966 count, CacheEntryPath, SavedObjectsDirectoryPath,
973 Context.setDiscardValueNames(LTODiscardValueNames);
974 Context.enableDebugTypeODRUniquing();
975 auto DiagFileOrErr = lto::setupOptimizationRemarks(
976 Context, LTORemarksFilename, LTOPassRemarksWithHotness, count);
977 if (!DiagFileOrErr) {
978 errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
979 report_fatal_error("ThinLTO: Can't get an output file for the "
985 loadModuleFromBuffer(ModuleBuffer.getMemBuffer(), Context, false,
986 /*IsImporting*/ false);
988 // Save temps: original file.
989 saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
991 auto &ImportList = ImportLists[ModuleIdentifier];
992 // Run the main process now, and generates a binary
993 auto OutputBuffer = ProcessThinLTOModule(
994 *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
995 ExportList, GUIDPreservedSymbols,
996 ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
997 DisableCodeGen, SaveTempsDir, Freestanding, OptLevel, count);
999 // Commit to the cache (if enabled)
1000 CacheEntry.write(*OutputBuffer);
1002 if (SavedObjectsDirectoryPath.empty()) {
1003 // We need to generate a memory buffer for the linker.
1004 if (!CacheEntryPath.empty()) {
1005 // Cache is enabled, reload from the cache
1006 // We do this to lower memory pressure: the buffer is on the heap
1007 // and releasing it frees memory that can be used for the next input
1008 // file. The final binary link will read from the VFS cache
1009 // (hopefully!) or from disk if the memory pressure wasn't too high.
1010 auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
1011 if (auto EC = ReloadedBufferOrErr.getError()) {
1012 // On error, keeping the preexisting buffer and printing a
1013 // diagnostic is more friendly than just crashing.
1014 errs() << "error: can't reload cached file '" << CacheEntryPath
1015 << "': " << EC.message() << "\n";
1017 OutputBuffer = std::move(*ReloadedBufferOrErr);
1020 ProducedBinaries[count] = std::move(OutputBuffer);
1023 ProducedBinaryFiles[count] = writeGeneratedObject(
1024 count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer);
1029 pruneCache(CacheOptions.Path, CacheOptions.Policy);
1031 // If statistics were requested, print them out now.
1032 if (llvm::AreStatisticsEnabled())
1033 llvm::PrintStatistics();