1 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the "backend" phase of LTO, i.e. it performs
11 // optimization and code generation on a loaded module. It is generally used
12 // internally by the LTO class but can also be used independently, for example
13 // to implement a standalone ThinLTO backend.
15 //===----------------------------------------------------------------------===//
17 #include "llvm/LTO/LTOBackend.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/CGSCCPassManager.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/Bitcode/BitcodeWriter.h"
24 #include "llvm/IR/LegacyPassManager.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/IR/Verifier.h"
27 #include "llvm/LTO/LTO.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 #include "llvm/Object/ModuleSymbolTable.h"
30 #include "llvm/Passes/PassBuilder.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Support/Program.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/ThreadPool.h"
39 #include "llvm/Target/TargetMachine.h"
40 #include "llvm/Transforms/IPO.h"
41 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
42 #include "llvm/Transforms/Scalar/LoopPassManager.h"
43 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
44 #include "llvm/Transforms/Utils/SplitModule.h"
/// Print a "failed to open <Path>: <Msg>" diagnostic on errs().
///
/// The function is declared LLVM_ATTRIBUTE_NORETURN, so it is not expected to
/// return to its caller; the statements that actually end the process are not
/// visible in this excerpt.
///
/// \param Path the file that could not be opened.
/// \param Msg  the reason reported for the failure.
49 LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
50 errs() << "failed to open " << Path << ": " << Msg << '\n';
/// Turn on "save temps" behaviour for this Config.
///
/// Value names are preserved (ShouldDiscardValueNames is cleared), a
/// resolution file named OutputFileName + "resolution.txt" is opened in text
/// mode, and each of the six module hooks as well as CombinedIndexHook is
/// replaced by a hook that writes its input to disk.
///
/// \param OutputFileName prefix from which the names of the emitted files are
///        built.
/// \param UseInputModulePath when true, a module other than the combined
///        "ld-temp.o" module is written under its own module identifier
///        rather than under OutputFileName.
/// \returns Error::success() at the end of the visible code. The
///          errorCodeToError(EC) return reports a failure to open the
///          resolution file; NOTE(review): its guarding condition is not
///          visible in this excerpt.
55 Error Config::addSaveTemps(std::string OutputFileName,
56 bool UseInputModulePath) {
57 ShouldDiscardValueNames = false;
60 ResolutionFile = llvm::make_unique<raw_fd_ostream>(
61 OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
63 return errorCodeToError(EC);
// Wrap Hook so that the previously installed hook still runs first and the
// module is then written out as "<prefix><PathSuffix>.bc".
65 auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
66 // Keep track of the hook provided by the linker, which also needs to run.
67 ModuleHookFn LinkerHook = Hook;
// Capture by copy: the installed hook outlives this function call.
68 Hook = [=](unsigned Task, const Module &M) {
69 // If the linker's hook returned false, we need to pass that result through.
71 if (LinkerHook && !LinkerHook(Task, M))
74 std::string PathPrefix;
75 // If this is the combined module (not a ThinLTO backend compile) or the
76 // user hasn't requested using the input module's path, emit to a file
77 // named from the provided OutputFileName with the Task ID appended.
78 if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
79 PathPrefix = OutputFileName;
// A Task of (unsigned)-1 gets no task number in the file name.
80 if (Task != (unsigned)-1)
81 PathPrefix += utostr(Task) + ".";
// Otherwise the module's own identifier is used as the prefix.
83 PathPrefix = M.getModuleIdentifier() + ".";
84 std::string Path = PathPrefix + PathSuffix + ".bc";
86 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
87 // Because -save-temps is a debugging feature, we report the error directly and exit.
90 reportOpenError(Path, EC.message());
91 WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false);
// The numeric prefix in each suffix matches the order in which the hooks are
// listed here.
96 setHook("0.preopt", PreOptModuleHook);
97 setHook("1.promote", PostPromoteModuleHook);
98 setHook("2.internalize", PostInternalizeModuleHook);
99 setHook("3.import", PostImportModuleHook);
100 setHook("4.opt", PostOptModuleHook);
101 setHook("5.precodegen", PreCodeGenModuleHook);
// The combined summary index is saved twice: as bitcode ("index.bc") and as a
// dot graph ("index.dot").
103 CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
104 std::string Path = OutputFileName + "index.bc";
106 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
107 // Because -save-temps is a debugging feature, we report the error
108 // directly and exit.
110 reportOpenError(Path, EC.message());
111 WriteIndexToFile(Index, OS);
113 Path = OutputFileName + "index.dot";
114 raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None);
116 reportOpenError(Path, EC.message());
117 Index.exportToDot(OSDot);
121 return Error::success();
/// Create a TargetMachine for module M using the given target.
///
/// The subtarget feature string is the default feature set for M's target
/// triple with every entry of Conf.MAttrs added on top. CPU, target options,
/// code model and codegen optimisation level are taken from Conf.
///
/// \param Conf      LTO configuration supplying CPU, attributes and options.
/// \param TheTarget target on which createTargetMachine() is invoked.
/// \param M         module whose triple and PIC level are consulted.
/// \returns the newly created TargetMachine wrapped in a unique_ptr.
126 std::unique_ptr<TargetMachine>
127 createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
128 StringRef TheTriple = M.getTargetTriple();
129 SubtargetFeatures Features;
130 Features.getDefaultSubtargetFeatures(Triple(TheTriple));
131 for (const std::string &A : Conf.MAttrs)
132 Features.AddFeature(A);
// Relocation model: either the value stored in Conf.RelocModel, or one derived
// from the module's PIC level (static for non-PIC modules, PIC otherwise).
// NOTE(review): the condition selecting between the two is not visible in
// this excerpt.
134 Reloc::Model RelocModel;
136 RelocModel = *Conf.RelocModel;
139 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
141 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
142 TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
143 Conf.CodeModel, Conf.CGOptLevel));
/// Set up the new pass manager and build one of its default LTO pipelines
/// for Mod.
///
/// \param Conf          supplies the sample profile path and the
///                      DebugPassManager flag handed to every manager.
/// \param Mod           module to optimise.
/// \param TM            target machine handed to the PassBuilder.
/// \param OptLevel      numeric level mapped onto PassBuilder::O0..O3; any
///                      other value reaches llvm_unreachable.
/// \param IsThinLTO     NOTE(review): presumably chooses between the ThinLTO
///                      and the regular LTO default pipeline; the branch
///                      itself is not visible in this excerpt.
/// \param ExportSummary passed to buildLTODefaultPipeline.
/// \param ImportSummary NOTE(review): its use is not visible in this excerpt.
146 static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
147 unsigned OptLevel, bool IsThinLTO,
148 ModuleSummaryIndex *ExportSummary,
149 const ModuleSummaryIndex *ImportSummary) {
// PGO options are only populated when a sample profile was configured.
150 Optional<PGOOptions> PGOOpt;
151 if (!Conf.SampleProfile.empty())
152 PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true);
154 PassBuilder PB(TM, PGOOpt);
157 // Parse the "default" AA pipeline; failing to do so is fatal.
158 if (!PB.parseAAPipeline(AA, "default"))
159 report_fatal_error("Error parsing default AA pipeline");
161 LoopAnalysisManager LAM(Conf.DebugPassManager);
162 FunctionAnalysisManager FAM(Conf.DebugPassManager);
163 CGSCCAnalysisManager CGAM(Conf.DebugPassManager);
164 ModuleAnalysisManager MAM(Conf.DebugPassManager);
166 // Register the AA manager first so that our version is the one used.
167 FAM.registerPass([&] { return std::move(AA); });
169 // Register all the basic analyses with the managers.
170 PB.registerModuleAnalyses(MAM);
171 PB.registerCGSCCAnalyses(CGAM);
172 PB.registerFunctionAnalyses(FAM);
173 PB.registerLoopAnalyses(LAM);
174 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
176 ModulePassManager MPM(Conf.DebugPassManager);
177 // FIXME (davide): verify the input.
// Translate the numeric optimisation level into the PassBuilder enum.
179 PassBuilder::OptimizationLevel OL;
183 llvm_unreachable("Invalid optimization level");
185 OL = PassBuilder::O0;
188 OL = PassBuilder::O1;
191 OL = PassBuilder::O2;
194 OL = PassBuilder::O3;
// MPM is replaced by one of the two default pipelines built by PassBuilder.
199 MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager,
202 MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary);
205 // FIXME (davide): verify the output.
/// Set up the new pass manager with a user-supplied textual pass pipeline
/// for Mod.
///
/// \param Mod            module to optimise.
/// \param TM             target machine. NOTE(review): its use is not visible
///                       in this excerpt.
/// \param PipelineDesc   textual pass pipeline; a parse failure is fatal.
/// \param AAPipelineDesc textual alias-analysis pipeline; only parsed when
///                       non-empty, and a parse failure is fatal.
/// \param DisableVerify  NOTE(review): presumably suppresses the trailing
///                       VerifierPass; the condition is not visible in this
///                       excerpt.
208 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
209 std::string PipelineDesc,
210 std::string AAPipelineDesc,
211 bool DisableVerify) {
215 // Parse a custom AA pipeline if asked to.
216 if (!AAPipelineDesc.empty())
217 if (!PB.parseAAPipeline(AA, AAPipelineDesc))
218 report_fatal_error("unable to parse AA pipeline description: " +
221 LoopAnalysisManager LAM;
222 FunctionAnalysisManager FAM;
223 CGSCCAnalysisManager CGAM;
224 ModuleAnalysisManager MAM;
226 // Register the AA manager first so that our version is the one used.
227 FAM.registerPass([&] { return std::move(AA); });
229 // Register all the basic analyses with the managers.
230 PB.registerModuleAnalyses(MAM);
231 PB.registerCGSCCAnalyses(CGAM);
232 PB.registerFunctionAnalyses(FAM);
233 PB.registerLoopAnalyses(LAM);
234 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
236 ModulePassManager MPM;
238 // Always verify the input.
239 MPM.addPass(VerifierPass());
241 // Now, add all the passes we've been requested to.
242 if (!PB.parsePassPipeline(MPM, PipelineDesc))
243 report_fatal_error("unable to parse pass pipeline description: " +
// A second verifier is appended after the requested passes.
247 MPM.addPass(VerifierPass());
/// Populate a legacy pass manager with an LTO or ThinLTO pipeline for Mod
/// using PassManagerBuilder.
///
/// \param Conf          supplies OptLevel, SampleProfile and DisableVerify.
/// \param Mod           module to optimise. NOTE(review): the call that runs
///                      the passes on it is not visible in this excerpt.
/// \param TM            target machine providing the TTI analysis and the
///                      target triple for the library info.
/// \param IsThinLTO     NOTE(review): presumably chooses between
///                      populateThinLTOPassManager and populateLTOPassManager;
///                      the branch is not visible in this excerpt.
/// \param ExportSummary forwarded to PassManagerBuilder::ExportSummary.
/// \param ImportSummary forwarded to PassManagerBuilder::ImportSummary.
251 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
252 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
253 const ModuleSummaryIndex *ImportSummary) {
254 legacy::PassManager passes;
255 passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
257 PassManagerBuilder PMB;
258 PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
259 PMB.Inliner = createFunctionInliningPass();
260 PMB.ExportSummary = ExportSummary;
261 PMB.ImportSummary = ImportSummary;
262 // Unconditionally verify input since it is not verified before this
263 // point and has unknown origin.
264 PMB.VerifyInput = true;
// Output verification, in contrast, can be switched off through the config.
265 PMB.VerifyOutput = !Conf.DisableVerify;
// Both vectorizers are always enabled here.
266 PMB.LoopVectorize = true;
267 PMB.SLPVectorize = true;
268 PMB.OptLevel = Conf.OptLevel;
269 PMB.PGOSampleUse = Conf.SampleProfile;
271 PMB.populateThinLTOPassManager(passes);
273 PMB.populateLTOPassManager(passes);
/// Optimise Mod with the pipeline selected by Conf, then run the post-opt
/// hook.
///
/// Selection order: a non-empty Conf.OptPipeline uses the custom new-PM
/// pipeline; otherwise Conf.UseNewPM selects the default new-PM pipeline;
/// otherwise the legacy pass manager is used.
///
/// \param Task task id handed to Conf.PostOptModuleHook.
/// \returns true when no PostOptModuleHook is installed, otherwise the value
///          returned by that hook.
277 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
278 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
279 const ModuleSummaryIndex *ImportSummary) {
280 // FIXME: Plumb the combined index into the new pass manager.
281 if (!Conf.OptPipeline.empty())
282 runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
284 else if (Conf.UseNewPM)
285 runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
288 runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
289 return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
/// Run code generation for Mod and emit the result into the stream obtained
/// from AddStream(Task).
///
/// \param Conf      supplies the pre-codegen hook and the split-DWARF
///                  settings (DwoPath, DwoDir).
/// \param TM        target machine used to build the emission passes; its
///                  MCOptions.SplitDwarfFile is updated when a .dwo file is
///                  in use.
/// \param AddStream callback returning the output stream for a task.
/// \param Task      task id; also used to name the .dwo file inside DwoDir.
/// \param Mod       module to compile.
292 void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
293 unsigned Task, Module &Mod) {
// The pre-codegen hook is consulted first. NOTE(review): the statement taken
// when it returns false is not visible in this excerpt.
294 if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
297 std::unique_ptr<ToolOutputFile> DwoOut;
// Start from the explicit DwoPath; a non-empty DwoDir overrides it with
// "<DwoDir>/<Task>.dwo" after creating the directory.
298 SmallString<1024> DwoFile(Conf.DwoPath);
299 if (!Conf.DwoDir.empty()) {
301 if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir))
302 report_fatal_error("Failed to create directory " + Conf.DwoDir + ": " +
305 DwoFile = Conf.DwoDir;
306 sys::path::append(DwoFile, std::to_string(Task) + ".dwo");
// With a .dwo path available, record it in the MC options and open the file.
309 if (!DwoFile.empty()) {
311 TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str();
312 DwoOut = llvm::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::F_None);
314 report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
317 auto Stream = AddStream(Task);
318 legacy::PassManager CodeGenPasses;
// addPassesToEmitFile reports failure by returning true; that is fatal here.
319 if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
320 DwoOut ? &DwoOut->os() : nullptr,
322 report_fatal_error("Failed to setup codegen");
323 CodeGenPasses.run(Mod);
/// Compile Mod in partitions, running codegen for each partition on a thread
/// pool.
///
/// Each partition is serialised to bitcode on the calling thread, then parsed
/// back inside a worker into its own LTOLLVMContext, given its own
/// TargetMachine and passed to codegen() with a per-partition task id.
///
/// \param C         LTO configuration shared by all workers.
/// \param TM        target machine; only its Target is read here, each worker
///                  creates a separate TargetMachine.
/// \param AddStream callback returning the output stream for a task id.
/// \param ParallelCodeGenParallelismLevel size of the thread pool; also
///                  forwarded together with the module to the partitioning
///                  call.
/// \param Mod       module to split; ownership is moved into the partitioning
///                  call.
329 void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
330 unsigned ParallelCodeGenParallelismLevel,
331 std::unique_ptr<Module> Mod) {
332 ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
// Incremented once per partition; the value becomes that partition's task id.
333 unsigned ThreadCount = 0;
334 const Target *T = &TM->getTarget();
337 std::move(Mod), ParallelCodeGenParallelismLevel,
338 [&](std::unique_ptr<Module> MPart) {
339 // We want to clone the module in a new context to multi-thread the
340 // codegen. We do it by serializing partition modules to bitcode
341 // (while still on the main thread, in order to avoid data races) and
342 // spinning up new threads which deserialize the partitions into
343 // separate contexts.
344 // FIXME: Provide a more direct way to do this in LLVM.
346 raw_svector_ostream BCOS(BC);
347 WriteBitcodeToFile(*MPart, BCOS);
350 CodegenThreadPool.async(
351 [&](const SmallString<0> &BC, unsigned ThreadId) {
352 LTOLLVMContext Ctx(C);
353 Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
354 MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"),
357 report_fatal_error("Failed to read bitcode");
358 std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
// This TM shadows the function parameter: every worker uses its own
// TargetMachine built for the re-parsed partition.
360 std::unique_ptr<TargetMachine> TM =
361 createTargetMachine(C, T, *MPartInCtx);
363 codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx);
365 // Pass BC using std::move to ensure that it gets moved rather than
366 // copied into the thread's context.
367 std::move(BC), ThreadCount++);
371 // Because the inner lambda (which runs in a worker thread) captures our local
372 // variables, we need to wait for the worker threads to terminate before we
373 // can leave the function scope.
374 CodegenThreadPool.wait();
/// Settle Mod's target triple and look the corresponding Target up in the
/// TargetRegistry.
///
/// C.OverrideTriple, when non-empty, always replaces the module's triple;
/// otherwise C.DefaultTriple is applied only if the module has no triple.
///
/// \returns a StringError carrying the registry's message on the failure path
///          shown below. NOTE(review): the failure check and the success
///          return are not visible in this excerpt.
377 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
378 if (!C.OverrideTriple.empty())
379 Mod.setTargetTriple(C.OverrideTriple);
380 else if (Mod.getTargetTriple().empty())
381 Mod.setTargetTriple(C.DefaultTriple);
// Msg receives the lookup's diagnostic text.
384 const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg);
386 return make_error<StringError>(Msg, inconvertibleErrorCode());
// Finish writing the optimization remarks file: mark it to be kept and flush
// its stream. Every path visible here returns Error::success().
// NOTE(review): the line carrying the return type and the condition guarding
// the first return are not visible in this excerpt; the early return is
// presumably taken when no remarks file was set up.
393 finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) {
394 // Make sure we flush the diagnostic remarks file in case the linker doesn't
395 // call the global destructors before exiting.
397 return Error::success();
398 DiagOutputFile->keep();
399 DiagOutputFile->os().flush();
400 return Error::success();
/// Regular LTO backend entry point: optimise and compile the combined module.
///
/// Steps: resolve the target, create a TargetMachine, set up optimization
/// remarks, run opt() unless C.CodeGenOnly is set, then generate code either
/// directly (parallelism level 1) or via splitCodeGen().
///
/// \param C             LTO configuration.
/// \param AddStream     callback returning the output stream for a task.
/// \param ParallelCodeGenParallelismLevel 1 selects single-threaded codegen
///                      with task id 0.
/// \param Mod           module to compile.
/// \param CombinedIndex passed to opt() as the export summary.
/// \returns the target-lookup or remarks-setup error if either failed,
///          otherwise the result of finalizeOptimizationRemarks().
403 Error lto::backend(Config &C, AddStreamFn AddStream,
404 unsigned ParallelCodeGenParallelismLevel,
405 std::unique_ptr<Module> Mod,
406 ModuleSummaryIndex &CombinedIndex) {
407 Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
409 return TOrErr.takeError();
411 std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod);
413 // Setup optimization remarks.
414 auto DiagFileOrErr = lto::setupOptimizationRemarks(
415 Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
417 return DiagFileOrErr.takeError();
418 auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
// When opt() returns false (the post-opt hook asked to stop), codegen is
// skipped and only the remarks file is finalized.
420 if (!C.CodeGenOnly) {
421 if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
422 /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
423 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
426 if (ParallelCodeGenParallelismLevel == 1) {
427 codegen(C, TM.get(), AddStream, 0, *Mod);
429 splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
432 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
/// Turn every global value of Mod that the index reports as not live into a
/// declaration, then erase such values.
///
/// \param Mod            module to clean up.
/// \param DefinedGlobals GUID-to-summary map used to find the summary of each
///                       global value; values without an entry are skipped.
/// \param Index          summary index queried for liveness.
435 static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals,
436 const ModuleSummaryIndex &Index) {
// First pass: collect the dead values and strip their definitions.
437 std::vector<GlobalValue*> DeadGVs;
438 for (auto &GV : Mod.global_values())
439 if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID()))
440 if (!Index.isGlobalValueLive(GVS)) {
441 DeadGVs.push_back(&GV);
442 convertToDeclaration(GV);
445 // Now that all dead bodies have been dropped, delete the actual objects
446 // themselves when possible.
447 for (GlobalValue *GV : DeadGVs) {
448 GV->removeDeadConstantUsers();
449 // Might reference something defined in native object (i.e. dropped a
450 // non-prevailing IR def, but we need to keep the declaration).
// NOTE(review): the condition guarding the erase is not visible in this
// excerpt.
452 GV->eraseFromParent();
/// ThinLTO backend entry point for a single module.
///
/// After target and remarks setup, the visible pipeline is: pre-opt hook,
/// renameModuleForThinLTO, dropDeadSymbols, thinLTOResolveWeakForLinkerModule,
/// post-promote hook, thinLTOInternalizeModule (only when DefinedGlobals is
/// non-empty), post-internalize hook, function importing, post-import hook,
/// opt() and finally codegen(). With Conf.CodeGenOnly set, only codegen() is
/// run. A hook returning false ends processing early; the remarks file is
/// still finalized.
///
/// \param Conf           LTO configuration and hooks.
/// \param Task           task id passed to hooks, remarks setup and codegen.
/// \param AddStream      callback returning the output stream for a task.
/// \param Mod            module to process.
/// \param CombinedIndex  combined summary index; used for renaming, liveness,
///                       importing and as opt()'s import summary.
/// \param ImportList     functions to import into Mod.
/// \param DefinedGlobals summaries of the globals defined in Mod.
/// \param ModuleMap      maps a module identifier to the BitcodeModule from
///                       which imports are lazily loaded.
/// \returns the target-lookup or remarks-setup error if either failed,
///          otherwise the result of finalizeOptimizationRemarks().
///          NOTE(review): the statement executed when importFunctions fails
///          is not visible in this excerpt.
456 Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
457 Module &Mod, const ModuleSummaryIndex &CombinedIndex,
458 const FunctionImporter::ImportMapTy &ImportList,
459 const GVSummaryMapTy &DefinedGlobals,
460 MapVector<StringRef, BitcodeModule> &ModuleMap) {
461 Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
463 return TOrErr.takeError();
465 std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod);
467 // Setup optimization remarks.
468 auto DiagFileOrErr = lto::setupOptimizationRemarks(
469 Mod.getContext(), Conf.RemarksFilename, Conf.RemarksWithHotness, Task);
471 return DiagFileOrErr.takeError();
472 auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
// Codegen-only mode bypasses all hooks and transformations below.
474 if (Conf.CodeGenOnly) {
475 codegen(Conf, TM.get(), AddStream, Task, Mod);
476 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
479 if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
480 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
482 renameModuleForThinLTO(Mod, CombinedIndex);
484 dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex);
486 thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
488 if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
489 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
491 if (!DefinedGlobals.empty())
492 thinLTOInternalizeModule(Mod, DefinedGlobals);
494 if (Conf.PostInternalizeModuleHook &&
495 !Conf.PostInternalizeModuleHook(Task, Mod))
496 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
// Loader used by the importer: lazily materialises the requested module into
// Mod's own context, with metadata loading deferred.
498 auto ModuleLoader = [&](StringRef Identifier) {
499 assert(Mod.getContext().isODRUniquingDebugTypes() &&
500 "ODR Type uniquing should be enabled on the context");
501 auto I = ModuleMap.find(Identifier);
502 assert(I != ModuleMap.end());
503 return I->second.getLazyModule(Mod.getContext(),
504 /*ShouldLazyLoadMetadata=*/true,
505 /*IsImporting*/ true);
508 FunctionImporter Importer(CombinedIndex, ModuleLoader);
509 if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
512 if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
513 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
515 if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
516 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
517 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
519 codegen(Conf, TM.get(), AddStream, Task, Mod);
520 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));