1 //===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the "backend" phase of LTO, i.e. it performs
11 // optimization and code generation on a loaded module. It is generally used
12 // internally by the LTO class but can also be used independently, for example
13 // to implement a standalone ThinLTO backend.
15 //===----------------------------------------------------------------------===//
17 #include "llvm/LTO/LTOBackend.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/CGSCCPassManager.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/Bitcode/BitcodeWriter.h"
24 #include "llvm/IR/LegacyPassManager.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/IR/Verifier.h"
27 #include "llvm/LTO/LTO.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 #include "llvm/Object/ModuleSymbolTable.h"
30 #include "llvm/Passes/PassBuilder.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/Path.h"
35 #include "llvm/Support/Program.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include "llvm/Support/TargetRegistry.h"
38 #include "llvm/Support/ThreadPool.h"
39 #include "llvm/Target/TargetMachine.h"
40 #include "llvm/Transforms/IPO.h"
41 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
42 #include "llvm/Transforms/Scalar/LoopPassManager.h"
43 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
44 #include "llvm/Transforms/Utils/SplitModule.h"
// Reports a failure to open a file: writes "failed to open <Path>: <Msg>"
// followed by a newline to errs().
//
// Path: name of the file that could not be opened.
// Msg:  human-readable reason, appended after the path.
//
// The function is declared LLVM_ATTRIBUTE_NORETURN, so callers may assume
// control never comes back from it.
// NOTE(review): the statement that actually terminates and the closing brace
// are not visible in this listing -- confirm against the full file.
49 LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
50 errs() << "failed to open " << Path << ": " << Msg << '\n';
// Sets this Config up for -save-temps style debugging output.
//
// Visible effects:
//  - ShouldDiscardValueNames is cleared.
//  - ResolutionFile becomes a text-mode stream named
//    OutputFileName + "resolution.txt".
//  - Each of the six module hooks (PreOpt, PostPromote, PostInternalize,
//    PostImport, PostOpt, PreCodeGen) is wrapped so that the previously
//    installed hook still runs and the module is then written out as bitcode.
//  - CombinedIndexHook is replaced by one that writes the combined index to
//    OutputFileName + "index.bc" and a dot rendering of it to
//    OutputFileName + "index.dot".
//
// OutputFileName:     prefix prepended verbatim to every file name built here
//                     (no separator is inserted).
// UseInputModulePath: when true, a module whose identifier is not "ld-temp.o"
//                     is written under its own identifier instead of under
//                     OutputFileName.
// Returns errorCodeToError(EC) on the early-return path, Error::success() at
// the end.
//
// NOTE(review): this listing omits short lines (the EC declarations, the
// guards in front of the error paths, the `else`, the lambdas' return
// statements and closing braces). Comments below describe only what is
// visible -- confirm against the full file.
55 Error Config::addSaveTemps(std::string OutputFileName,
56 bool UseInputModulePath) {
57 ShouldDiscardValueNames = false;
// Open the resolution file in text mode; EC receives any open error.
60 ResolutionFile = llvm::make_unique<raw_fd_ostream>(
61 OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
63 return errorCodeToError(EC);
// Helper that replaces Hook with a wrapper writing the module to
// "<prefix><PathSuffix>.bc". It captures by reference, but is only invoked
// within this function (the six calls below).
65 auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
66 // Keep track of the hook provided by the linker, which also needs to run.
67 ModuleHookFn LinkerHook = Hook;
// The installed wrapper captures by copy ([=]): it is stored in Hook and is
// therefore still callable after addSaveTemps has returned.
68 Hook = [=](unsigned Task, const Module &M) {
69 // If the linker's hook returned false, we need to pass that result
// through.
71 if (LinkerHook && !LinkerHook(Task, M))
74 std::string PathPrefix;
75 // If this is the combined module (not a ThinLTO backend compile) or the
76 // user hasn't requested using the input module's path, emit to a file
77 // named from the provided OutputFileName with the Task ID appended.
78 if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
79 PathPrefix = OutputFileName;
// A Task equal to (unsigned)-1 gets no task number in the file name.
80 if (Task != (unsigned)-1)
81 PathPrefix += utostr(Task) + ".";
// Alternative prefix: the module's own identifier followed by ".".
// NOTE(review): the `else` selecting this assignment is not visible here.
83 PathPrefix = M.getModuleIdentifier() + ".";
84 std::string Path = PathPrefix + PathSuffix + ".bc";
86 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
87 // Because -save-temps is a debugging feature, we report the error
// directly and exit.
90 reportOpenError(Path, EC.message());
91 WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false);
// The numeric prefixes in the suffixes make the dumps sort in the order the
// hooks are listed here.
96 setHook("0.preopt", PreOptModuleHook);
97 setHook("1.promote", PostPromoteModuleHook);
98 setHook("2.internalize", PostInternalizeModuleHook);
99 setHook("3.import", PostImportModuleHook);
100 setHook("4.opt", PostOptModuleHook);
101 setHook("5.precodegen", PreCodeGenModuleHook);
// Dump the combined summary index twice: as bitcode and as a dot graph.
103 CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
104 std::string Path = OutputFileName + "index.bc";
106 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
107 // Because -save-temps is a debugging feature, we report the error
108 // directly and exit.
110 reportOpenError(Path, EC.message());
111 WriteIndexToFile(Index, OS);
// Path is reused for the second output file.
113 Path = OutputFileName + "index.dot";
114 raw_fd_ostream OSDot(Path, EC, sys::fs::OpenFlags::F_None);
116 reportOpenError(Path, EC.message());
117 Index.exportToDot(OSDot);
121 return Error::success();
// Builds a TargetMachine for module M using target TheTarget and the settings
// in Conf.
//
// Conf:      supplies MAttrs, CPU, Options, CGOptLevel and the optional
//            RelocModel / CodeModel overrides.
// TheTarget: target whose createTargetMachine() is invoked.
// M:         module providing the target triple and, when Conf carries no
//            override, the PIC level and code model.
// Returns the result of TheTarget->createTargetMachine() wrapped in a
// std::unique_ptr.
//
// NOTE(review): the `if`/`else` guards around the override handling and the
// left-hand side of the PIC-level assignment are not visible in this listing
// -- confirm against the full file.
126 std::unique_ptr<TargetMachine>
127 createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
128 StringRef TheTriple = M.getTargetTriple();
// Start from the triple's default subtarget features, then add every
// attribute listed in Conf.MAttrs.
129 SubtargetFeatures Features;
130 Features.getDefaultSubtargetFeatures(Triple(TheTriple));
131 for (const std::string &A : Conf.MAttrs)
132 Features.AddFeature(A);
// Relocation model: the value held in Conf.RelocModel, or one derived from
// the module's PIC level (NotPIC -> Static, anything else -> PIC_).
134 Reloc::Model RelocModel;
136 RelocModel = *Conf.RelocModel;
139 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
// Code model: the value held in Conf.CodeModel, or whatever the module
// itself reports.
141 Optional<CodeModel::Model> CodeModel;
143 CodeModel = *Conf.CodeModel;
145 CodeModel = M.getCodeModel();
147 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
148 TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
149 CodeModel, Conf.CGOptLevel));
// Builds one of the PassBuilder default LTO pipelines (ThinLTO or regular
// LTO) with the new pass manager.
//
// Conf:          supplies SampleProfile, ProfileRemapping and DebugPassManager.
// Mod:           module to optimise.
// TM:            target machine handed to PassBuilder.
// OptLevel:      numeric level mapped onto PassBuilder::O0 .. O3.
// IsThinLTO:     NOTE(review): presumably chooses between the two pipeline
//                builders below; the branch itself is not visible here.
// ExportSummary: passed to buildLTODefaultPipeline.
// ImportSummary: NOTE(review): not referenced on any visible line.
//
// NOTE(review): this listing omits short lines (the AA declaration, the
// switch labels, the IsThinLTO branch and the call that runs the pipeline)
// -- confirm against the full file.
152 static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
153 unsigned OptLevel, bool IsThinLTO,
154 ModuleSummaryIndex *ExportSummary,
155 const ModuleSummaryIndex *ImportSummary) {
// PGO options are only populated when a sample profile was supplied.
156 Optional<PGOOptions> PGOOpt;
157 if (!Conf.SampleProfile.empty())
158 PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping,
161 PassBuilder PB(TM, PGOOpt);
164 // Parse a custom AA pipeline if asked to.
// The pipeline text parsed here is the fixed string "default".
165 if (auto Err = PB.parseAAPipeline(AA, "default"))
166 report_fatal_error("Error parsing default AA pipeline");
// All four analysis managers take Conf.DebugPassManager as their constructor
// argument.
168 LoopAnalysisManager LAM(Conf.DebugPassManager);
169 FunctionAnalysisManager FAM(Conf.DebugPassManager);
170 CGSCCAnalysisManager CGAM(Conf.DebugPassManager);
171 ModuleAnalysisManager MAM(Conf.DebugPassManager);
173 // Register the AA manager first so that our version is the one used.
174 FAM.registerPass([&] { return std::move(AA); });
176 // Register all the basic analyses with the managers.
177 PB.registerModuleAnalyses(MAM);
178 PB.registerCGSCCAnalyses(CGAM);
179 PB.registerFunctionAnalyses(FAM);
180 PB.registerLoopAnalyses(LAM);
181 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
183 ModulePassManager MPM(Conf.DebugPassManager);
184 // FIXME (davide): verify the input.
// Translate the numeric level into PassBuilder's enum; an unsupported value
// reaches llvm_unreachable. (Case labels are not visible in this listing.)
186 PassBuilder::OptimizationLevel OL;
190 llvm_unreachable("Invalid optimization level");
192 OL = PassBuilder::O0;
195 OL = PassBuilder::O1;
198 OL = PassBuilder::O2;
201 OL = PassBuilder::O3;
// MPM is overwritten with one of the two prebuilt default pipelines.
206 MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager,
209 MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary);
212 // FIXME (davide): verify the output.
// Builds a new-pass-manager pipeline from a textual description.
//
// Mod:            module the pipeline is meant for.
// TM:             target machine. NOTE(review): not referenced on any visible
//                 line; presumably handed to the PassBuilder.
// PipelineDesc:   pass pipeline text, parsed with PB.parsePassPipeline.
// AAPipelineDesc: alias-analysis pipeline text; only parsed when non-empty.
// DisableVerify:  NOTE(review): presumably guards the trailing VerifierPass;
//                 the guard is not visible here.
//
// Either parse failure is fatal: report_fatal_error is called with the
// offending description and the stringified parse error.
//
// NOTE(review): this listing omits short lines (the PB and AA declarations,
// the DisableVerify guard and the call that runs the pipeline) -- confirm
// against the full file.
215 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
216 std::string PipelineDesc,
217 std::string AAPipelineDesc,
218 bool DisableVerify) {
222 // Parse a custom AA pipeline if asked to.
223 if (!AAPipelineDesc.empty())
224 if (auto Err = PB.parseAAPipeline(AA, AAPipelineDesc))
225 report_fatal_error("unable to parse AA pipeline description '" +
226 AAPipelineDesc + "': " + toString(std::move(Err)));
// The analysis managers are default-constructed here.
228 LoopAnalysisManager LAM;
229 FunctionAnalysisManager FAM;
230 CGSCCAnalysisManager CGAM;
231 ModuleAnalysisManager MAM;
233 // Register the AA manager first so that our version is the one used.
234 FAM.registerPass([&] { return std::move(AA); });
236 // Register all the basic analyses with the managers.
237 PB.registerModuleAnalyses(MAM);
238 PB.registerCGSCCAnalyses(CGAM);
239 PB.registerFunctionAnalyses(FAM);
240 PB.registerLoopAnalyses(LAM);
241 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
243 ModulePassManager MPM;
245 // Always verify the input.
246 MPM.addPass(VerifierPass());
248 // Now, add all the passes we've been requested to.
249 if (auto Err = PB.parsePassPipeline(MPM, PipelineDesc))
250 report_fatal_error("unable to parse pass pipeline description '" +
251 PipelineDesc + "': " + toString(std::move(Err)));
// Second verifier, placed after the requested passes, to check their output.
254 MPM.addPass(VerifierPass());
// Populates a legacy pass manager with an LTO pipeline produced by
// PassManagerBuilder.
//
// Conf:          supplies DisableVerify, OptLevel and SampleProfile.
// Mod:           module the pipeline is meant for.
// TM:            provides the target IR analysis and the target triple.
// IsThinLTO:     NOTE(review): presumably chooses between the two populate
//                calls at the end; the branch is not visible here.
// ExportSummary: stored in PMB.ExportSummary.
// ImportSummary: stored in PMB.ImportSummary.
//
// NOTE(review): this listing omits short lines (the IsThinLTO branch and the
// call that runs the passes) -- confirm against the full file.
258 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
259 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
260 const ModuleSummaryIndex *ImportSummary) {
261 legacy::PassManager passes;
// Add the target's TTI wrapper pass before the pipeline is populated.
262 passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
264 PassManagerBuilder PMB;
// NOTE(review): raw `new`; ownership presumably passes to PMB -- confirm.
265 PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
266 PMB.Inliner = createFunctionInliningPass();
267 PMB.ExportSummary = ExportSummary;
268 PMB.ImportSummary = ImportSummary;
269 // Unconditionally verify input since it is not verified before this
270 // point and has unknown origin.
271 PMB.VerifyInput = true;
// Output verification follows the Conf.DisableVerify setting.
272 PMB.VerifyOutput = !Conf.DisableVerify;
// Both vectorizers are switched on regardless of Conf.
273 PMB.LoopVectorize = true;
274 PMB.SLPVectorize = true;
275 PMB.OptLevel = Conf.OptLevel;
276 PMB.PGOSampleUse = Conf.SampleProfile;
278 PMB.populateThinLTOPassManager(passes);
280 PMB.populateLTOPassManager(passes);
// Runs the optimisation phase on Mod and then calls the post-opt hook.
//
// The pipeline is chosen in this order:
//   1. Conf.OptPipeline non-empty -> runNewPMCustomPasses
//   2. Conf.UseNewPM              -> runNewPMPasses
//   3. otherwise                  -> runOldPMPasses
//
// Conf:          pipeline selection and the PostOptModuleHook.
// TM:            target machine forwarded to the chosen runner.
// Task:          task id forwarded to the hook.
// Mod:           module to optimise.
// IsThinLTO, ExportSummary, ImportSummary: forwarded to the default-pipeline
//                runners.
// Returns true when no PostOptModuleHook is installed or when the hook
// returns true; false when the hook returns false.
//
// NOTE(review): argument continuation lines and the final `else` are not
// visible in this listing -- confirm against the full file.
284 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
285 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
286 const ModuleSummaryIndex *ImportSummary) {
287 // FIXME: Plumb the combined index into the new pass manager.
288 if (!Conf.OptPipeline.empty())
289 runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
291 else if (Conf.UseNewPM)
292 runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
295 runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
296 return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
// Emits code for Mod into the stream obtained from AddStream(Task).
//
// Conf:      supplies PreCodeGenModuleHook, DwoPath and DwoDir.
// TM:        target machine used to build the codegen pipeline; its
//            MCOptions.SplitDwarfFile is overwritten when a .dwo file is used.
// AddStream: callback returning the output stream for a task id.
// Task:      task id passed to the hook and to AddStream, and used to name
//            the .dwo file inside DwoDir.
// Mod:       module to compile.
//
// Failure to create DwoDir, open the .dwo file or set up the codegen
// pipeline is fatal (report_fatal_error).
//
// NOTE(review): this listing omits short lines (the early return after the
// hook, the EC declaration and guards, and the file-type argument of
// addPassesToEmitFile) -- confirm against the full file.
299 void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
300 unsigned Task, Module &Mod) {
// A hook returning false vetoes code generation for this task.
301 if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
304 std::unique_ptr<ToolOutputFile> DwoOut;
// DwoFile starts out as Conf.DwoPath; a non-empty DwoDir replaces it with
// "<DwoDir>/<Task>.dwo".
305 SmallString<1024> DwoFile(Conf.DwoPath);
306 if (!Conf.DwoDir.empty()) {
308 if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir))
309 report_fatal_error("Failed to create directory " + Conf.DwoDir + ": " +
312 DwoFile = Conf.DwoDir;
313 sys::path::append(DwoFile, std::to_string(Task) + ".dwo");
316 if (!DwoFile.empty()) {
// Record the split-DWARF file name in the MC options and open the file.
318 TM->Options.MCOptions.SplitDwarfFile = DwoFile.str().str();
319 DwoOut = llvm::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::F_None);
321 report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
324 auto Stream = AddStream(Task);
325 legacy::PassManager CodeGenPasses;
// The .dwo stream is passed only when one was opened above.
326 if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
327 DwoOut ? &DwoOut->os() : nullptr,
329 report_fatal_error("Failed to setup codegen");
330 CodeGenPasses.run(Mod);
// Code-generates Mod in partitions, each compiled by a task submitted to a
// thread pool.
//
// C:         config forwarded to createTargetMachine/codegen and used to
//            build each worker's LTOLLVMContext.
// TM:        only used here to obtain the Target; every worker builds its own
//            TargetMachine.
// AddStream: forwarded to codegen for each partition.
// ParallelCodeGenParallelismLevel: constructor argument of the thread pool
//            and the partition count passed alongside the module.
// Mod:       module to split; ownership is moved into the splitting call.
//
// NOTE(review): this listing omits short lines (the head of the splitting
// call, the BC buffer declaration, the error guard after parsing) -- confirm
// against the full file.
336 void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
337 unsigned ParallelCodeGenParallelismLevel,
338 std::unique_ptr<Module> Mod) {
339 ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
// Running counter; each partition receives the current value as its task id.
340 unsigned ThreadCount = 0;
341 const Target *T = &TM->getTarget();
344 std::move(Mod), ParallelCodeGenParallelismLevel,
345 [&](std::unique_ptr<Module> MPart) {
346 // We want to clone the module in a new context to multi-thread the
347 // codegen. We do it by serializing partition modules to bitcode
348 // (while still on the main thread, in order to avoid data races) and
349 // spinning up new threads which deserialize the partitions into
350 // separate contexts.
351 // FIXME: Provide a more direct way to do this in LLVM.
353 raw_svector_ostream BCOS(BC);
354 WriteBitcodeToFile(*MPart, BCOS);
357 CodegenThreadPool.async(
358 [&](const SmallString<0> &BC, unsigned ThreadId) {
// Each worker parses the bitcode under the buffer name "ld-temp.o".
359 LTOLLVMContext Ctx(C);
360 Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
361 MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"),
364 report_fatal_error("Failed to read bitcode");
365 std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
// This local TM shadows the function parameter of the same name.
367 std::unique_ptr<TargetMachine> TM =
368 createTargetMachine(C, T, *MPartInCtx);
370 codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx);
372 // Pass BC using std::move to ensure that it gets moved rather than
373 // copied into the thread's context.
374 std::move(BC), ThreadCount++);
378 // Because the inner lambda (which runs in a worker thread) captures our local
379 // variables, we need to wait for the worker threads to terminate before we
380 // can leave the function scope.
381 CodegenThreadPool.wait();
// Fixes up Mod's target triple and looks the triple up in the TargetRegistry.
//
// Triple selection: a non-empty C.OverrideTriple always replaces the module's
// triple; otherwise C.DefaultTriple is applied only when the module has none.
//
// C:   supplies OverrideTriple and DefaultTriple.
// Mod: module whose target triple may be rewritten.
// Returns a StringError carrying the registry's message on the error path.
//
// NOTE(review): the Msg declaration, the null check on T and the success
// return are not visible in this listing -- confirm against the full file.
384 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
385 if (!C.OverrideTriple.empty())
386 Mod.setTargetTriple(C.OverrideTriple);
387 else if (Mod.getTargetTriple().empty())
388 Mod.setTargetTriple(C.DefaultTriple);
391 const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg);
393 return make_error<StringError>(Msg, inconvertibleErrorCode());
// Marks the optimization-remarks output file as kept and flushes its stream.
//
// DiagOutputFile: remarks file, taken by value (ownership moves in).
// Returns Error::success() on every visible path.
//
// NOTE(review): the return-type line and the guard in front of the first
// return (presumably a null check on DiagOutputFile) are not visible in this
// listing -- confirm against the full file.
400 finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) {
401 // Make sure we flush the diagnostic remarks file in case the linker doesn't
402 // call the global destructors before exiting.
404 return Error::success();
405 DiagOutputFile->keep();
406 DiagOutputFile->os().flush();
407 return Error::success();
// Regular (non-Thin) LTO backend: optimises Mod unless C.CodeGenOnly is set,
// then generates code for it.
//
// C:             configuration.
// AddStream:     callback supplying output streams.
// ParallelCodeGenParallelismLevel: 1 selects a single codegen() call with
//                task id 0; the splitCodeGen call covers the other case.
// Mod:           module to compile.
// CombinedIndex: passed to opt() as the export summary.
// Returns the error from target lookup or remarks setup, otherwise the
// result of finalizeOptimizationRemarks().
//
// NOTE(review): this listing omits short lines (the error guards, the `else`
// and the last argument of splitCodeGen) -- confirm against the full file.
410 Error lto::backend(Config &C, AddStreamFn AddStream,
411 unsigned ParallelCodeGenParallelismLevel,
412 std::unique_ptr<Module> Mod,
413 ModuleSummaryIndex &CombinedIndex) {
414 Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
416 return TOrErr.takeError();
418 std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod);
420 // Setup optimization remarks.
421 auto DiagFileOrErr = lto::setupOptimizationRemarks(
422 Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
424 return DiagFileOrErr.takeError();
425 auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
// When opt() returns false (the post-opt hook said stop), skip codegen but
// still finalize the remarks file.
427 if (!C.CodeGenOnly) {
428 if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
429 /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr))
430 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
433 if (ParallelCodeGenParallelismLevel == 1) {
434 codegen(C, TM.get(), AddStream, 0, *Mod);
436 splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
439 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
// Turns every global value of Mod that the index reports as not live into a
// declaration, then erases those globals.
//
// Mod:            module to prune.
// DefinedGlobals: map from GUID to summary; globals without an entry are
//                 left untouched.
// Index:          queried through isGlobalValueLive() for each summary found.
//
// NOTE(review): the guard in front of eraseFromParent() is not visible in
// this listing; the comment above it suggests erasure is conditional --
// confirm against the full file.
442 static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals,
443 const ModuleSummaryIndex &Index) {
// First pass: collect the dead globals and convert each to a declaration.
// Erasure is deferred to the second loop below.
444 std::vector<GlobalValue*> DeadGVs;
445 for (auto &GV : Mod.global_values())
446 if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID()))
447 if (!Index.isGlobalValueLive(GVS)) {
448 DeadGVs.push_back(&GV);
449 convertToDeclaration(GV);
452 // Now that all dead bodies have been dropped, delete the actual objects
453 // themselves when possible.
454 for (GlobalValue *GV : DeadGVs) {
455 GV->removeDeadConstantUsers();
456 // Might reference something defined in native object (i.e. dropped a
457 // non-prevailing IR def, but we need to keep the declaration).
459 GV->eraseFromParent();
// ThinLTO backend for a single module. Visible sequence of steps:
//   target lookup -> remarks setup -> [CodeGenOnly: codegen and return]
//   -> PreOpt hook -> renameModuleForThinLTO -> dropDeadSymbols
//   -> thinLTOResolvePrevailingInModule -> PostPromote hook
//   -> thinLTOInternalizeModule (only when DefinedGlobals is non-empty)
//   -> PostInternalize hook -> function importing -> PostImport hook
//   -> opt -> codegen.
// Any hook that returns false, and opt() returning false, ends processing
// early; each of those paths still calls finalizeOptimizationRemarks().
//
// Conf:           configuration and hooks.
// Task:           task id passed to hooks, remarks setup, opt and codegen.
// AddStream:      callback supplying the output stream.
// Mod:            module to process.
// CombinedIndex:  combined summary index used for renaming, liveness,
//                 importing, and as opt()'s import summary.
// ImportList:     functions to import into Mod.
// DefinedGlobals: summaries of the globals defined in Mod.
// ModuleMap:      identifier -> BitcodeModule, used to load source modules
//                 during importing.
// Returns the error from target lookup or remarks setup, otherwise the
// result of finalizeOptimizationRemarks().
//
// NOTE(review): this listing omits short lines (the error guards and the
// statement following a failed import) -- confirm against the full file.
463 Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
464 Module &Mod, const ModuleSummaryIndex &CombinedIndex,
465 const FunctionImporter::ImportMapTy &ImportList,
466 const GVSummaryMapTy &DefinedGlobals,
467 MapVector<StringRef, BitcodeModule> &ModuleMap) {
468 Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
470 return TOrErr.takeError();
472 std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod);
474 // Setup optimization remarks.
475 auto DiagFileOrErr = lto::setupOptimizationRemarks(
476 Mod.getContext(), Conf.RemarksFilename, Conf.RemarksWithHotness, Task);
478 return DiagFileOrErr.takeError();
479 auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
// CodeGenOnly goes straight to codegen; none of the steps below run.
481 if (Conf.CodeGenOnly) {
482 codegen(Conf, TM.get(), AddStream, Task, Mod);
483 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
486 if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
487 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
489 renameModuleForThinLTO(Mod, CombinedIndex);
491 dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex);
493 thinLTOResolvePrevailingInModule(Mod, DefinedGlobals);
495 if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
496 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
498 if (!DefinedGlobals.empty())
499 thinLTOInternalizeModule(Mod, DefinedGlobals);
501 if (Conf.PostInternalizeModuleHook &&
502 !Conf.PostInternalizeModuleHook(Task, Mod))
503 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
// Loader callback handed to the importer: lazily loads the module registered
// under Identifier in ModuleMap into Mod's context. A missing entry is only
// caught by the assert.
505 auto ModuleLoader = [&](StringRef Identifier) {
506 assert(Mod.getContext().isODRUniquingDebugTypes() &&
507 "ODR Type uniquing should be enabled on the context");
508 auto I = ModuleMap.find(Identifier);
509 assert(I != ModuleMap.end());
510 return I->second.getLazyModule(Mod.getContext(),
511 /*ShouldLazyLoadMetadata=*/true,
512 /*IsImporting*/ true);
515 FunctionImporter Importer(CombinedIndex, ModuleLoader);
// NOTE(review): the statement executed when importing fails is not visible
// here; also check whether that path finalizes the remarks file.
516 if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
519 if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
520 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
522 if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
523 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
524 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
526 codegen(Conf, TM.get(), AddStream, Task, Mod);
527 return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));