//===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the "backend" phase of LTO, i.e. it performs
// optimization and code generation on a loaded module. It is generally used
// internally by the LTO class but can also be used independently, for example
// to implement a standalone ThinLTO backend.
//
//===----------------------------------------------------------------------===//
17 #include "llvm/LTO/LTOBackend.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/CGSCCPassManager.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/Bitcode/BitcodeWriter.h"
24 #include "llvm/IR/LegacyPassManager.h"
25 #include "llvm/IR/PassManager.h"
26 #include "llvm/IR/Verifier.h"
27 #include "llvm/LTO/LTO.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 #include "llvm/Object/ModuleSymbolTable.h"
30 #include "llvm/Passes/PassBuilder.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/FileSystem.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/ThreadPool.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Transforms/IPO.h"
37 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
38 #include "llvm/Transforms/Scalar/LoopPassManager.h"
39 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
40 #include "llvm/Transforms/Utils/SplitModule.h"
45 LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
46 errs() << "failed to open " << Path << ": " << Msg << '\n';
51 Error Config::addSaveTemps(std::string OutputFileName,
52 bool UseInputModulePath) {
53 ShouldDiscardValueNames = false;
56 ResolutionFile = llvm::make_unique<raw_fd_ostream>(
57 OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
59 return errorCodeToError(EC);
61 auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
62 // Keep track of the hook provided by the linker, which also needs to run.
63 ModuleHookFn LinkerHook = Hook;
64 Hook = [=](unsigned Task, const Module &M) {
65 // If the linker's hook returned false, we need to pass that result
67 if (LinkerHook && !LinkerHook(Task, M))
70 std::string PathPrefix;
71 // If this is the combined module (not a ThinLTO backend compile) or the
72 // user hasn't requested using the input module's path, emit to a file
73 // named from the provided OutputFileName with the Task ID appended.
74 if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
75 PathPrefix = OutputFileName + utostr(Task);
77 PathPrefix = M.getModuleIdentifier();
78 std::string Path = PathPrefix + "." + PathSuffix + ".bc";
80 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
81 // Because -save-temps is a debugging feature, we report the error
84 reportOpenError(Path, EC.message());
85 WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false);
90 setHook("0.preopt", PreOptModuleHook);
91 setHook("1.promote", PostPromoteModuleHook);
92 setHook("2.internalize", PostInternalizeModuleHook);
93 setHook("3.import", PostImportModuleHook);
94 setHook("4.opt", PostOptModuleHook);
95 setHook("5.precodegen", PreCodeGenModuleHook);
97 CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
98 std::string Path = OutputFileName + "index.bc";
100 raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
101 // Because -save-temps is a debugging feature, we report the error
102 // directly and exit.
104 reportOpenError(Path, EC.message());
105 WriteIndexToFile(Index, OS);
109 return Error::success();
114 std::unique_ptr<TargetMachine>
115 createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
116 StringRef TheTriple = M.getTargetTriple();
117 SubtargetFeatures Features;
118 Features.getDefaultSubtargetFeatures(Triple(TheTriple));
119 for (const std::string &A : Conf.MAttrs)
120 Features.AddFeature(A);
122 Reloc::Model RelocModel;
124 RelocModel = *Conf.RelocModel;
127 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
129 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
130 TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel,
131 Conf.CodeModel, Conf.CGOptLevel));
134 static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
135 unsigned OptLevel, bool IsThinLTO) {
136 Optional<PGOOptions> PGOOpt;
137 if (!Conf.SampleProfile.empty())
138 PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true);
140 PassBuilder PB(TM, PGOOpt);
143 // Parse a custom AA pipeline if asked to.
144 if (!PB.parseAAPipeline(AA, "default"))
145 report_fatal_error("Error parsing default AA pipeline");
147 LoopAnalysisManager LAM(Conf.DebugPassManager);
148 FunctionAnalysisManager FAM(Conf.DebugPassManager);
149 CGSCCAnalysisManager CGAM(Conf.DebugPassManager);
150 ModuleAnalysisManager MAM(Conf.DebugPassManager);
152 // Register the AA manager first so that our version is the one used.
153 FAM.registerPass([&] { return std::move(AA); });
155 // Register all the basic analyses with the managers.
156 PB.registerModuleAnalyses(MAM);
157 PB.registerCGSCCAnalyses(CGAM);
158 PB.registerFunctionAnalyses(FAM);
159 PB.registerLoopAnalyses(LAM);
160 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
162 ModulePassManager MPM(Conf.DebugPassManager);
163 // FIXME (davide): verify the input.
165 PassBuilder::OptimizationLevel OL;
169 llvm_unreachable("Invalid optimization level");
171 OL = PassBuilder::O0;
174 OL = PassBuilder::O1;
177 OL = PassBuilder::O2;
180 OL = PassBuilder::O3;
185 MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager);
187 MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager);
190 // FIXME (davide): verify the output.
193 static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
194 std::string PipelineDesc,
195 std::string AAPipelineDesc,
196 bool DisableVerify) {
200 // Parse a custom AA pipeline if asked to.
201 if (!AAPipelineDesc.empty())
202 if (!PB.parseAAPipeline(AA, AAPipelineDesc))
203 report_fatal_error("unable to parse AA pipeline description: " +
206 LoopAnalysisManager LAM;
207 FunctionAnalysisManager FAM;
208 CGSCCAnalysisManager CGAM;
209 ModuleAnalysisManager MAM;
211 // Register the AA manager first so that our version is the one used.
212 FAM.registerPass([&] { return std::move(AA); });
214 // Register all the basic analyses with the managers.
215 PB.registerModuleAnalyses(MAM);
216 PB.registerCGSCCAnalyses(CGAM);
217 PB.registerFunctionAnalyses(FAM);
218 PB.registerLoopAnalyses(LAM);
219 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
221 ModulePassManager MPM;
223 // Always verify the input.
224 MPM.addPass(VerifierPass());
226 // Now, add all the passes we've been requested to.
227 if (!PB.parsePassPipeline(MPM, PipelineDesc))
228 report_fatal_error("unable to parse pass pipeline description: " +
232 MPM.addPass(VerifierPass());
236 static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
237 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
238 const ModuleSummaryIndex *ImportSummary) {
239 legacy::PassManager passes;
240 passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
242 PassManagerBuilder PMB;
243 PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
244 PMB.Inliner = createFunctionInliningPass();
245 PMB.ExportSummary = ExportSummary;
246 PMB.ImportSummary = ImportSummary;
247 // Unconditionally verify input since it is not verified before this
248 // point and has unknown origin.
249 PMB.VerifyInput = true;
250 PMB.VerifyOutput = !Conf.DisableVerify;
251 PMB.LoopVectorize = true;
252 PMB.SLPVectorize = true;
253 PMB.OptLevel = Conf.OptLevel;
254 PMB.PGOSampleUse = Conf.SampleProfile;
256 PMB.populateThinLTOPassManager(passes);
258 PMB.populateLTOPassManager(passes);
262 bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
263 bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
264 const ModuleSummaryIndex *ImportSummary) {
265 // FIXME: Plumb the combined index into the new pass manager.
266 if (!Conf.OptPipeline.empty())
267 runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
269 else if (Conf.UseNewPM)
270 runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO);
272 runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
273 return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
276 void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
277 unsigned Task, Module &Mod) {
278 if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
281 auto Stream = AddStream(Task);
282 legacy::PassManager CodeGenPasses;
283 if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS, Conf.CGFileType))
284 report_fatal_error("Failed to setup codegen");
285 CodeGenPasses.run(Mod);
288 void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
289 unsigned ParallelCodeGenParallelismLevel,
290 std::unique_ptr<Module> Mod) {
291 ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
292 unsigned ThreadCount = 0;
293 const Target *T = &TM->getTarget();
296 std::move(Mod), ParallelCodeGenParallelismLevel,
297 [&](std::unique_ptr<Module> MPart) {
298 // We want to clone the module in a new context to multi-thread the
299 // codegen. We do it by serializing partition modules to bitcode
300 // (while still on the main thread, in order to avoid data races) and
301 // spinning up new threads which deserialize the partitions into
302 // separate contexts.
303 // FIXME: Provide a more direct way to do this in LLVM.
305 raw_svector_ostream BCOS(BC);
306 WriteBitcodeToFile(MPart.get(), BCOS);
309 CodegenThreadPool.async(
310 [&](const SmallString<0> &BC, unsigned ThreadId) {
311 LTOLLVMContext Ctx(C);
312 Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
313 MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"),
316 report_fatal_error("Failed to read bitcode");
317 std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
319 std::unique_ptr<TargetMachine> TM =
320 createTargetMachine(C, T, *MPartInCtx);
322 codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx);
324 // Pass BC using std::move to ensure that it get moved rather than
325 // copied into the thread's context.
326 std::move(BC), ThreadCount++);
330 // Because the inner lambda (which runs in a worker thread) captures our local
331 // variables, we need to wait for the worker threads to terminate before we
332 // can leave the function scope.
333 CodegenThreadPool.wait();
336 Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
337 if (!C.OverrideTriple.empty())
338 Mod.setTargetTriple(C.OverrideTriple);
339 else if (Mod.getTargetTriple().empty())
340 Mod.setTargetTriple(C.DefaultTriple);
343 const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg);
345 return make_error<StringError>(Msg, inconvertibleErrorCode());
352 finalizeOptimizationRemarks(std::unique_ptr<ToolOutputFile> DiagOutputFile) {
353 // Make sure we flush the diagnostic remarks file in case the linker doesn't
354 // call the global destructors before exiting.
357 DiagOutputFile->keep();
358 DiagOutputFile->os().flush();
361 Error lto::backend(Config &C, AddStreamFn AddStream,
362 unsigned ParallelCodeGenParallelismLevel,
363 std::unique_ptr<Module> Mod,
364 ModuleSummaryIndex &CombinedIndex) {
365 Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
367 return TOrErr.takeError();
369 std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, *Mod);
371 // Setup optimization remarks.
372 auto DiagFileOrErr = lto::setupOptimizationRemarks(
373 Mod->getContext(), C.RemarksFilename, C.RemarksWithHotness);
375 return DiagFileOrErr.takeError();
376 auto DiagnosticOutputFile = std::move(*DiagFileOrErr);
378 if (!C.CodeGenOnly) {
379 if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false,
380 /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr)) {
381 finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
382 return Error::success();
386 if (ParallelCodeGenParallelismLevel == 1) {
387 codegen(C, TM.get(), AddStream, 0, *Mod);
389 splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
392 finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
393 return Error::success();
396 Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
397 Module &Mod, const ModuleSummaryIndex &CombinedIndex,
398 const FunctionImporter::ImportMapTy &ImportList,
399 const GVSummaryMapTy &DefinedGlobals,
400 MapVector<StringRef, BitcodeModule> &ModuleMap) {
401 Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
403 return TOrErr.takeError();
405 std::unique_ptr<TargetMachine> TM = createTargetMachine(Conf, *TOrErr, Mod);
407 if (Conf.CodeGenOnly) {
408 codegen(Conf, TM.get(), AddStream, Task, Mod);
409 return Error::success();
412 if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
413 return Error::success();
415 renameModuleForThinLTO(Mod, CombinedIndex);
417 thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
419 if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
420 return Error::success();
422 if (!DefinedGlobals.empty())
423 thinLTOInternalizeModule(Mod, DefinedGlobals);
425 if (Conf.PostInternalizeModuleHook &&
426 !Conf.PostInternalizeModuleHook(Task, Mod))
427 return Error::success();
429 auto ModuleLoader = [&](StringRef Identifier) {
430 assert(Mod.getContext().isODRUniquingDebugTypes() &&
431 "ODR Type uniquing should be enabled on the context");
432 auto I = ModuleMap.find(Identifier);
433 assert(I != ModuleMap.end());
434 return I->second.getLazyModule(Mod.getContext(),
435 /*ShouldLazyLoadMetadata=*/true,
436 /*IsImporting*/ true);
439 FunctionImporter Importer(CombinedIndex, ModuleLoader);
440 if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
443 if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
444 return Error::success();
446 if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true,
447 /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex))
448 return Error::success();
450 codegen(Conf, TM.get(), AddStream, Task, Mod);
451 return Error::success();