1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Top-level implementation for the NVPTX target.
12 //===----------------------------------------------------------------------===//
14 #include "NVPTXTargetMachine.h"
16 #include "NVPTXAllocaHoisting.h"
17 #include "NVPTXLowerAggrCopies.h"
18 #include "NVPTXTargetObjectFile.h"
19 #include "NVPTXTargetTransformInfo.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/Triple.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/CodeGen/Passes.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/IR/LegacyPassManager.h"
26 #include "llvm/Pass.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Support/TargetRegistry.h"
29 #include "llvm/Target/TargetMachine.h"
30 #include "llvm/Target/TargetOptions.h"
31 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
32 #include "llvm/Transforms/Scalar.h"
33 #include "llvm/Transforms/Scalar/GVN.h"
34 #include "llvm/Transforms/Vectorize.h"
40 // The load/store vectorizer (LSV) is still relatively new; this switch lets
41 // us turn it off in case we encounter (or suspect) a bug.
// Hidden command-line option "disable-nvptx-load-store-vectorizer"; it is
// initialized to false and is checked in NVPTXPassConfig::addIRPasses before
// the load/store vectorizer pass is added.
// NOTE(review): the line carrying the declaration's type is not visible in
// this view (the embedded numbering skips 42) -- confirm against the full
// file.
43 DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
44 cl::desc("Disable load/store vectorizer"),
45 cl::init(false), cl::Hidden);
// Forward declarations of the NVPTX pass-registration hooks. Each takes the
// PassRegistry to register with, and each is called from
// LLVMInitializeNVPTXTarget.
// NOTE(review): the line that opens this namespace is not visible in this
// view (the embedded numbering skips 46-48).
49 void initializeNVVMIntrRangePass(PassRegistry&);
50 void initializeNVVMReflectPass(PassRegistry&);
51 void initializeGenericToNVVMPass(PassRegistry&);
52 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
53 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
54 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
55 void initializeNVPTXLowerArgsPass(PassRegistry &);
56 void initializeNVPTXLowerAllocaPass(PassRegistry &);
58 } // end namespace llvm
60 extern "C" void LLVMInitializeNVPTXTarget() {
61 // Register the target.
62 RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
63 RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
65 // FIXME: This pass is really intended to be invoked during IR optimization,
66 // but it's very NVPTX-specific.
67 PassRegistry &PR = *PassRegistry::getPassRegistry();
68 initializeNVVMReflectPass(PR);
69 initializeNVVMIntrRangePass(PR);
70 initializeGenericToNVVMPass(PR);
71 initializeNVPTXAllocaHoistingPass(PR);
72 initializeNVPTXAssignValidGlobalNamesPass(PR);
73 initializeNVPTXLowerArgsPass(PR);
74 initializeNVPTXLowerAllocaPass(PR);
75 initializeNVPTXLowerAggrCopiesPass(PR);
// Builds the data layout string handed to the LLVMTargetMachine base class
// by the NVPTXTargetMachine constructor. The string starts as "e" and the
// fixed "-i64:64-i128:128-v16:16-v32:32-n16:32:64" suffix is appended.
// NOTE(review): is64Bit is not referenced by the lines visible here and no
// return statement is visible; the embedded numbering skips 80-83 and 85-88,
// so statements appear to be missing from this view -- confirm against the
// full file before relying on this description.
78 static std::string computeDataLayout(bool is64Bit) {
79 std::string Ret = "e";
84 Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
// Chooses the code model passed to the LLVMTargetMachine base class by the
// NVPTXTargetMachine constructor. The visible lines return CodeModel::Small.
// NOTE(review): CM is not referenced by the lines visible here; the embedded
// numbering skips 90-91, so presumably a caller-supplied model is honoured
// first and Small is only the fallback -- confirm against the full file.
89 static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) {
92 return CodeModel::Small;
// Common constructor shared by the 32-bit and 64-bit target machines.
//
// The base LLVMTargetMachine receives the layout string from
// computeDataLayout(is64bit), a fixed Reloc::PIC_ relocation model and the
// code model from getEffectiveCodeModel(CM). RM is accepted but not used.
// The members is64bit, TLOF (a new NVPTXTargetObjectFile) and Subtarget are
// then initialized, and drvInterface is chosen from the triple's OS.
//
// NOTE(review): as the lines appear here, the NVPTX::CUDA assignment follows
// the NVCL branch unconditionally. The embedded numbering skips 109 and
// 111-113, so presumably an `else` and the end of the body were lost from
// this view -- confirm against the full file.
95 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
96 StringRef CPU, StringRef FS,
97 const TargetOptions &Options,
98 Optional<Reloc::Model> RM,
99 Optional<CodeModel::Model> CM,
100 CodeGenOpt::Level OL, bool is64bit)
101 // The PIC relocation model is used regardless of what the client has
102 // specified, as it is the only relocation model currently supported.
103 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options,
104 Reloc::PIC_, getEffectiveCodeModel(CM), OL),
105 is64bit(is64bit), TLOF(llvm::make_unique<NVPTXTargetObjectFile>()),
106 Subtarget(TT, CPU, FS, *this) {
// An NVCL operating system in the triple selects the NVCL driver interface.
107 if (TT.getOS() == Triple::NVCL)
108 drvInterface = NVPTX::NVCL;
110 drvInterface = NVPTX::CUDA;
114 NVPTXTargetMachine::~NVPTXTargetMachine() = default;
116 void NVPTXTargetMachine32::anchor() {}
118 NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
119 StringRef CPU, StringRef FS,
120 const TargetOptions &Options,
121 Optional<Reloc::Model> RM,
122 Optional<CodeModel::Model> CM,
123 CodeGenOpt::Level OL, bool JIT)
124 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
126 void NVPTXTargetMachine64::anchor() {}
128 NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
129 StringRef CPU, StringRef FS,
130 const TargetOptions &Options,
131 Optional<Reloc::Model> RM,
132 Optional<CodeModel::Model> CM,
133 CodeGenOpt::Level OL, bool JIT)
134 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
// Pass pipeline configuration for the NVPTX code generator. It derives from
// TargetPassConfig, overrides the pipeline hooks declared below, and adds a
// few helpers that group related IR passes.
// NOTE(review): access specifiers and the closing lines of the class are not
// visible in this view (the embedded numbering skips 139, 155-156 and 166);
// createPassConfig constructs this class from outside, so the constructor is
// presumably public -- confirm against the full file.
138 class NVPTXPassConfig : public TargetPassConfig {
// Binds the configuration to its target machine and pass manager by
// forwarding both to the TargetPassConfig base.
140 NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
141 : TargetPassConfig(TM, PM) {}
// Returns the target machine obtained from getTM(), typed as
// NVPTXTargetMachine.
143 NVPTXTargetMachine &getNVPTXTargetMachine() const {
144 return getTM<NVPTXTargetMachine>();
// Pipeline hooks overridden from the base class.
147 void addIRPasses() override;
148 bool addInstSelector() override;
149 void addPostRegAlloc() override;
150 void addMachineSSAOptimization() override;
// Register-allocation hooks; the definitions in this file return no
// allocator and assert that none is passed in.
152 FunctionPass *createTargetRegisterAllocator(bool) override;
153 void addFastRegAlloc(FunctionPass *RegAllocPass) override;
154 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
157 // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
158 // function is only called in opt mode.
159 void addEarlyCSEOrGVNPass();
161 // Add passes that propagate special memory spaces.
162 void addAddressSpaceInferencePasses();
164 // Add passes that perform straight-line scalar optimizations.
165 void addStraightLineScalarOptimizationPasses();
168 } // end anonymous namespace
170 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
171 return new NVPTXPassConfig(*this, PM);
174 void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
175 Builder.addExtension(
176 PassManagerBuilder::EP_EarlyAsPossible,
177 [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
178 PM.add(createNVVMReflectPass());
179 PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
// Returns the target transform info for function F, built from an
// NVPTXTTIImpl constructed with this target machine and F.
// NOTE(review): the line carrying the return type is not visible in this view
// (the embedded numbering skips 183) -- confirm against the full file.
184 NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
185 return TargetTransformInfo(NVPTXTTIImpl(this, F));
188 void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
189 if (getOptLevel() == CodeGenOpt::Aggressive)
190 addPass(createGVNPass());
192 addPass(createEarlyCSEPass());
195 void NVPTXPassConfig::addAddressSpaceInferencePasses() {
196 // NVPTXLowerArgs emits alloca for byval parameters which can often
197 // be eliminated by SROA.
198 addPass(createSROAPass());
199 addPass(createNVPTXLowerAllocaPass());
200 addPass(createInferAddressSpacesPass());
203 void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
204 addPass(createSeparateConstOffsetFromGEPPass());
205 addPass(createSpeculativeExecutionPass());
206 // ReassociateGEPs exposes more opportunites for SLSR. See
207 // the example in reassociate-geps-and-slsr.ll.
208 addPass(createStraightLineStrengthReducePass());
209 // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
210 // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
211 // for some of our benchmarks.
212 addEarlyCSEOrGVNPass();
213 // Run NaryReassociate after EarlyCSE/GVN to be more effective.
214 addPass(createNaryReassociatePass());
215 // NaryReassociate on GEPs creates redundant common expressions, so run
216 // EarlyCSE after it.
217 addPass(createEarlyCSEPass());
220 void NVPTXPassConfig::addIRPasses() {
221 // The following passes are known to not play well with virtual regs hanging
222 // around after register allocation (which in our case, is *all* registers).
223 // We explicitly disable them here. We do, however, need some functionality
224 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
225 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
226 disablePass(&PrologEpilogCodeInserterID);
227 disablePass(&MachineCopyPropagationID);
228 disablePass(&TailDuplicateID);
229 disablePass(&StackMapLivenessID);
230 disablePass(&LiveDebugValuesID);
231 disablePass(&PostRASchedulerID);
232 disablePass(&FuncletLayoutID);
233 disablePass(&PatchableFunctionID);
235 // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
236 // it here does nothing. But since we need it for correctness when lowering
237 // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
238 // call addEarlyAsPossiblePasses.
239 addPass(createNVVMReflectPass());
241 if (getOptLevel() != CodeGenOpt::None)
242 addPass(createNVPTXImageOptimizerPass());
243 addPass(createNVPTXAssignValidGlobalNamesPass());
244 addPass(createGenericToNVVMPass());
246 // NVPTXLowerArgs is required for correctness and should be run right
247 // before the address space inference passes.
248 addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
249 if (getOptLevel() != CodeGenOpt::None) {
250 addAddressSpaceInferencePasses();
251 if (!DisableLoadStoreVectorizer)
252 addPass(createLoadStoreVectorizerPass());
253 addStraightLineScalarOptimizationPasses();
256 // === LSR and other generic IR passes ===
257 TargetPassConfig::addIRPasses();
258 // EarlyCSE is not always strong enough to clean up what LSR produces. For
259 // example, GVN can combine
266 // %0 = shl nsw %a, 2
269 // but EarlyCSE can do neither of them.
270 if (getOptLevel() != CodeGenOpt::None)
271 addEarlyCSEOrGVNPass();
// Adds the passes that run around instruction selection: LowerAggrCopies and
// AllocaHoisting first, then the NVPTX DAG instruction selector created for
// this target machine and optimization level.
// NOTE(review): the function is declared to return bool but no return
// statement is visible; the embedded numbering skips 283-285, so the end of
// the body appears to be missing from this view -- confirm against the full
// file.
274 bool NVPTXPassConfig::addInstSelector() {
275 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
277 addPass(createLowerAggrCopies());
278 addPass(createAllocaHoisting());
279 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
// Subtargets without image handles get the handle-replacement pass.
281 if (!ST.hasImageHandles())
282 addPass(createNVPTXReplaceImageHandlesPass());
287 void NVPTXPassConfig::addPostRegAlloc() {
288 addPass(createNVPTXPrologEpilogPass(), false);
289 if (getOptLevel() != CodeGenOpt::None) {
290 // NVPTXPrologEpilogPass calculates frame object offset and replace frame
291 // index with VRFrame register. NVPTXPeephole need to be run after that and
292 // will replace VRFrame with VRFrameLocal when possible.
293 addPass(createNVPTXPeephole());
297 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
298 return nullptr; // No reg alloc
301 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
302 assert(!RegAllocPass && "NVPTX uses no regalloc!");
303 addPass(&PHIEliminationID);
304 addPass(&TwoAddressInstructionPassID);
307 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
308 assert(!RegAllocPass && "NVPTX uses no regalloc!");
310 addPass(&ProcessImplicitDefsID);
311 addPass(&LiveVariablesID);
312 addPass(&MachineLoopInfoID);
313 addPass(&PHIEliminationID);
315 addPass(&TwoAddressInstructionPassID);
316 addPass(&RegisterCoalescerID);
318 // PreRA instruction scheduling.
319 if (addPass(&MachineSchedulerID))
320 printAndVerify("After Machine Scheduling");
323 addPass(&StackSlotColoringID);
325 // FIXME: Needs physical registers
326 //addPass(&PostRAMachineLICMID);
328 printAndVerify("After StackSlotColoring");
// Adds the machine-level optimizations that run while the code is still in
// SSA form: early tail duplication, PHI optimization, stack coloring, local
// stack slot allocation, dead instruction elimination, LICM, CSE, sinking and
// the peephole optimizer, with verification points between the groups.
331 void NVPTXPassConfig::addMachineSSAOptimization() {
332 // Pre-ra tail duplication.
333 if (addPass(&EarlyTailDuplicateID))
334 printAndVerify("After Pre-RegAlloc TailDuplicate");
336 // Optimize PHIs before DCE: removing dead PHI cycles may make more
337 // instructions dead.
338 addPass(&OptimizePHIsID);
340 // This pass merges large allocas. StackSlotColoring is a different pass
341 // which merges spill slots.
342 addPass(&StackColoringID);
344 // If the target requests it, assign local variables to stack slots relative
345 // to one another and simplify frame index references where possible.
346 addPass(&LocalStackSlotAllocationID);
348 // With optimization, dead code should already be eliminated. However
349 // there is one known exception: lowered code for arguments that are only
350 // used by tail calls, where the tail calls reuse the incoming stack
351 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
352 addPass(&DeadMachineInstructionElimID);
353 printAndVerify("After codegen DCE pass");
355 // Allow targets to insert passes that improve instruction level parallelism,
356 // like if-conversion. Such passes will typically need dominator trees and
357 // loop info, just like LICM and CSE below.
// NOTE(review): no call that inserts such passes is visible here; the
// embedded numbering skips 358, so presumably a line guarding the
// verification below was lost from this view -- confirm against the full
// file.
359 printAndVerify("After ILP optimizations");
361 addPass(&MachineLICMID);
362 addPass(&MachineCSEID);
364 addPass(&MachineSinkingID);
365 printAndVerify("After Machine LICM, CSE and Sinking passes");
367 addPass(&PeepholeOptimizerID);
368 printAndVerify("After codegen peephole optimization pass");