1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
11 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
14 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15 #include "llvm/Target/TargetMachine.h"
// Forward declarations of the two target machine classes. Their full
// definitions are not part of this header.
19 class AMDGPUTargetMachine;
21 class GCNTargetMachine;
// Factory functions for the R600 passes and the AMDGPU CFG structurizer.
// Each takes no arguments and returns a FunctionPass pointer.
// NOTE(review): ownership of the returned pass presumably transfers to the
// caller (typically a pass manager) -- confirm against the definitions.
30 FunctionPass *createR600VectorRegMerger();
31 FunctionPass *createR600ExpandSpecialInstrsPass();
32 FunctionPass *createR600EmitClauseMarkers();
33 FunctionPass *createR600ClauseMergePass();
34 FunctionPass *createR600Packetizer();
35 FunctionPass *createR600ControlFlowFinalizer();
36 FunctionPass *createAMDGPUCFGStructurizerPass();
// Factory functions for the SI passes, plus the AMDGPU code-gen-prepare and
// machine CFG structurizer passes. Each takes no arguments and returns a
// FunctionPass pointer; the definitions are not in this header.
39 FunctionPass *createSIAnnotateControlFlowPass();
40 FunctionPass *createSIFoldOperandsPass();
41 FunctionPass *createSIPeepholeSDWAPass();
42 FunctionPass *createSILowerI1CopiesPass();
43 FunctionPass *createSIShrinkInstructionsPass();
44 FunctionPass *createSILoadStoreOptimizerPass();
45 FunctionPass *createSIWholeQuadModePass();
46 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
47 FunctionPass *createSIFixSGPRCopiesPass();
48 FunctionPass *createSIDebuggerInsertNopsPass();
49 FunctionPass *createSIInsertWaitsPass();
50 FunctionPass *createSIInsertWaitcntsPass();
51 FunctionPass *createAMDGPUCodeGenPreparePass();
52 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
// Registration hooks and identifiers for AMDGPU-level passes.
// Each initialize*Pass function takes the PassRegistry by reference.
// NOTE(review): the `extern char &...ID` references are presumably the
// address-based pass identifiers defined alongside each pass -- confirm.
54 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
55 extern char &AMDGPUMachineCFGStructurizerID;
57 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
// Unlike the FunctionPass factories, this one returns a generic Pass pointer.
59 Pass *createAMDGPUAnnotateKernelFeaturesPass();
60 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
61 extern char &AMDGPUAnnotateKernelFeaturesID;
// Returns a ModulePass pointer.
63 ModulePass *createAMDGPULowerIntrinsicsPass();
64 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
65 extern char &AMDGPULowerIntrinsicsID;
// Registration hooks and identifiers for the SI passes. Each pass is declared
// as a pair: an initialize*Pass(PassRegistry &) function and an
// `extern char &` reference (presumably the pass ID -- confirm against the
// definitions, which are not in this header).
67 void initializeSIFoldOperandsPass(PassRegistry &);
68 extern char &SIFoldOperandsID;
70 void initializeSIPeepholeSDWAPass(PassRegistry &);
71 extern char &SIPeepholeSDWAID;
73 void initializeSIShrinkInstructionsPass(PassRegistry&);
74 extern char &SIShrinkInstructionsID;
76 void initializeSIFixSGPRCopiesPass(PassRegistry &);
77 extern char &SIFixSGPRCopiesID;
79 void initializeSIFixVGPRCopiesPass(PassRegistry &);
80 extern char &SIFixVGPRCopiesID;
82 void initializeSILowerI1CopiesPass(PassRegistry &);
83 extern char &SILowerI1CopiesID;
85 void initializeSILoadStoreOptimizerPass(PassRegistry &);
86 extern char &SILoadStoreOptimizerID;
88 void initializeSIWholeQuadModePass(PassRegistry &);
89 extern char &SIWholeQuadModeID;
91 void initializeSILowerControlFlowPass(PassRegistry &);
92 extern char &SILowerControlFlowID;
// Note: this identifier is spelled with a "PassID" suffix, unlike the plain
// "ID" suffix used by the other declarations in this group.
94 void initializeSIInsertSkipsPass(PassRegistry &);
95 extern char &SIInsertSkipsPassID;
97 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
98 extern char &SIOptimizeExecMaskingID;
100 // Passes common to R600 and SI
101 FunctionPass *createAMDGPUPromoteAlloca();
102 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
103 extern char &AMDGPUPromoteAllocaID;
// Returns a generic Pass pointer rather than a FunctionPass pointer.
105 Pass *createAMDGPUStructurizeCFGPass();
// Takes the target machine by non-const reference and the code generation
// optimization level; returns a FunctionPass pointer.
106 FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
107 CodeGenOpt::Level OptLevel);
// GlobalOpt defaults to true when the argument is omitted.
// NOTE(review): the effect of GlobalOpt is not visible in this header.
108 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
109 ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
110 FunctionPass *createAMDGPUAnnotateUniformValues();
// Factory, registration hook and identifier for the unify-metadata module pass.
112 ModulePass* createAMDGPUUnifyMetadataPass();
113 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
114 extern char &AMDGPUUnifyMetadataID;
// Further initialize*Pass(PassRegistry &) / `extern char &` pairs. The
// references are presumably pass identifiers (confirm against definitions).
// Note that some identifiers here use a "PassID" suffix and others "ID".
116 void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
117 extern char &SIFixControlFlowLiveIntervalsID;
119 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
120 extern char &AMDGPUAnnotateUniformValuesPassID;
122 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
123 extern char &AMDGPUCodeGenPrepareID;
125 void initializeSIAnnotateControlFlowPass(PassRegistry&);
126 extern char &SIAnnotateControlFlowPassID;
128 void initializeSIDebuggerInsertNopsPass(PassRegistry&);
129 extern char &SIDebuggerInsertNopsID;
131 void initializeSIInsertWaitsPass(PassRegistry&);
132 extern char &SIInsertWaitsID;
134 void initializeSIInsertWaitcntsPass(PassRegistry&);
135 extern char &SIInsertWaitcntsID;
137 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
138 extern char &AMDGPUUnifyDivergentExitNodesID;
// Factory returning an ImmutablePass pointer, followed by its registration
// hook. The doubled "PassPass" presumably arises because the pass class is
// itself named AMDGPUAAWrapperPass -- confirm.
140 ImmutablePass *createAMDGPUAAWrapperPass();
141 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
// Accessors that each return a reference to a Target object.
// NOTE(review): presumably one shared instance per target (AMDGPU and GCN)
// -- confirm against the definitions.
143 Target &getTheAMDGPUTarget();
144 Target &getTheGCNTarget();
// Four consecutive enumerators, one per dword (0-3).
// NOTE(review): presumably these name the dwords of the scratch resource
// descriptor; the enclosing enum header is not visible here -- confirm.
149 TI_SCRATCH_RSRC_DWORD0,
150 TI_SCRATCH_RSRC_DWORD1,
151 TI_SCRATCH_RSRC_DWORD2,
152 TI_SCRATCH_RSRC_DWORD3
156 } // End namespace llvm
158 /// OpenCL uses address spaces to differentiate between
159 /// various memory regions on the hardware. On the CPU
160 /// all of the address spaces point to the same memory,
161 /// however on the GPU, each address space points to
162 /// a separate piece of memory that is unique from other
163 /// memory locations.
165 // The following address space values depend on the triple environment.
// Unlike the constants that follow, these are not `const static` and carry no
// initializer here, so their values must be assigned elsewhere.
166 unsigned PRIVATE_ADDRESS; ///< Address space for private memory.
167 unsigned FLAT_ADDRESS; ///< Address space for flat memory.
168 unsigned REGION_ADDRESS; ///< Address space for region memory.
170 // The maximum value for flat, generic, local, private, constant and region.
171 const static unsigned MAX_COMMON_ADDRESS = 5;
// Address space numbers that are fixed at compile time.
173 const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0).
174 const static unsigned CONSTANT_ADDRESS = 2; ///< Address space for constant memory (VTX2).
175 const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory.
176 const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for directly addressable parameter memory (CONST0).
177 const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirectly addressable parameter memory (VTX1).
179 // Do not re-order the CONSTANT_BUFFER_* constants. Several places depend on
180 // this order to be able to dynamically index a constant buffer, for example:
182 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
// The sixteen values are therefore consecutive, running from 8 to 23.
184 const static unsigned CONSTANT_BUFFER_0 = 8;
185 const static unsigned CONSTANT_BUFFER_1 = 9;
186 const static unsigned CONSTANT_BUFFER_2 = 10;
187 const static unsigned CONSTANT_BUFFER_3 = 11;
188 const static unsigned CONSTANT_BUFFER_4 = 12;
189 const static unsigned CONSTANT_BUFFER_5 = 13;
190 const static unsigned CONSTANT_BUFFER_6 = 14;
191 const static unsigned CONSTANT_BUFFER_7 = 15;
192 const static unsigned CONSTANT_BUFFER_8 = 16;
193 const static unsigned CONSTANT_BUFFER_9 = 17;
194 const static unsigned CONSTANT_BUFFER_10 = 18;
195 const static unsigned CONSTANT_BUFFER_11 = 19;
196 const static unsigned CONSTANT_BUFFER_12 = 20;
197 const static unsigned CONSTANT_BUFFER_13 = 21;
198 const static unsigned CONSTANT_BUFFER_14 = 22;
199 const static unsigned CONSTANT_BUFFER_15 = 23;
201 // Some places use this if the address space can't be determined.
// ~0u is the all-ones unsigned value, so it cannot collide with any of the
// small address space numbers defined above.
202 const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
// Overloads returning an AMDGPUAS by value, keyed on a Module, a
// TargetMachine (both by const reference) or a Triple (by value).
// NOTE(review): presumably the Module and TargetMachine overloads derive the
// result from the associated target triple -- confirm against the definitions.
207 AMDGPUAS getAMDGPUAS(const Module &M);
208 AMDGPUAS getAMDGPUAS(const TargetMachine &TM);
209 AMDGPUAS getAMDGPUAS(Triple T);
210 } // namespace AMDGPU