1 //===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This provides a class for OpenMP runtime code generation specialized to NVPTX
// targets.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
16 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
18 #include "CGOpenMPRuntime.h"
19 #include "CodeGenFunction.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/IR/CallSite.h"
26 class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
/// State used while emitting a target entry function; an instance is passed
/// to emitEntryHeader and emitEntryFooter.
28 class EntryFunctionState {
/// Basic block pointer, null until assigned. Presumably the block through
/// which the entry function exits (per the name) -- confirm against the
/// implementation.
30 llvm::BasicBlock *ExitBB;
/// Default constructor: starts with no block recorded (ExitBB is nullptr).
32 EntryFunctionState() : ExitBB(nullptr){};
/// Bookkeeping for the worker function of a target region; an instance is
/// passed to emitEntryHeader, emitWorkerFunction and emitWorkerLoop.
35 class WorkerFunctionState {
/// The LLVM function for the workers. Presumably set by
/// createWorkerFunction -- confirm in the implementation file.
37 llvm::Function *WorkerFn;
/// Function info kept alongside WorkerFn. Presumably describes the signature
/// of WorkerFn -- confirm in the implementation file.
38 const CGFunctionInfo *CGFI;
/// Constructs the state for the module \a CGM (defined out of line).
40 WorkerFunctionState(CodeGenModule &CGM);
/// Creates the worker function for the module \a CGM (defined out of line).
43 void createWorkerFunction(CodeGenModule &CGM);
46 /// \brief Helper for target entry function. Guide the master and worker
47 /// threads to their respective locations.
/// \param CGF Code generation context of the function being emitted.
/// \param EST State of the entry function being emitted.
/// \param WST State of the worker function for this target region.
48 void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
49 WorkerFunctionState &WST);
51 /// \brief Signal termination of OMP execution.
/// \param CGF Code generation context of the function being emitted.
/// \param EST State of the entry function being finished (same type of
/// object that emitEntryHeader receives).
52 void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
59 /// \brief Get the GPU warp size.
/// \param CGF Code generation context of the function being emitted.
/// \return Value representing the warp size.
60 llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF);
62 /// \brief Get the id of the current thread on the GPU.
/// \param CGF Code generation context of the function being emitted.
/// \return Value representing the id of the current thread.
63 llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF);
65 /// \brief Get the maximum number of threads in a block of the GPU.
/// \param CGF Code generation context of the function being emitted.
/// \return Value representing the maximum number of threads in a block.
66 llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF);
68 /// \brief Get barrier to synchronize all threads in a block.
/// NOTE(review): the return type is void, so despite "Get" this presumably
/// emits the barrier through \a CGF rather than returning it -- confirm
/// against the implementation.
/// \param CGF Code generation context of the function being emitted.
69 void getNVPTXCTABarrier(CodeGenFunction &CGF);
71 /// \brief Synchronize all GPU threads in a block.
/// \param CGF Code generation context of the function being emitted.
72 void syncCTAThreads(CodeGenFunction &CGF);
78 /// \brief Get the thread id of the OMP master thread.
79 /// The master thread id is the first thread (lane) of the last warp in the
80 /// GPU block. Warp size is assumed to be some power of 2.
81 /// Thread id is 0 indexed.
/// The examples below correspond to a warp size of 32, i.e. the master id is
/// (NumThreads - 1) rounded down to a multiple of the warp size.
82 /// E.g: If NumThreads is 33, master id is 32.
83 /// If NumThreads is 64, master id is 32.
84 /// If NumThreads is 1024, master id is 992.
/// \param CGF Code generation context of the function being emitted.
/// \return Value representing the id of the master thread.
85 llvm::Value *getMasterThreadID(CodeGenFunction &CGF);
88 // Private state and methods.
91 // Master-worker control state.
92 /// Global variable for the number of requested OMP threads in a parallel
/// region.
93 llvm::GlobalVariable *ActiveWorkers;
94 /// Global variable for the outlined function the workers are to execute.
95 llvm::GlobalVariable *WorkID;
97 /// \brief Initialize master-worker control state.
/// Presumably this sets up ActiveWorkers and WorkID, the members grouped
/// under "Master-worker control state" -- confirm in the implementation.
98 void initializeEnvironment();
100 /// \brief Emit the worker function for the current target region.
/// \param WST State of the worker function to emit.
101 void emitWorkerFunction(WorkerFunctionState &WST);
103 /// \brief Helper for worker function. Emit body of worker loop.
/// \param CGF Code generation context of the function being emitted.
/// \param WST State of the worker function whose loop is emitted.
104 void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
106 /// \brief Returns specified OpenMP runtime function for the current OpenMP
107 /// implementation. Specialized for the NVPTX device.
108 /// \param Function OpenMP runtime function, identified by an unsigned value
/// (presumably an enumerator declared in the implementation -- confirm).
109 /// \return Specified function.
110 llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
113 // Base class overrides.
116 /// \brief Creates offloading entry for the provided entry ID \a ID,
117 /// address \a Addr and size \a Size.
/// \param ID Entry ID of the offloading entry.
/// \param Addr Address of the offloading entry.
/// \param Size Size of the offloading entry.
118 void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
119 uint64_t Size) override;
121 /// \brief Emit outlined function for 'target' directive on the NVPTX device.
123 /// \param D Directive to emit.
124 /// \param ParentName Name of the function that encloses the target region.
125 /// \param OutlinedFn Outlined function value to be defined by this call.
126 /// \param OutlinedFnID Outlined function ID value to be defined by this call.
127 /// \param IsOffloadEntry True if the outlined function is an offload entry.
128 /// An outlined function may not be an entry if, e.g. the if clause always
129 /// evaluates to false.
/// \param CodeGen Code generation sequence for the \a D directive.
130 void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
131 StringRef ParentName,
132 llvm::Function *&OutlinedFn,
133 llvm::Constant *&OutlinedFnID,
135 const RegionCodeGenTy &CodeGen) override;
/// \brief Constructs the NVPTX-specialized OpenMP runtime code generator.
/// \param CGM Module code generator this runtime is attached to.
138 explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
140 /// \brief This function ought to emit, in the general case, a call to
141 /// the OpenMP runtime kmpc_push_num_teams. In the NVPTX backend it is not
142 /// needed, as these numbers are obtained through the PTX grid and block
/// configuration.
/// \param CGF Code generation context of the function being emitted.
143 /// \param NumTeams An integer expression of teams.
144 /// \param ThreadLimit An integer expression of threads.
/// \param Loc Source location associated with the clause.
145 void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
146 const Expr *ThreadLimit, SourceLocation Loc) override;
148 /// \brief Emits the function for the specified OpenMP parallel or teams
149 /// directive
150 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
151 /// kmp_int32 BoundID, struct context_vars*).
/// NOTE(review): the earlier wording read "inlined function for the
/// specified OpenMP parallel directive but an inlined function for teams",
/// which contradicts itself. Presumably the function is outlined for
/// parallel and inlined for teams -- confirm against the implementation.
152 /// \param D OpenMP directive.
153 /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
154 /// \param InnermostKind Kind of innermost directive (for simple directives it
155 /// is a directive itself, for combined - its innermost directive).
156 /// \param CodeGen Code generation sequence for the \a D directive.
158 emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
159 const VarDecl *ThreadIDVar,
160 OpenMPDirectiveKind InnermostKind,
161 const RegionCodeGenTy &CodeGen) override;
163 /// \brief Emits code for teams call of the \a OutlinedFn with
164 /// variables captured in a record which address is stored in \a
/// CapturedVars.
/// \param CGF Code generation context of the function being emitted.
/// \param D OpenMP directive the call is emitted for.
/// \param Loc Source location associated with the call.
166 /// \param OutlinedFn Outlined function to be run by team masters. Type of
167 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
168 /// \param CapturedVars A pointer to the record with the references to
169 /// variables used in \a OutlinedFn function.
171 void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
172 SourceLocation Loc, llvm::Value *OutlinedFn,
173 ArrayRef<llvm::Value *> CapturedVars) override;
176 } // CodeGen namespace.
177 } // clang namespace.
179 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H