1 //===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This provides a class for OpenMP runtime code generation specialized to NVPTX
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
16 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
18 #include "CGOpenMPRuntime.h"
19 #include "CodeGenFunction.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/IR/CallSite.h"
/// OpenMP runtime code generation specialized for NVPTX device targets.
/// Derives from CGOpenMPRuntime; the members marked `override` below replace
/// the corresponding base-class hooks, and the remaining members are helpers
/// for emitting SPMD and non-SPMD target kernels.
26 class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
28 /// Defines the execution mode.
30 /// SPMD execution mode (all threads are worker threads).
32 /// Non-SPMD execution mode (1 master thread, others are workers).
34 /// Unknown execution mode (orphaned directive).
38 /// Outlined parallel-region functions queued as work for workers to execute.
39 llvm::SmallVector<llvm::Function *, 16> Work;
/// State of a target entry function. Passed by reference to the entry
/// header/footer helpers declared below.
41 struct EntryFunctionState {
/// NOTE(review): presumably the exit block of the entry function -- confirm
/// in the implementation file. Null until assigned.
42 llvm::BasicBlock *ExitBB = nullptr;
/// State of the worker function. Passed by reference to emitWorkerFunction,
/// emitWorkerLoop and emitNonSPMDEntryHeader below.
45 class WorkerFunctionState {
/// The worker function. NOTE(review): presumably assigned by
/// createWorkerFunction() -- confirm in the implementation file.
47 llvm::Function *WorkerFn;
/// Function info held by reference; the referenced object must outlive this
/// state object.
48 const CGFunctionInfo &CGFI;
51 WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc);
54 void createWorkerFunction(CodeGenModule &CGM);
/// Returns the execution mode. NOTE(review): presumably the value tracked in
/// CurrentExecutionMode -- confirm in the implementation file.
57 ExecutionMode getExecutionMode() const;
59 /// Emit the worker function for the current target region.
60 void emitWorkerFunction(WorkerFunctionState &WST);
62 /// Helper for worker function. Emit body of worker loop.
63 void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
65 /// Helper for non-SPMD target entry function. Guide the master and
66 /// worker threads to their respective locations.
67 void emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
68 WorkerFunctionState &WST);
70 /// Signal termination of OMP execution for non-SPMD target entry
72 void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
74 /// Helper for generic variables globalization prolog.
75 void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
77 /// Helper for generic variables globalization epilog.
78 void emitGenericVarsEpilog(CodeGenFunction &CGF);
80 /// Helper for SPMD mode target directive's entry function.
81 void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
82 const OMPExecutableDirective &D);
84 /// Signal termination of SPMD mode execution.
85 void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
88 // Base class overrides.
91 /// Creates offloading entry for the provided entry ID \a ID,
92 /// address \a Addr, size \a Size, and flags \a Flags.
/// NOTE(review): \a Linkage is not described by the original comment --
/// confirm its role in the implementation file.
93 void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr,
94 uint64_t Size, int32_t Flags,
95 llvm::GlobalValue::LinkageTypes Linkage) override;
97 /// Emit outlined function specialized for the Fork-Join
98 /// programming model for applicable target directives on the NVPTX device.
99 /// \param D Directive to emit.
100 /// \param ParentName Name of the function that encloses the target region.
101 /// \param OutlinedFn Outlined function value to be defined by this call.
102 /// \param OutlinedFnID Outlined function ID value to be defined by this call.
103 /// \param IsOffloadEntry True if the outlined function is an offload entry.
104 /// An outlined function may not be an entry if, e.g. the if clause always
105 /// evaluates to false.
/// \param CodeGen Object containing the target statements.
106 void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
107 llvm::Function *&OutlinedFn,
108 llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
109 const RegionCodeGenTy &CodeGen);
111 /// Emit outlined function specialized for the Single Program
112 /// Multiple Data programming model for applicable target directives on the
114 /// \param D Directive to emit.
115 /// \param ParentName Name of the function that encloses the target region.
116 /// \param OutlinedFn Outlined function value to be defined by this call.
117 /// \param OutlinedFnID Outlined function ID value to be defined by this call.
118 /// \param IsOffloadEntry True if the outlined function is an offload entry.
119 /// An outlined function may not be an entry if, e.g. the if clause always
120 /// evaluates to false.
121 /// \param CodeGen Object containing the target statements.
122 void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
123 llvm::Function *&OutlinedFn,
124 llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
125 const RegionCodeGenTy &CodeGen);
127 /// Emit outlined function for 'target' directive on the NVPTX
129 /// \param D Directive to emit.
130 /// \param ParentName Name of the function that encloses the target region.
131 /// \param OutlinedFn Outlined function value to be defined by this call.
132 /// \param OutlinedFnID Outlined function ID value to be defined by this call.
133 /// \param IsOffloadEntry True if the outlined function is an offload entry.
134 /// An outlined function may not be an entry if, e.g. the if clause always
135 /// evaluates to false.
/// \param CodeGen Object containing the target statements.
136 void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
137 StringRef ParentName,
138 llvm::Function *&OutlinedFn,
139 llvm::Constant *&OutlinedFnID,
141 const RegionCodeGenTy &CodeGen) override;
143 /// Emits code for parallel or serial call of the \a OutlinedFn with
144 /// variables captured in a record which address is stored in \a
146 /// This call is for the Non-SPMD Execution Mode.
147 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
148 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
149 /// \param CapturedVars A pointer to the record with the references to
150 /// variables used in \a OutlinedFn function.
151 /// \param IfCond Condition in the associated 'if' clause, if it was
152 /// specified, nullptr otherwise.
153 void emitNonSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
154 llvm::Value *OutlinedFn,
155 ArrayRef<llvm::Value *> CapturedVars,
158 /// Emits code for parallel or serial call of the \a OutlinedFn with
159 /// variables captured in a record which address is stored in \a
161 /// This call is for a parallel directive within an SPMD target directive.
162 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
163 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
164 /// \param CapturedVars A pointer to the record with the references to
165 /// variables used in \a OutlinedFn function.
166 /// \param IfCond Condition in the associated 'if' clause, if it was
167 /// specified, nullptr otherwise.
169 void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
170 llvm::Value *OutlinedFn,
171 ArrayRef<llvm::Value *> CapturedVars,
175 /// Get the function name of an outlined region.
176 /// The name can be customized depending on the target.
/// This override returns the fixed name "__omp_outlined__".
178 StringRef getOutlinedHelperName() const override {
179 return "__omp_outlined__";
/// Constructs the NVPTX runtime for the given code generation module.
/// `explicit` prevents implicit conversion from CodeGenModule.
183 explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
185 /// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
186 /// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
// NOTE(review): 'virtual' is redundant on a declaration already marked
// 'override'.
187 virtual void emitProcBindClause(CodeGenFunction &CGF,
188 OpenMPProcBindClauseKind ProcBind,
189 SourceLocation Loc) override;
191 /// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
192 /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
194 /// \param NumThreads An integer value of threads.
// NOTE(review): 'virtual' is redundant on a declaration already marked
// 'override'.
195 virtual void emitNumThreadsClause(CodeGenFunction &CGF,
196 llvm::Value *NumThreads,
197 SourceLocation Loc) override;
199 /// This function ought to emit, in the general case, a call to
200 /// the OpenMP runtime kmpc_push_num_teams. In NVPTX backend it is not needed
201 /// as these numbers are obtained through the PTX grid and block configuration.
202 /// \param NumTeams An integer expression of teams.
203 /// \param ThreadLimit An integer expression of threads.
204 void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
205 const Expr *ThreadLimit, SourceLocation Loc) override;
207 /// Emits outlined function for the specified OpenMP parallel
209 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
210 /// kmp_int32 BoundID, struct context_vars*).
211 /// \param D OpenMP directive.
212 /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
213 /// \param InnermostKind Kind of innermost directive (for simple directives it
214 /// is a directive itself, for combined - its innermost directive).
215 /// \param CodeGen Code generation sequence for the \a D directive.
217 emitParallelOutlinedFunction(const OMPExecutableDirective &D,
218 const VarDecl *ThreadIDVar,
219 OpenMPDirectiveKind InnermostKind,
220 const RegionCodeGenTy &CodeGen) override;
222 /// Emits outlined function for the specified OpenMP teams
224 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
225 /// kmp_int32 BoundID, struct context_vars*).
226 /// \param D OpenMP directive.
227 /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
228 /// \param InnermostKind Kind of innermost directive (for simple directives it
229 /// is a directive itself, for combined - its innermost directive).
230 /// \param CodeGen Code generation sequence for the \a D directive.
232 emitTeamsOutlinedFunction(const OMPExecutableDirective &D,
233 const VarDecl *ThreadIDVar,
234 OpenMPDirectiveKind InnermostKind,
235 const RegionCodeGenTy &CodeGen) override;
237 /// Emits code for teams call of the \a OutlinedFn with
238 /// variables captured in a record which address is stored in \a
240 /// \param OutlinedFn Outlined function to be run by team masters. Type of
241 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
242 /// \param CapturedVars A pointer to the record with the references to
243 /// variables used in \a OutlinedFn function.
245 void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
246 SourceLocation Loc, llvm::Value *OutlinedFn,
247 ArrayRef<llvm::Value *> CapturedVars) override;
249 /// Emits code for parallel or serial call of the \a OutlinedFn with
250 /// variables captured in a record which address is stored in \a
252 /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
253 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
254 /// \param CapturedVars A pointer to the record with the references to
255 /// variables used in \a OutlinedFn function.
256 /// \param IfCond Condition in the associated 'if' clause, if it was
257 /// specified, nullptr otherwise.
258 void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
259 llvm::Value *OutlinedFn,
260 ArrayRef<llvm::Value *> CapturedVars,
261 const Expr *IfCond) override;
263 /// Emits a critical region.
264 /// \param CriticalName Name of the critical region.
265 /// \param CriticalOpGen Generator for the statement associated with the given
267 /// \param Hint Value of the 'hint' clause (optional).
268 void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
269 const RegionCodeGenTy &CriticalOpGen,
271 const Expr *Hint = nullptr) override;
273 /// Emit code for the reduction clause.
275 /// \param Privates List of private copies for original reduction arguments.
276 /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
277 /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
278 /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
279 /// or 'operator binop(LHS, RHS)'.
280 /// \param Options List of options for reduction codegen:
281 /// WithNowait true if parent directive has also nowait clause, false
283 /// SimpleReduction Emit reduction operation only. Used for omp simd
284 /// directive on the host.
285 /// ReductionKind The kind of reduction to perform.
// NOTE(review): 'virtual' is redundant on a declaration already marked
// 'override'.
286 virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
287 ArrayRef<const Expr *> Privates,
288 ArrayRef<const Expr *> LHSExprs,
289 ArrayRef<const Expr *> RHSExprs,
290 ArrayRef<const Expr *> ReductionOps,
291 ReductionOptionsTy Options) override;
293 /// Returns specified OpenMP runtime function for the current OpenMP
294 /// implementation. Specialized for the NVPTX device.
295 /// \param Function OpenMP runtime function.
296 /// \return Specified function.
297 llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
299 /// Translates the native parameter of outlined function if this is required
301 /// \param FD Field decl from captured record for the parameter.
302 /// \param NativeParam Parameter itself.
303 const VarDecl *translateParameter(const FieldDecl *FD,
304 const VarDecl *NativeParam) const override;
306 /// Gets the address of the native argument based on the address of the
307 /// target-specific parameter.
308 /// \param NativeParam Parameter itself.
309 /// \param TargetParam Corresponding target-specific parameter.
310 Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam,
311 const VarDecl *TargetParam) const override;
313 /// Emits call of the outlined function with the provided arguments,
314 /// translating these arguments to correct target-specific arguments.
315 void emitOutlinedFunctionCall(
316 CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn,
317 ArrayRef<llvm::Value *> Args = llvm::None) const override;
319 /// Emits OpenMP-specific function prolog.
320 /// Required for device constructs.
321 void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) override;
323 /// Gets the OpenMP-specific address of the local variable.
324 Address getAddressOfLocalVariable(CodeGenFunction &CGF,
325 const VarDecl *VD) override;
327 /// Target codegen is specialized based on two data-sharing modes: CUDA, in
328 /// which the local variables are actually global threadlocal, and Generic, in
329 /// which the local variables are placed in global memory if they may escape
330 /// their declaration context.
331 enum DataSharingMode {
332 /// CUDA data sharing mode.
334 /// Generic data-sharing mode.
338 /// Cleans up references to the objects in the finished function.
340 void functionFinished(CodeGenFunction &CGF) override;
343 /// Track the execution mode when generating code for directives within a
344 /// target region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
345 /// target region and used by containing directives such as 'parallel'
346 /// to emit optimized code.
347 ExecutionMode CurrentExecutionMode = EM_Unknown;
349 /// true if we're emitting the code for the target region and next parallel
350 /// region is L0 for sure.
351 bool IsInTargetMasterThreadRegion = false;
352 /// true if we're definitely in the parallel region.
353 bool IsInParallelRegion = false;
355 /// Map between an outlined function and its wrapper.
356 llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
358 /// Emit function which wraps the outlined parallel region
359 /// and controls the parameters which are passed to this function.
360 /// The wrapper ensures that the outlined function is called
361 /// with the correct arguments when data is shared.
362 llvm::Function *createParallelDataSharingWrapper(
363 llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
365 /// The map of local variables to their addresses in the global memory.
366 using DeclToAddrMapTy = llvm::MapVector<const Decl *,
367 std::pair<const FieldDecl *, Address>>;
368 /// Set of the parameters passed by value escaping OpenMP context.
369 using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
/// Per-function record of globalized declarations; this is the value type of
/// the FunctionGlobalizedDecls map declared below.
/// NOTE(review): the individual fields are undocumented here -- their exact
/// roles should be confirmed against the implementation file.
370 struct FunctionData {
371 DeclToAddrMapTy LocalVarData;
372 EscapedParamsTy EscapedParameters;
373 llvm::SmallVector<const ValueDecl*, 4> EscapedVariableLengthDecls;
374 llvm::SmallVector<llvm::Value *, 4> EscapedVariableLengthDeclsAddrs;
375 const RecordDecl *GlobalRecord = nullptr;
376 llvm::Value *GlobalRecordAddr = nullptr;
377 std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
379 /// Maps the function to the list of the globalized variables with their
381 llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
384 } // CodeGen namespace.
385 } // clang namespace.
387 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H