1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This provides a class for OpenMP runtime code generation.
12 //===----------------------------------------------------------------------===//
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
27 using namespace clang;
28 using namespace CodeGen;
31 /// \brief Base class for handling code generation inside OpenMP regions.
// Derives from CGCapturedStmtInfo so CodeGenFunction treats OpenMP regions
// as captured-statement regions with kind CR_OpenMP.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
34 /// \brief Kinds of OpenMP regions used in codegen.
35 enum CGOpenMPRegionKind {
36 /// \brief Region with outlined function for standalone 'parallel'
38 ParallelOutlinedRegion,
39 /// \brief Region with outlined function for standalone 'task' directive.
41 /// \brief Region for constructs that do not require function outlining,
42 /// like 'for', 'sections', 'atomic' etc. directives.
// Constructor for regions that have an associated captured statement.
46 CGOpenMPRegionInfo(const CapturedStmt &CS,
47 const CGOpenMPRegionKind RegionKind,
48 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind)
49 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
50 CodeGen(CodeGen), Kind(Kind) {}
// Constructor for regions without their own captured statement
// (used for inlined regions).
52 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
53 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind)
54 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
57 /// \brief Get a variable or parameter for storing global thread id
58 /// inside OpenMP construct.
59 virtual const VarDecl *getThreadIDVariable() const = 0;
61 /// \brief Emit the captured statement body.
62 virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
64 /// \brief Get an LValue for the current ThreadID variable.
65 /// \return LValue for thread id variable. This LValue always has type int32*.
66 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
68 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
70 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
// LLVM-style RTTI: any CGCapturedStmtInfo created with kind CR_OpenMP
// is a CGOpenMPRegionInfo.
72 static bool classof(const CGCapturedStmtInfo *Info) {
73 return Info->getKind() == CR_OpenMP;
// Which flavor of OpenMP region this is (outlined/task/inlined).
77 CGOpenMPRegionKind RegionKind;
// Callback that emits the region body; stored by reference, so the
// referenced object must outlive this region info (RAII-scoped by callers).
78 const RegionCodeGenTy &CodeGen;
// The OpenMP directive kind this region was created for.
79 OpenMPDirectiveKind Kind;
82 /// \brief API for captured statement code generation in OpenMP constructs.
// Region info for a standalone outlined 'parallel' region: the thread id is
// passed to the outlined function as an explicit kmp_int32* argument.
83 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
85 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
86 const RegionCodeGenTy &CodeGen,
87 OpenMPDirectiveKind Kind)
88 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind),
89 ThreadIDVar(ThreadIDVar) {
90 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
92 /// \brief Get a variable or parameter for storing global thread id
93 /// inside OpenMP construct.
94 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
96 /// \brief Get the name of the capture helper.
97 StringRef getHelperName() const override { return ".omp_outlined."; }
// RTTI: a CGOpenMPRegionInfo whose region kind is ParallelOutlinedRegion.
99 static bool classof(const CGCapturedStmtInfo *Info) {
100 return CGOpenMPRegionInfo::classof(Info) &&
101 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
102 ParallelOutlinedRegion;
106 /// \brief A variable or parameter storing global thread id for OpenMP
108 const VarDecl *ThreadIDVar;
111 /// \brief API for captured statement code generation in OpenMP constructs.
// Region info for an outlined 'task' region. Unlike the parallel case, the
// thread id variable holds the kmp_int32 value directly (not a pointer), so
// this class overrides getThreadIDVariableLValue accordingly.
112 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
114 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
115 const VarDecl *ThreadIDVar,
116 const RegionCodeGenTy &CodeGen,
117 OpenMPDirectiveKind Kind)
118 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind),
119 ThreadIDVar(ThreadIDVar) {
120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
122 /// \brief Get a variable or parameter for storing global thread id
123 /// inside OpenMP construct.
124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
126 /// \brief Get an LValue for the current ThreadID variable.
127 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
129 /// \brief Get the name of the capture helper.
130 StringRef getHelperName() const override { return ".omp_outlined."; }
// RTTI check against the TaskOutlinedRegion kind.
132 static bool classof(const CGCapturedStmtInfo *Info) {
133 return CGOpenMPRegionInfo::classof(Info) &&
134 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
139 /// \brief A variable or parameter storing global thread id for OpenMP
141 const VarDecl *ThreadIDVar;
144 /// \brief API for inlined captured statement code generation in OpenMP
// An inlined region has no outlined function of its own; most queries are
// delegated to the enclosing (outer) region info when one exists.
146 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
148 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
149 const RegionCodeGenTy &CodeGen,
150 OpenMPDirectiveKind Kind)
151 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind), OldCSI(OldCSI),
// OuterRegionInfo is null when the previous CSI is not an OpenMP region.
152 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
153 // \brief Retrieve the value of the context parameter.
154 llvm::Value *getContextValue() const override {
156 return OuterRegionInfo->getContextValue();
// No enclosing outlined region to provide a context value.
157 llvm_unreachable("No context value for inlined OpenMP region");
159 virtual void setContextValue(llvm::Value *V) override {
160 if (OuterRegionInfo) {
161 OuterRegionInfo->setContextValue(V);
164 llvm_unreachable("No context value for inlined OpenMP region");
166 /// \brief Lookup the captured field decl for a variable.
167 const FieldDecl *lookup(const VarDecl *VD) const override {
169 return OuterRegionInfo->lookup(VD);
170 // If there is no outer outlined region,no need to lookup in a list of
171 // captured variables, we can use the original one.
174 FieldDecl *getThisFieldDecl() const override {
176 return OuterRegionInfo->getThisFieldDecl();
179 /// \brief Get a variable or parameter for storing global thread id
180 /// inside OpenMP construct.
181 const VarDecl *getThreadIDVariable() const override {
183 return OuterRegionInfo->getThreadIDVariable();
187 /// \brief Get the name of the capture helper.
188 StringRef getHelperName() const override {
189 if (auto *OuterRegionInfo = getOldCSI())
190 return OuterRegionInfo->getHelperName();
191 llvm_unreachable("No helper name for inlined OpenMP construct");
194 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
// RTTI check against the InlinedRegion kind.
196 static bool classof(const CGCapturedStmtInfo *Info) {
197 return CGOpenMPRegionInfo::classof(Info) &&
198 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
202 /// \brief CodeGen info about outer OpenMP region.
// The captured-statement info that was active before this inlined region;
// restored by InlinedOpenMPRegionRAII's destructor.
203 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
// OldCSI downcast to an OpenMP region info, or null if it is not one.
204 CGOpenMPRegionInfo *OuterRegionInfo;
207 /// \brief RAII for emitting code of OpenMP constructs.
// On construction swaps CGF.CapturedStmtInfo for a freshly allocated
// CGOpenMPInlinedRegionInfo; on destruction deletes it and restores the
// previous info, so nesting works like a stack.
208 class InlinedOpenMPRegionRAII {
209 CodeGenFunction &CGF;
212 /// \brief Constructs region for combined constructs.
213 /// \param CodeGen Code generation sequence for combined directives. Includes
214 /// a list of functions used for code generation of implicitly inlined
216 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
217 OpenMPDirectiveKind Kind)
219 // Start emission for the construct.
220 CGF.CapturedStmtInfo =
221 new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen, Kind);
223 ~InlinedOpenMPRegionRAII() {
224 // Restore original CapturedStmtInfo only if we're done with code emission.
226 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
// Delete the inlined-region info we installed, then restore the old one.
227 delete CGF.CapturedStmtInfo;
228 CGF.CapturedStmtInfo = OldCSI;
// Load the kmp_int32* thread-id parameter from its local slot and wrap the
// pointee as an LValue (the variable's declared type is a pointer type, so
// the loaded value is dereferenced via the pointee type).
234 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
235 return CGF.MakeNaturalAlignAddrLValue(
236 CGF.Builder.CreateAlignedLoad(
237 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
238 CGF.PointerAlignInBytes),
239 getThreadIDVariable()
241 ->castAs<PointerType>()
// Emit the region body inside a terminate scope and a cleanups scope: any
// exception escaping the structured block must terminate, and cleanups are
// run before leaving the region.
245 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
246 // 1.2.2 OpenMP Language Terminology
247 // Structured block - An executable statement with a single entry at the
248 // top and a single exit at the bottom.
249 // The point of exit cannot be a branch out of the structured block.
250 // longjmp() and throw() must not violate the entry/exit criteria.
251 CGF.EHStack.pushTerminate();
253 CodeGenFunction::RunCleanupsScope Scope(CGF);
// Matches the pushTerminate above, closing the terminate scope.
256 CGF.EHStack.popTerminate();
// For tasks the thread-id variable holds the value directly (no pointer
// indirection), so the LValue is simply the variable's own address.
259 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
260 CodeGenFunction &CGF) {
261 return CGF.MakeNaturalAlignAddrLValue(
262 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
263 getThreadIDVariable()->getType());
// Construct the runtime helper: builds the LLVM types shared by all
// __kmpc_* entry points (ident_t, the kmpc_micro function type, and the
// kmp_critical_name lock array type).
266 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
267 : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
// ident_t layout mirrors the OpenMP runtime's source-location struct.
268 IdentTy = llvm::StructType::create(
269 "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
270 CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
271 CGM.Int8PtrTy /* psource */, nullptr);
272 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
273 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
274 llvm::PointerType::getUnqual(CGM.Int32Ty)};
275 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
// kmp_critical_name is an array of 8 kmp_int32 in the runtime.
276 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
// Reset per-module cached state (internal runtime variables).
279 void CGOpenMPRuntime::clear() {
280 InternalVars.clear();
// Emit the outlined function for a 'parallel' region. The captured statement
// associated with the directive becomes the body; ThreadIDVar must be a
// kmp_int32* parameter (asserted below), matching the kmpc_micro signature.
283 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
284 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
285 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
286 assert(ThreadIDVar->getType()->isPointerType() &&
287 "thread id variable must be of type kmp_int32 *");
288 const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
// Fresh CodeGenFunction for the outlined helper, with our region info
// installed for the duration of emission (RAII).
289 CodeGenFunction CGF(CGM, true);
290 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind);
291 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
292 return CGF.GenerateCapturedStmtFunction(*CS);
// Emit the outlined function for a 'task' region. Unlike 'parallel', the
// thread id is passed by value (kmp_int32, not a pointer) — asserted below.
295 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
296 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
297 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
298 assert(!ThreadIDVar->getType()->isPointerType() &&
299 "thread id variable must be of type kmp_int32 for tasks");
300 auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
301 CodeGenFunction CGF(CGM, true);
302 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
304 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
305 return CGF.GenerateCapturedStmtFunction(*CS);
// Return (creating and caching on first use) the default ident_t global for
// the given flags, used when no debug info / precise location is required.
309 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
// One cached global per flags value.
310 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
312 if (!DefaultOpenMPPSource) {
313 // Initialize default location for psource field of ident_t structure of
314 // all ident_t objects. Format is ";file;function;line;column;;".
316 // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
317 DefaultOpenMPPSource =
318 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
319 DefaultOpenMPPSource =
320 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
// Create the private constant ident_t global and fill it in.
322 auto DefaultOpenMPLocation = new llvm::GlobalVariable(
323 CGM.getModule(), IdentTy, /*isConstant*/ true,
324 llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
325 DefaultOpenMPLocation->setUnnamedAddr(true);
327 llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
// Fields: reserved_1, flags, reserved_2, reserved_3, psource.
328 llvm::Constant *Values[] = {Zero,
329 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
330 Zero, Zero, DefaultOpenMPPSource};
331 llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
332 DefaultOpenMPLocation->setInitializer(Init);
333 OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
334 return DefaultOpenMPLocation;
// Emit (or reuse) an ident_t* describing the source location Loc for the
// current function. Without debug info, falls back to the shared default
// location global; otherwise a per-function ".kmpc_loc.addr" alloca is
// filled from the default location and its psource field is patched with a
// ";file;function;line;column;;" string.
339 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
341 OpenMPLocationFlags Flags) {
342 // If no debug info is generated - return global default location.
343 if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
345 return getOrCreateDefaultLocation(Flags);
347 assert(CGF.CurFn && "No function in current CodeGenFunction.");
349 llvm::Value *LocValue = nullptr;
// Reuse the per-function location alloca if one was already created.
350 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
351 if (I != OpenMPLocThreadIDMap.end())
352 LocValue = I->second.DebugLoc;
353 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
354 // GetOpenMPThreadID was called before this routine.
355 if (LocValue == nullptr) {
356 // Generate "ident_t .kmpc_loc.addr;"
357 llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr")
358 AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
359 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
360 Elem.second.DebugLoc = AI;
// Copy the default ident_t into the alloca at the function entry point
// (insert point temporarily moved to AllocaInsertPt, restored by IPG).
363 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
364 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
365 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
366 llvm::ConstantExpr::getSizeOf(IdentTy),
367 CGM.PointerAlignInBytes);
370 // char **psource = &.kmpc_loc_<flags>.addr.psource;
371 auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
// Cache the generated location string per raw source-location encoding.
374 auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
375 if (OMPDebugLoc == nullptr) {
376 SmallString<128> Buffer2;
377 llvm::raw_svector_ostream OS2(Buffer2);
378 // Build debug location
379 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
380 OS2 << ";" << PLoc.getFilename() << ";";
381 if (const FunctionDecl *FD =
382 dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
383 OS2 << FD->getQualifiedNameAsString();
385 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
386 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
387 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
389 // *psource = ";<File>;<Function>;<Line>;<Column>;;";
390 CGF.Builder.CreateStore(OMPDebugLoc, PSource);
// Return the global thread id for the current function, caching it per
// function. Inside an outlined region the id is loaded from the thread-id
// parameter; otherwise __kmpc_global_thread_num is called once at function
// entry and the result reused.
395 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
396 SourceLocation Loc) {
397 assert(CGF.CurFn && "No function in current CodeGenFunction.");
399 llvm::Value *ThreadID = nullptr;
400 // Check whether we've already cached a load of the thread id in this
402 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
403 if (I != OpenMPLocThreadIDMap.end()) {
404 ThreadID = I->second.ThreadID;
405 if (ThreadID != nullptr)
408 if (auto OMPRegionInfo =
409 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
410 if (OMPRegionInfo->getThreadIDVariable()) {
411 // Check if this an outlined function with thread id passed as argument.
412 auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
413 ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
414 // If value loaded in entry block, cache it and use it everywhere in
// Only caching loads emitted in the entry block is safe, since those
// dominate all later uses.
416 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
417 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
418 Elem.second.ThreadID = ThreadID;
424 // This is not an outlined function region - need to call __kmpc_int32
425 // kmpc_global_thread_num(ident_t *loc).
426 // Generate thread id value and cache this value for use across the
// Emit the runtime call at the alloca insert point (function entry);
// the InsertPointGuard restores the builder position afterwards.
428 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
429 CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
431 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
432 emitUpdateLocation(CGF, Loc));
433 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
434 Elem.second.ThreadID = ThreadID;
// Drop the per-function cached location/thread-id entry once codegen for
// the function is complete, so stale values are never reused.
438 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
439 assert(CGF.CurFn && "No function in current CodeGenFunction.");
440 if (OpenMPLocThreadIDMap.count(CGF.CurFn))
441 OpenMPLocThreadIDMap.erase(CGF.CurFn);
// ident_t* — the location argument type used by all __kmpc_* entry points.
444 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
445 return llvm::PointerType::getUnqual(IdentTy);
// kmpc_micro* — pointer to the outlined-parallel-function type.
448 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
449 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
// Declare (or fetch the existing declaration of) the libomp entry point for
// the requested OpenMPRTLFunction enumerator. Each case builds the exact C
// signature of the corresponding __kmpc_* function and registers it via
// CGM.CreateRuntimeFunction.
453 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
454 llvm::Constant *RTLFn = nullptr;
456 case OMPRTL__kmpc_fork_call: {
457 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
// Variadic: the captured variables are appended after the microtask.
459 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
460 getKmpc_MicroPointerTy()};
461 llvm::FunctionType *FnTy =
462 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
463 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
466 case OMPRTL__kmpc_global_thread_num: {
467 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
468 llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
469 llvm::FunctionType *FnTy =
470 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
471 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
474 case OMPRTL__kmpc_threadprivate_cached: {
475 // Build void *__kmpc_threadprivate_cached(ident_t *loc,
476 // kmp_int32 global_tid, void *data, size_t size, void ***cache);
477 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
478 CGM.VoidPtrTy, CGM.SizeTy,
479 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
480 llvm::FunctionType *FnTy =
481 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
482 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
485 case OMPRTL__kmpc_critical: {
486 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
487 // kmp_critical_name *crit);
488 llvm::Type *TypeParams[] = {
489 getIdentTyPointerTy(), CGM.Int32Ty,
490 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
491 llvm::FunctionType *FnTy =
492 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
493 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
496 case OMPRTL__kmpc_threadprivate_register: {
497 // Build void __kmpc_threadprivate_register(ident_t *, void *data,
498 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
499 // typedef void *(*kmpc_ctor)(void *);
501 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
502 /*isVarArg*/ false)->getPointerTo();
503 // typedef void *(*kmpc_cctor)(void *, void *);
504 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
505 auto KmpcCopyCtorTy =
506 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
507 /*isVarArg*/ false)->getPointerTo();
508 // typedef void (*kmpc_dtor)(void *);
510 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
512 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
513 KmpcCopyCtorTy, KmpcDtorTy};
514 auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
516 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
519 case OMPRTL__kmpc_end_critical: {
520 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
521 // kmp_critical_name *crit);
522 llvm::Type *TypeParams[] = {
523 getIdentTyPointerTy(), CGM.Int32Ty,
524 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
525 llvm::FunctionType *FnTy =
526 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
527 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
530 case OMPRTL__kmpc_cancel_barrier: {
531 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
533 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
534 llvm::FunctionType *FnTy =
535 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
536 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
539 case OMPRTL__kmpc_barrier: {
540 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
541 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
542 llvm::FunctionType *FnTy =
543 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
544 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
547 case OMPRTL__kmpc_for_static_fini: {
548 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
549 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
550 llvm::FunctionType *FnTy =
551 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
552 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
555 case OMPRTL__kmpc_push_num_threads: {
556 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
557 // kmp_int32 num_threads)
558 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
560 llvm::FunctionType *FnTy =
561 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
562 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
565 case OMPRTL__kmpc_serialized_parallel: {
566 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
568 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
569 llvm::FunctionType *FnTy =
570 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
571 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
574 case OMPRTL__kmpc_end_serialized_parallel: {
575 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
577 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
578 llvm::FunctionType *FnTy =
579 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
580 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
583 case OMPRTL__kmpc_flush: {
584 // Build void __kmpc_flush(ident_t *loc);
585 llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
586 llvm::FunctionType *FnTy =
587 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
588 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
591 case OMPRTL__kmpc_master: {
592 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
593 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
594 llvm::FunctionType *FnTy =
595 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
596 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
599 case OMPRTL__kmpc_end_master: {
600 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
601 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
602 llvm::FunctionType *FnTy =
603 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
604 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
607 case OMPRTL__kmpc_omp_taskyield: {
608 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
610 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
611 llvm::FunctionType *FnTy =
612 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
613 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
616 case OMPRTL__kmpc_single: {
617 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
618 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
619 llvm::FunctionType *FnTy =
620 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
621 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
624 case OMPRTL__kmpc_end_single: {
625 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
626 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
627 llvm::FunctionType *FnTy =
628 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
629 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
632 case OMPRTL__kmpc_omp_task_alloc: {
633 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
634 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
635 // kmp_routine_entry_t *task_entry);
636 assert(KmpRoutineEntryPtrTy != nullptr &&
637 "Type kmp_routine_entry_t must be created.");
638 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
639 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
640 // Return void * and then cast to particular kmp_task_t type.
641 llvm::FunctionType *FnTy =
642 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
643 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
646 case OMPRTL__kmpc_omp_task: {
647 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
649 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
651 llvm::FunctionType *FnTy =
652 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
653 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
656 case OMPRTL__kmpc_copyprivate: {
657 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
658 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
660 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
662 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
663 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
664 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
666 llvm::FunctionType *FnTy =
667 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
668 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
671 case OMPRTL__kmpc_reduce: {
672 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
673 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
674 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
675 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
676 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
678 llvm::Type *TypeParams[] = {
679 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
680 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
681 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
682 llvm::FunctionType *FnTy =
683 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
684 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
687 case OMPRTL__kmpc_reduce_nowait: {
688 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
689 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
690 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
692 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
693 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
695 llvm::Type *TypeParams[] = {
696 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
697 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
698 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
699 llvm::FunctionType *FnTy =
700 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
701 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
704 case OMPRTL__kmpc_end_reduce: {
705 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
706 // kmp_critical_name *lck);
707 llvm::Type *TypeParams[] = {
708 getIdentTyPointerTy(), CGM.Int32Ty,
709 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
710 llvm::FunctionType *FnTy =
711 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
712 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
715 case OMPRTL__kmpc_end_reduce_nowait: {
716 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
717 // kmp_critical_name *lck);
718 llvm::Type *TypeParams[] = {
719 getIdentTyPointerTy(), CGM.Int32Ty,
720 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
721 llvm::FunctionType *FnTy =
722 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
724 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
727 case OMPRTL__kmpc_omp_task_begin_if0: {
728 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
730 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
732 llvm::FunctionType *FnTy =
733 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
735 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
738 case OMPRTL__kmpc_omp_task_complete_if0: {
739 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
741 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
743 llvm::FunctionType *FnTy =
744 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
745 RTLFn = CGM.CreateRuntimeFunction(FnTy,
746 /*Name=*/"__kmpc_omp_task_complete_if0");
749 case OMPRTL__kmpc_ordered: {
750 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
751 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
752 llvm::FunctionType *FnTy =
753 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
754 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
757 case OMPRTL__kmpc_end_ordered: {
758 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
759 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
760 llvm::FunctionType *FnTy =
761 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
762 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
765 case OMPRTL__kmpc_omp_taskwait: {
766 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
767 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
768 llvm::FunctionType *FnTy =
769 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
770 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
773 case OMPRTL__kmpc_taskgroup: {
774 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
775 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
776 llvm::FunctionType *FnTy =
777 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
778 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
781 case OMPRTL__kmpc_end_taskgroup: {
782 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
783 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
784 llvm::FunctionType *FnTy =
785 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
786 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
789 case OMPRTL__kmpc_push_proc_bind: {
790 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
792 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
793 llvm::FunctionType *FnTy =
794 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
795 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
798 case OMPRTL__kmpc_omp_task_with_deps: {
799 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
800 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
801 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
802 llvm::Type *TypeParams[] = {
803 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
804 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
805 llvm::FunctionType *FnTy =
806 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
808 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
811 case OMPRTL__kmpc_omp_wait_deps: {
812 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
813 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
814 // kmp_depend_info_t *noalias_dep_list);
815 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
816 CGM.Int32Ty, CGM.VoidPtrTy,
817 CGM.Int32Ty, CGM.VoidPtrTy};
818 llvm::FunctionType *FnTy =
819 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
820 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
823 case OMPRTL__kmpc_cancellationpoint: {
824 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
825 // global_tid, kmp_int32 cncl_kind)
826 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
827 llvm::FunctionType *FnTy =
828 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
829 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
832 case OMPRTL__kmpc_cancel: {
833 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
834 // kmp_int32 cncl_kind)
835 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
836 llvm::FunctionType *FnTy =
837 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
838 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
845 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
847 assert((IVSize == 32 || IVSize == 64) &&
848 "IV size is not compatible with the omp runtime");
849 auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
850 : "__kmpc_for_static_init_4u")
851 : (IVSigned ? "__kmpc_for_static_init_8"
852 : "__kmpc_for_static_init_8u");
853 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
854 auto PtrTy = llvm::PointerType::getUnqual(ITy);
855 llvm::Type *TypeParams[] = {
856 getIdentTyPointerTy(), // loc
858 CGM.Int32Ty, // schedtype
859 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
866 llvm::FunctionType *FnTy =
867 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
868 return CGM.CreateRuntimeFunction(FnTy, Name);
871 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
873 assert((IVSize == 32 || IVSize == 64) &&
874 "IV size is not compatible with the omp runtime");
877 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
878 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
879 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
880 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
882 CGM.Int32Ty, // schedtype
888 llvm::FunctionType *FnTy =
889 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
890 return CGM.CreateRuntimeFunction(FnTy, Name);
893 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
895 assert((IVSize == 32 || IVSize == 64) &&
896 "IV size is not compatible with the omp runtime");
899 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
900 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
901 llvm::Type *TypeParams[] = {
902 getIdentTyPointerTy(), // loc
905 llvm::FunctionType *FnTy =
906 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
907 return CGM.CreateRuntimeFunction(FnTy, Name);
910 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
912 assert((IVSize == 32 || IVSize == 64) &&
913 "IV size is not compatible with the omp runtime");
916 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
917 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
918 auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
919 auto PtrTy = llvm::PointerType::getUnqual(ITy);
920 llvm::Type *TypeParams[] = {
921 getIdentTyPointerTy(), // loc
923 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
928 llvm::FunctionType *FnTy =
929 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
930 return CGM.CreateRuntimeFunction(FnTy, Name);
934 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
935 assert(!CGM.getLangOpts().OpenMPUseTLS ||
936 !CGM.getContext().getTargetInfo().isTLSSupported());
937 // Lookup the entry, lazily creating it if necessary.
938 return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
939 Twine(CGM.getMangledName(VD)) + ".cache.");
942 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
945 SourceLocation Loc) {
946 if (CGM.getLangOpts().OpenMPUseTLS &&
947 CGM.getContext().getTargetInfo().isTLSSupported())
950 auto VarTy = VDAddr->getType()->getPointerElementType();
951 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
952 CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
953 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
954 getOrCreateThreadPrivateCache(VD)};
955 return CGF.EmitRuntimeCall(
956 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
959 void CGOpenMPRuntime::emitThreadPrivateVarInit(
960 CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
961 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
962 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
964 auto OMPLoc = emitUpdateLocation(CGF, Loc);
965 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
967 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
968 // to register constructor/destructor for variable.
969 llvm::Value *Args[] = {OMPLoc,
970 CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
971 Ctor, CopyCtor, Dtor};
973 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
976 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
977 const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
978 bool PerformInit, CodeGenFunction *CGF) {
979 if (CGM.getLangOpts().OpenMPUseTLS &&
980 CGM.getContext().getTargetInfo().isTLSSupported())
983 VD = VD->getDefinition(CGM.getContext());
984 if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
985 ThreadPrivateWithDefinition.insert(VD);
986 QualType ASTTy = VD->getType();
988 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
989 auto Init = VD->getAnyInitializer();
990 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
991 // Generate function that re-emits the declaration's initializer into the
992 // threadprivate copy of the variable VD
993 CodeGenFunction CtorCGF(CGM);
994 FunctionArgList Args;
995 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
996 /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
997 Args.push_back(&Dst);
999 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1000 CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1001 /*isVariadic=*/false);
1002 auto FTy = CGM.getTypes().GetFunctionType(FI);
1003 auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1004 FTy, ".__kmpc_global_ctor_.", Loc);
1005 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1006 Args, SourceLocation());
1007 auto ArgVal = CtorCGF.EmitLoadOfScalar(
1008 CtorCGF.GetAddrOfLocalVar(&Dst),
1009 /*Volatile=*/false, CGM.PointerAlignInBytes,
1010 CGM.getContext().VoidPtrTy, Dst.getLocation());
1011 auto Arg = CtorCGF.Builder.CreatePointerCast(
1013 CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
1014 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1015 /*IsInitializer=*/true);
1016 ArgVal = CtorCGF.EmitLoadOfScalar(
1017 CtorCGF.GetAddrOfLocalVar(&Dst),
1018 /*Volatile=*/false, CGM.PointerAlignInBytes,
1019 CGM.getContext().VoidPtrTy, Dst.getLocation());
1020 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1021 CtorCGF.FinishFunction();
1024 if (VD->getType().isDestructedType() != QualType::DK_none) {
1025 // Generate function that emits destructor call for the threadprivate copy
1026 // of the variable VD
1027 CodeGenFunction DtorCGF(CGM);
1028 FunctionArgList Args;
1029 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1030 /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1031 Args.push_back(&Dst);
1033 auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1034 CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1035 /*isVariadic=*/false);
1036 auto FTy = CGM.getTypes().GetFunctionType(FI);
1037 auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1038 FTy, ".__kmpc_global_dtor_.", Loc);
1039 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1041 auto ArgVal = DtorCGF.EmitLoadOfScalar(
1042 DtorCGF.GetAddrOfLocalVar(&Dst),
1043 /*Volatile=*/false, CGM.PointerAlignInBytes,
1044 CGM.getContext().VoidPtrTy, Dst.getLocation());
1045 DtorCGF.emitDestroy(ArgVal, ASTTy,
1046 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1047 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1048 DtorCGF.FinishFunction();
1051 // Do not emit init function if it is not required.
1055 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1057 llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1058 /*isVarArg=*/false)->getPointerTo();
1059 // Copying constructor for the threadprivate variable.
1060 // Must be NULL - reserved by runtime, but currently it requires that this
1061 // parameter is always NULL. Otherwise it fires assertion.
1062 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1063 if (Ctor == nullptr) {
1064 auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1065 /*isVarArg=*/false)->getPointerTo();
1066 Ctor = llvm::Constant::getNullValue(CtorTy);
1068 if (Dtor == nullptr) {
1069 auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1070 /*isVarArg=*/false)->getPointerTo();
1071 Dtor = llvm::Constant::getNullValue(DtorTy);
1074 auto InitFunctionTy =
1075 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1076 auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1077 InitFunctionTy, ".__omp_threadprivate_init_.");
1078 CodeGenFunction InitCGF(CGM);
1079 FunctionArgList ArgList;
1080 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1081 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1083 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1084 InitCGF.FinishFunction();
1085 return InitFunction;
1087 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1092 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1093 /// function. Here is the logic:
1099 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1100 const RegionCodeGenTy &ThenGen,
1101 const RegionCodeGenTy &ElseGen) {
1102 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1104 // If the condition constant folds and can be elided, try to avoid emitting
1105 // the condition and the dead arm of the if/else.
1107 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1108 CodeGenFunction::RunCleanupsScope Scope(CGF);
1117 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1118 // emit the conditional branch.
1119 auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1120 auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1121 auto ContBlock = CGF.createBasicBlock("omp_if.end");
1122 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1124 // Emit the 'then' code.
1125 CGF.EmitBlock(ThenBlock);
1127 CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1130 CGF.EmitBranch(ContBlock);
1131 // Emit the 'else' code if present.
1133 // There is no need to emit line number for unconditional branch.
1134 auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1135 CGF.EmitBlock(ElseBlock);
1138 CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1142 // There is no need to emit line number for unconditional branch.
1143 auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1144 CGF.EmitBranch(ContBlock);
1146 // Emit the continuation block for code after the if.
1147 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1150 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1151 llvm::Value *OutlinedFn,
1152 llvm::Value *CapturedStruct,
1153 const Expr *IfCond) {
1154 auto *RTLoc = emitUpdateLocation(CGF, Loc);
1156 [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) {
1157 // Build call __kmpc_fork_call(loc, 1, microtask,
1158 // captured_struct/*context*/)
1159 llvm::Value *Args[] = {
1161 CGF.Builder.getInt32(
1162 1), // Number of arguments after 'microtask' argument
1163 // (there is only one additional argument - 'context')
1164 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
1165 CGF.EmitCastToVoidPtr(CapturedStruct)};
1166 auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1167 CGF.EmitRuntimeCall(RTLFn, Args);
1169 auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc](
1170 CodeGenFunction &CGF) {
1171 auto ThreadID = getThreadID(CGF, Loc);
1173 // __kmpc_serialized_parallel(&Loc, GTid);
1174 llvm::Value *Args[] = {RTLoc, ThreadID};
1175 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1178 // OutlinedFn(>id, &zero, CapturedStruct);
1179 auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1180 auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
1182 auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
1183 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1184 llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
1185 CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1187 // __kmpc_end_serialized_parallel(&Loc, GTid);
1188 llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1189 CGF.EmitRuntimeCall(
1190 createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1193 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1195 CodeGenFunction::RunCleanupsScope Scope(CGF);
1200 // If we're inside an (outlined) parallel region, use the region info's
1201 // thread-ID variable (it is passed in a first argument of the outlined function
1202 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1203 // regular serial code region, get thread ID by calling kmp_int32
1204 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1205 // return the address of that temp.
1206 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1207 SourceLocation Loc) {
1208 if (auto OMPRegionInfo =
1209 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1210 if (OMPRegionInfo->getThreadIDVariable())
1211 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1213 auto ThreadID = getThreadID(CGF, Loc);
1215 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1216 auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1217 CGF.EmitStoreOfScalar(ThreadID,
1218 CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
1220 return ThreadIDTemp;
1224 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1225 const llvm::Twine &Name) {
1226 SmallString<256> Buffer;
1227 llvm::raw_svector_ostream Out(Buffer);
1229 auto RuntimeName = Out.str();
1230 auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1232 assert(Elem.second->getType()->getPointerElementType() == Ty &&
1233 "OMP internal variable has different type than requested");
1234 return &*Elem.second;
1237 return Elem.second = new llvm::GlobalVariable(
1238 CGM.getModule(), Ty, /*IsConstant*/ false,
1239 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1243 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1244 llvm::Twine Name(".gomp_critical_user_", CriticalName);
1245 return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1249 template <size_t N> class CallEndCleanup : public EHScopeStack::Cleanup {
1250 llvm::Value *Callee;
1251 llvm::Value *Args[N];
1254 CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1256 assert(CleanupArgs.size() == N);
1257 std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1259 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1260 CGF.EmitRuntimeCall(Callee, Args);
1265 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1266 StringRef CriticalName,
1267 const RegionCodeGenTy &CriticalOpGen,
1268 SourceLocation Loc) {
1269 // __kmpc_critical(ident_t *, gtid, Lock);
1271 // __kmpc_end_critical(ident_t *, gtid, Lock);
1272 // Prepare arguments and build a call to __kmpc_critical
1274 CodeGenFunction::RunCleanupsScope Scope(CGF);
1275 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1276 getCriticalRegionLock(CriticalName)};
1277 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1278 // Build a call to __kmpc_end_critical
1279 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1280 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1281 llvm::makeArrayRef(Args));
1282 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1286 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1287 OpenMPDirectiveKind Kind,
1288 const RegionCodeGenTy &BodyOpGen) {
1289 llvm::Value *CallBool = CGF.EmitScalarConversion(
1291 CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1292 CGF.getContext().BoolTy);
1294 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1295 auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1296 // Generate the branch (If-stmt)
1297 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1298 CGF.EmitBlock(ThenBlock);
1299 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1300 // Emit the rest of bblocks/branches
1301 CGF.EmitBranch(ContBlock);
1302 CGF.EmitBlock(ContBlock, true);
1305 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1306 const RegionCodeGenTy &MasterOpGen,
1307 SourceLocation Loc) {
1308 // if(__kmpc_master(ident_t *, gtid)) {
1310 // __kmpc_end_master(ident_t *, gtid);
1312 // Prepare arguments and build a call to __kmpc_master
1313 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1315 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1316 typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1317 MasterCallEndCleanup;
1318 emitIfStmt(CGF, IsMaster, OMPD_master, [&](CodeGenFunction &CGF) -> void {
1319 CodeGenFunction::RunCleanupsScope Scope(CGF);
1320 CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1321 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1322 llvm::makeArrayRef(Args));
1327 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1328 SourceLocation Loc) {
1329 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1330 llvm::Value *Args[] = {
1331 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1332 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1333 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1336 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1337 const RegionCodeGenTy &TaskgroupOpGen,
1338 SourceLocation Loc) {
1339 // __kmpc_taskgroup(ident_t *, gtid);
1340 // TaskgroupOpGen();
1341 // __kmpc_end_taskgroup(ident_t *, gtid);
1342 // Prepare arguments and build a call to __kmpc_taskgroup
1344 CodeGenFunction::RunCleanupsScope Scope(CGF);
1345 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1346 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1347 // Build a call to __kmpc_end_taskgroup
1348 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1349 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1350 llvm::makeArrayRef(Args));
1351 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1355 static llvm::Value *emitCopyprivateCopyFunction(
1356 CodeGenModule &CGM, llvm::Type *ArgsType,
1357 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1358 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1359 auto &C = CGM.getContext();
1360 // void copy_func(void *LHSArg, void *RHSArg);
1361 FunctionArgList Args;
1362 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1364 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1366 Args.push_back(&LHSArg);
1367 Args.push_back(&RHSArg);
1368 FunctionType::ExtInfo EI;
1369 auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1370 C.VoidTy, Args, EI, /*isVariadic=*/false);
1371 auto *Fn = llvm::Function::Create(
1372 CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1373 ".omp.copyprivate.copy_func", &CGM.getModule());
1374 CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1375 CodeGenFunction CGF(CGM);
1376 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1377 // Dest = (void*[n])(LHSArg);
1378 // Src = (void*[n])(RHSArg);
1379 auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1380 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1381 CGF.PointerAlignInBytes),
1383 auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1384 CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1385 CGF.PointerAlignInBytes),
1387 // *(Type0*)Dst[0] = *(Type0*)Src[0];
1388 // *(Type1*)Dst[1] = *(Type1*)Src[1];
1390 // *(Typen*)Dst[n] = *(Typen*)Src[n];
1391 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1392 auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1393 CGF.Builder.CreateAlignedLoad(
1394 CGF.Builder.CreateStructGEP(nullptr, LHS, I),
1395 CGM.PointerAlignInBytes),
1396 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1397 auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1398 CGF.Builder.CreateAlignedLoad(
1399 CGF.Builder.CreateStructGEP(nullptr, RHS, I),
1400 CGM.PointerAlignInBytes),
1401 CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1402 auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1403 QualType Type = VD->getType();
1404 CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr,
1405 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
1406 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
1409 CGF.FinishFunction();
1413 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1414 const RegionCodeGenTy &SingleOpGen,
1416 ArrayRef<const Expr *> CopyprivateVars,
1417 ArrayRef<const Expr *> SrcExprs,
1418 ArrayRef<const Expr *> DstExprs,
1419 ArrayRef<const Expr *> AssignmentOps) {
1420 assert(CopyprivateVars.size() == SrcExprs.size() &&
1421 CopyprivateVars.size() == DstExprs.size() &&
1422 CopyprivateVars.size() == AssignmentOps.size());
1423 auto &C = CGM.getContext();
1424 // int32 did_it = 0;
1425 // if(__kmpc_single(ident_t *, gtid)) {
1427 // __kmpc_end_single(ident_t *, gtid);
1430 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1431 // <copy_func>, did_it);
1433 llvm::AllocaInst *DidIt = nullptr;
1434 if (!CopyprivateVars.empty()) {
1435 // int32 did_it = 0;
1436 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1437 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1438 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
1439 DidIt->getAlignment());
1441 // Prepare arguments and build a call to __kmpc_single
1442 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1444 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1445 typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1446 SingleCallEndCleanup;
1447 emitIfStmt(CGF, IsSingle, OMPD_single, [&](CodeGenFunction &CGF) -> void {
1448 CodeGenFunction::RunCleanupsScope Scope(CGF);
1449 CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1450 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1451 llvm::makeArrayRef(Args));
1455 CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
1456 DidIt->getAlignment());
1459 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1460 // <copy_func>, did_it);
1462 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1463 auto CopyprivateArrayTy =
1464 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1465 /*IndexTypeQuals=*/0);
1466 // Create a list of all private variables for copyprivate.
1467 auto *CopyprivateList =
1468 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1469 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1470 auto *Elem = CGF.Builder.CreateStructGEP(
1471 CopyprivateList->getAllocatedType(), CopyprivateList, I);
1472 CGF.Builder.CreateAlignedStore(
1473 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1474 CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
1475 Elem, CGM.PointerAlignInBytes);
1477 // Build function that copies private values from single region to all other
1478 // threads in the corresponding parallel region.
1479 auto *CpyFn = emitCopyprivateCopyFunction(
1480 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1481 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1482 auto *BufSize = llvm::ConstantInt::get(
1483 CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1484 auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1487 CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
1488 llvm::Value *Args[] = {
1489 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1490 getThreadID(CGF, Loc), // i32 <gtid>
1491 BufSize, // size_t <buf_size>
1492 CL, // void *<copyprivate list>
1493 CpyFn, // void (*) (void *, void *) <copy_func>
1494 DidItVal // i32 did_it
1496 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1500 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1501 const RegionCodeGenTy &OrderedOpGen,
1502 SourceLocation Loc) {
1503 // __kmpc_ordered(ident_t *, gtid);
1505 // __kmpc_end_ordered(ident_t *, gtid);
1506 // Prepare arguments and build a call to __kmpc_ordered
1508 CodeGenFunction::RunCleanupsScope Scope(CGF);
1509 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1510 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1511 // Build a call to __kmpc_end_ordered
1512 CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1513 NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1514 llvm::makeArrayRef(Args));
1515 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1519 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1520 OpenMPDirectiveKind Kind,
1521 bool CheckForCancel) {
1522 // Build call __kmpc_cancel_barrier(loc, thread_id);
1523 // Build call __kmpc_barrier(loc, thread_id);
1524 OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1525 if (Kind == OMPD_for) {
1527 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1528 } else if (Kind == OMPD_sections) {
1529 Flags = static_cast<OpenMPLocationFlags>(Flags |
1530 OMP_IDENT_BARRIER_IMPL_SECTIONS);
1531 } else if (Kind == OMPD_single) {
1533 static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1534 } else if (Kind == OMPD_barrier) {
1535 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1537 Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1539 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1541 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1542 getThreadID(CGF, Loc)};
1543 if (auto *OMPRegionInfo =
1544 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1545 auto CancelDestination =
1546 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1547 if (CancelDestination.isValid()) {
1548 auto *Result = CGF.EmitRuntimeCall(
1549 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1550 if (CheckForCancel) {
1551 // if (__kmpc_cancel_barrier()) {
1552 // exit from construct;
1554 auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1555 auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1556 auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1557 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1558 CGF.EmitBlock(ExitBB);
1559 // exit from construct;
1560 CGF.EmitBranchThroughCleanup(CancelDestination);
1561 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1566 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1569 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
1570 /// the enum sched_type in kmp.h).
1571 enum OpenMPSchedType {
1572 /// \brief Lower bound for default (unordered) versions.
1574 OMP_sch_static_chunked = 33,
1575 OMP_sch_static = 34,
1576 OMP_sch_dynamic_chunked = 35,
1577 OMP_sch_guided_chunked = 36,
1578 OMP_sch_runtime = 37,
1580 /// \brief Lower bound for 'ordered' versions.
1582 OMP_ord_static_chunked = 65,
1583 OMP_ord_static = 66,
1584 OMP_ord_dynamic_chunked = 67,
1585 OMP_ord_guided_chunked = 68,
1586 OMP_ord_runtime = 69,
1588 OMP_sch_default = OMP_sch_static,
1591 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1592 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1593 bool Chunked, bool Ordered) {
1594 switch (ScheduleKind) {
1595 case OMPC_SCHEDULE_static:
1596 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1597 : (Ordered ? OMP_ord_static : OMP_sch_static);
1598 case OMPC_SCHEDULE_dynamic:
1599 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1600 case OMPC_SCHEDULE_guided:
1601 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1602 case OMPC_SCHEDULE_runtime:
1603 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1604 case OMPC_SCHEDULE_auto:
1605 return Ordered ? OMP_ord_auto : OMP_sch_auto;
1606 case OMPC_SCHEDULE_unknown:
1607 assert(!Chunked && "chunk was specified but schedule kind not known");
1608 return Ordered ? OMP_ord_static : OMP_sch_static;
1610 llvm_unreachable("Unexpected runtime schedule");
1613 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1614 bool Chunked) const {
1615 auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1616 return Schedule == OMP_sch_static;
1619 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1621 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1622 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1623 return Schedule != OMP_sch_static;
// Emit the loop-initialization runtime call for an OpenMP worksharing loop.
// Based on the schedule computed by getRuntimeSchedule(), this lowers to
// either __kmpc_dispatch_init_* (dynamic/guided/runtime schedules) or
// __kmpc_for_static_init_* (static schedules).
// NOTE(review): several interior lines of this function are elided in this
// extract (e.g. the 'if' header at the schedule test and the static-init
// argument list); comments describe only the visible code.
1626 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1627                                   OpenMPScheduleClauseKind ScheduleKind,
1628                                   unsigned IVSize, bool IVSigned, bool Ordered,
1629                                   llvm::Value *IL, llvm::Value *LB,
1630                                   llvm::Value *UB, llvm::Value *ST,
1631                                   llvm::Value *Chunk) {
1632   OpenMPSchedType Schedule =
1633       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
     // Non-static schedules go through the dispatch-init entry point.
1635       (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1636        Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
1637     // Call __kmpc_dispatch_init(
1638     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1639     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1640     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1642     // If the Chunk was not specified in the clause - use default value 1.
1643     if (Chunk == nullptr)
1644       Chunk = CGF.Builder.getIntN(IVSize, 1);
1645     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1646                             getThreadID(CGF, Loc),
1647                             CGF.Builder.getInt32(Schedule), // Schedule type
1648                             CGF.Builder.getIntN(IVSize, 0), // Lower
1650                             CGF.Builder.getIntN(IVSize, 1), // Stride
1653     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1655     // Call __kmpc_for_static_init(
1656     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1657     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1658     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1659     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1660     if (Chunk == nullptr) {
1661       assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1662              "expected static non-chunked schedule");
1663       // If the Chunk was not specified in the clause - use default value 1.
1664       Chunk = CGF.Builder.getIntN(IVSize, 1);
1666       assert((Schedule == OMP_sch_static_chunked ||
1667               Schedule == OMP_ord_static_chunked) &&
1668              "expected static chunked schedule");
1669     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1670                             getThreadID(CGF, Loc),
1671                             CGF.Builder.getInt32(Schedule), // Schedule type
1676                             CGF.Builder.getIntN(IVSize, 1), // Incr
1679     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
// Close a statically scheduled worksharing region by calling
// __kmpc_for_static_fini(loc, tid). Must pair with the static-init call
// emitted in emitForInit.
1683 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1684                                           SourceLocation Loc) {
1685   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1686   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1687                          getThreadID(CGF, Loc)};
1688   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
// Notify the runtime that the current iteration of an ordered, dynamically
// scheduled loop has finished, via the size/signedness-specific
// __kmpc_dispatch_fini_(4|8)[u] entry point.
// NOTE(review): the IVSize/IVSigned parameter lines are elided in this
// extract; they are used below to select the runtime function.
1692 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1696   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1697   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1698                          getThreadID(CGF, Loc)};
1699   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
// Request the next chunk of a dynamically scheduled loop by calling
// __kmpc_dispatch_next. The runtime's kmp_int32 result is converted to the
// AST-level bool the caller branches on (nonzero => more work available).
1702 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1703                                           SourceLocation Loc, unsigned IVSize,
1704                                           bool IVSigned, llvm::Value *IL,
1705                                           llvm::Value *LB, llvm::Value *UB,
1707   // Call __kmpc_dispatch_next(
1708   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1709   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1710   //          kmp_int[32|64] *p_stride);
1711   llvm::Value *Args[] = {
1712       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
     // Convert the 32-bit signed return value to a boolean.
1719       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1720   return CGF.EmitScalarConversion(
1721       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1722       CGF.getContext().BoolTy);
// Lower a 'num_threads' clause: pass the requested thread count (cast to a
// signed i32) to the runtime before the parallel region starts.
1725 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1726                                            llvm::Value *NumThreads,
1727                                            SourceLocation Loc) {
1728   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1729   llvm::Value *Args[] = {
1730       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1731       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1732   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
// Lower a 'proc_bind' clause: map the AST clause kind onto the numeric
// constant the runtime expects and push it via __kmpc_push_proc_bind.
// NOTE(review): the switch header, the ProcBind* constant definitions and
// the 'break' statements are elided in this extract.
1736 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1737                                          OpenMPProcBindClauseKind ProcBind,
1738                                          SourceLocation Loc) {
1739   // Constants for proc bind value accepted by the runtime.
1750   case OMPC_PROC_BIND_master:
1751     RuntimeProcBind = ProcBindMaster;
1753   case OMPC_PROC_BIND_close:
1754     RuntimeProcBind = ProcBindClose;
1756   case OMPC_PROC_BIND_spread:
1757     RuntimeProcBind = ProcBindSpread;
1759   case OMPC_PROC_BIND_unknown:
1760     llvm_unreachable("Unsupported proc_bind value.");
1762   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1763   llvm::Value *Args[] = {
1764       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1765       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1766   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1769 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1770 SourceLocation Loc) {
1771 // Build call void __kmpc_flush(ident_t *loc)
1772 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1773 emitUpdateLocation(CGF, Loc));
// Field indices into the kmp_task_t record built by createKmpTaskTRecordDecl;
// used below with std::next(field_begin(), <index>) to address fields.
// NOTE(review): the enumerator names before KmpTaskTDestructors (shareds,
// routine, part_id) are elided in this extract.
1777 /// \brief Indexes of fields for type kmp_task_t.
1778 enum KmpTaskTFields {
1779   /// \brief List of shared variables.
1781   /// \brief Task routine.
1783   /// \brief Partition id for the untied tasks.
1785   /// \brief Function with call of destructors for private variables.
1786   KmpTaskTDestructors,
// Lazily build the AST and LLVM types for the runtime's task entry-point
// typedef: kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *).
// Idempotent: does nothing once KmpRoutineEntryPtrTy is cached.
1790 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1791   if (!KmpRoutineEntryPtrTy) {
1792     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1793     auto &C = CGM.getContext();
1794     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1795     FunctionProtoType::ExtProtoInfo EPI;
1796     KmpRoutineEntryPtrQTy = C.getPointerType(
1797         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1798     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
// Append an unnamed public field of type FieldTy to the record DC being
// defined (helper for building the implicit kmp_* record types below).
// NOTE(review): the trailing lines of this helper are elided in this extract.
1802 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1804   auto *Field = FieldDecl::Create(
1805       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1806       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1807       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1808   Field->setAccess(AS_public);
// Bundle describing one private/firstprivate variable of a task:
//   Original        - the captured variable from the enclosing context.
//   PrivateCopy     - the task-local copy declared for it.
//   PrivateElemInit - for firstprivates, the element used to initialize the
//                     copy; nullptr for plain privates (see emitTaskCall).
1813 struct PrivateHelpersTy {
1814   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1815                    const VarDecl *PrivateElemInit)
1816       : Original(Original), PrivateCopy(PrivateCopy),
1817         PrivateElemInit(PrivateElemInit) {}
1818   const VarDecl *Original;
1819   const VarDecl *PrivateCopy;
1820   const VarDecl *PrivateElemInit;
// Pair of (alignment of the original variable, helper bundle); the alignment
// is the sort key used by array_pod_sort_comparator.
1822 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
// Build the implicit record .kmp_privates.t holding one field per private
// variable (reference-ness stripped). Only built when Privates is non-empty;
// the elided tail presumably returns the record (or nullptr) to the caller —
// callers below null-check the result.
1826 createPrivatesRecordDecl(CodeGenModule &CGM,
1827                          const ArrayRef<PrivateDataTy> Privates) {
1828   if (!Privates.empty()) {
1829     auto &C = CGM.getContext();
1830     // Build struct .kmp_privates_t. {
1831     //         /* private vars */
1833     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1834     RD->startDefinition();
1835     for (auto &&Pair : Privates) {
1836       auto Type = Pair.second.Original->getType();
       // References are stored by value in the task's privates block.
1837       Type = Type.getNonReferenceType();
1838       addFieldToRecordDecl(C, RD, Type);
1840     RD->completeDefinition();
// Build the implicit kmp_task_t record mirroring the runtime's task
// descriptor layout: shareds pointer, routine, part_id, destructors.
// Field order must match the KmpTaskTFields enum above.
1847 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1848                          QualType KmpRoutineEntryPointerQTy) {
1849   auto &C = CGM.getContext();
1850   // Build struct kmp_task_t {
1852   //         kmp_routine_entry_t routine;
1853   //         kmp_int32           part_id;
1854   //         kmp_routine_entry_t destructors;
1856   auto *RD = C.buildImplicitRecord("kmp_task_t");
1857   RD->startDefinition();
1858   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1859   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1860   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1861   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1862   RD->completeDefinition();
// Build kmp_task_t_with_privates: the runtime-visible kmp_task_t header
// followed (only when there are privates) by the task's privates record.
// Code below relies on the privates record being field #1 when present.
1867 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
1868                                      const ArrayRef<PrivateDataTy> Privates) {
1869   auto &C = CGM.getContext();
1870   // Build struct kmp_task_t_with_privates {
1871   //         kmp_task_t task_data;
1872   //         .kmp_privates_t. privates;
1874   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
1875   RD->startDefinition();
1876   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
1877   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
1878     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
1880   RD->completeDefinition();
// NOTE(review): several interior lines of this function are elided in this
// extract; comments describe only the visible code.
1884 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1887 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1888 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
1893 static llvm::Value *
1894 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1895                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
1896                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
1897                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
1898                       llvm::Value *TaskPrivatesMap) {
1899   auto &C = CGM.getContext();
   // Signature of the proxy: (kmp_int32 gtid, kmp_task_t_with_privates *tt).
1900   FunctionArgList Args;
1901   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1902   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1903                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1904   Args.push_back(&GtidArg);
1905   Args.push_back(&TaskTypeArg);
1906   FunctionType::ExtInfo Info;
1907   auto &TaskEntryFnInfo =
1908       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1909                                                     /*isVariadic=*/false);
1910   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
   // Internal linkage: the proxy is only referenced through the task record.
1912       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1913                              ".omp_task_entry.", &CGM.getModule());
1914   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1915   CodeGenFunction CGF(CGM);
1916   CGF.disableDebugInfo();
1917   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1919   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
1920   //              tt->task_data.shareds);
1921   auto *GtidParam = CGF.EmitLoadOfScalar(
1922       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1923       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1924   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1925       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1927       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1928   auto *KmpTaskTWithPrivatesQTyRD =
1929       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
   // First field of kmp_task_t_with_privates is the kmp_task_t header.
1931       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
1932   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
1933   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
1934   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
1935   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
1937   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
1938   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
1939   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1940       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
1941       CGF.ConvertTypeForMem(SharedsPtrTy));
   // Pass &tt->privates when the task has privates, null otherwise.
1943   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
1944   llvm::Value *PrivatesParam;
1945   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
1946     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
1947     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1948         PrivatesLVal.getAddress(), CGF.VoidPtrTy);
1950     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
1953   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
1954                              TaskPrivatesMap, SharedsParam};
1955   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
   // The proxy always reports success (returns 0) to the runtime.
1956   CGF.EmitStoreThroughLValue(
1957       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1958       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1959   CGF.FinishFunction();
// Emit .omp_task_destructor.: a thunk with the kmp_routine_entry_t signature
// that runs the destructor of every destructible field in the task's
// privates record. Stored in kmp_task_t's 'destructors' field by the caller.
// NOTE(review): a few interior lines are elided in this extract.
1963 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
1965                                             QualType KmpInt32Ty,
1966                                             QualType KmpTaskTWithPrivatesPtrQTy,
1967                                             QualType KmpTaskTWithPrivatesQTy) {
1968   auto &C = CGM.getContext();
1969   FunctionArgList Args;
1970   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1971   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1972                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1973   Args.push_back(&GtidArg);
1974   Args.push_back(&TaskTypeArg);
1975   FunctionType::ExtInfo Info;
1976   auto &DestructorFnInfo =
1977       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1978                                                     /*isVariadic=*/false);
1979   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
1980   auto *DestructorFn =
1981       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
1982                              ".omp_task_destructor.", &CGM.getModule());
1983   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
1984   CodeGenFunction CGF(CGM);
1985   CGF.disableDebugInfo();
1986   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
1989   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1990       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1992       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1993   auto *KmpTaskTWithPrivatesQTyRD =
1994       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
   // Field #1 of kmp_task_t_with_privates is the privates record.
1995   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
1996   Base = CGF.EmitLValueForField(Base, *FI);
1998        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
   // Queue a destructor cleanup for each field that needs one.
1999     if (auto DtorKind = Field->getType().isDestructedType()) {
2000       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2001       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2004   CGF.FinishFunction();
2005   return DestructorFn;
// NOTE(review): some interior lines are elided in this extract; comments
// describe only the visible code.
2008 /// \brief Emit a privates mapping function for correct handling of private and
2009 /// firstprivate variables.
2011 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2012 /// **noalias priv1,...,  <tyn> **noalias privn) {
2013 ///   *priv1 = &.privates.priv1;
2015 ///   *privn = &.privates.privn;
2018 static llvm::Value *
2019 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2020                                const ArrayRef<const Expr *> PrivateVars,
2021                                const ArrayRef<const Expr *> FirstprivateVars,
2022                                QualType PrivatesQTy,
2023                                const ArrayRef<PrivateDataTy> Privates) {
2024   auto &C = CGM.getContext();
2025   FunctionArgList Args;
   // First parameter: const restrict pointer to the task's privates record.
2026   ImplicitParamDecl TaskPrivatesArg(
2027       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2028       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2029   Args.push_back(&TaskPrivatesArg);
   // Remember each variable's 1-based argument position so the loop below can
   // match record fields (sorted by alignment) back to their out-parameters.
2030   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2031   unsigned Counter = 1;
2032   for (auto *E: PrivateVars) {
2033     Args.push_back(ImplicitParamDecl::Create(
2034         C, /*DC=*/nullptr, Loc,
2035         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2038     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2039     PrivateVarsPos[VD] = Counter;
2042   for (auto *E : FirstprivateVars) {
2043     Args.push_back(ImplicitParamDecl::Create(
2044         C, /*DC=*/nullptr, Loc,
2045         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2048     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2049     PrivateVarsPos[VD] = Counter;
2052   FunctionType::ExtInfo Info;
2053   auto &TaskPrivatesMapFnInfo =
2054       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2055                                                     /*isVariadic=*/false);
2056   auto *TaskPrivatesMapTy =
2057       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2058   auto *TaskPrivatesMap = llvm::Function::Create(
2059       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2060       ".omp_task_privates_map.", &CGM.getModule());
2061   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
   // The mapping function is trivial; force inlining at call sites.
2063   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2064   CodeGenFunction CGF(CGM);
2065   CGF.disableDebugInfo();
2066   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2067                     TaskPrivatesMapFnInfo, Args);
2069   // *privi = &.privates.privi;
2070   auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad(
2071       CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes);
2073       CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy);
2074   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2076   for (auto *Field : PrivatesQTyRD->fields()) {
2077     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
     // Map the field (ordered by alignment) to its matching out-parameter.
2078     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2079     auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD),
2081     auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc);
2082     CGF.EmitStoreOfScalar(
2083         FieldLVal.getAddress(),
2084         CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(),
2085                                        RefLVal.getType()->getPointeeType()));
2088   CGF.FinishFunction();
2089   return TaskPrivatesMap;
2092 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2093 const PrivateDataTy *P2) {
2094 return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
// Lower a '#pragma omp task' directive:
//  1. Collect private/firstprivate variables and sort them by alignment.
//  2. Build the kmp_task_t / kmp_task_t_with_privates record types.
//  3. Emit the helper functions (privates map, proxy entry, destructors).
//  4. Allocate the task with __kmpc_omp_task_alloc, copy shareds, initialize
//     private copies, and record dependences.
//  5. Enqueue it with __kmpc_omp_task[_with_deps], or — under a false 'if'
//     clause — execute it immediately via the if0 entry points.
// NOTE(review): many interior lines of this function are elided in this
// extract; comments describe only the visible code.
2097 void CGOpenMPRuntime::emitTaskCall(
2098     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2099     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2100     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
2101     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
2102     ArrayRef<const Expr *> PrivateCopies,
2103     ArrayRef<const Expr *> FirstprivateVars,
2104     ArrayRef<const Expr *> FirstprivateCopies,
2105     ArrayRef<const Expr *> FirstprivateInits,
2106     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
2107   auto &C = CGM.getContext();
2108   llvm::SmallVector<PrivateDataTy, 8> Privates;
2109   // Aggregate privates and sort them by the alignment.
2110   auto I = PrivateCopies.begin();
2111   for (auto *E : PrivateVars) {
2112     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2113     Privates.push_back(std::make_pair(
2114         C.getTypeAlignInChars(VD->getType()),
2115         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2116                          /*PrivateElemInit=*/nullptr)));
   // Firstprivates additionally carry the init element declaration.
2119   I = FirstprivateCopies.begin();
2120   auto IElemInitRef = FirstprivateInits.begin();
2121   for (auto *E : FirstprivateVars) {
2122     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2123     Privates.push_back(std::make_pair(
2124         C.getTypeAlignInChars(VD->getType()),
2126             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2127             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2128     ++I, ++IElemInitRef;
   // Descending-alignment order; see array_pod_sort_comparator.
2130   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2131                        array_pod_sort_comparator);
2132   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2133   // Build type kmp_routine_entry_t (if not built yet).
2134   emitKmpRoutineEntryT(KmpInt32Ty);
2135   // Build type kmp_task_t (if not built yet).
2136   if (KmpTaskTQTy.isNull()) {
2137     KmpTaskTQTy = C.getRecordType(
2138         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2140   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2141   // Build particular struct kmp_task_t for the given task.
2142   auto *KmpTaskTWithPrivatesQTyRD =
2143       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2144   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2145   QualType KmpTaskTWithPrivatesPtrQTy =
2146       C.getPointerType(KmpTaskTWithPrivatesQTy);
2147   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2148   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2149   auto KmpTaskTWithPrivatesTySize =
2150       CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy));
2151   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2153   // Emit initial values for private copies (if any).
2154   llvm::Value *TaskPrivatesMap = nullptr;
   // The map-function parameter type is taken from TaskFunction's signature.
2155   auto *TaskPrivatesMapTy =
2156       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2159   if (!Privates.empty()) {
2160     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2161     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2162         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2163     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2164         TaskPrivatesMap, TaskPrivatesMapTy);
2166     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2167         cast<llvm::PointerType>(TaskPrivatesMapTy));
2169   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2171   auto *TaskEntry = emitProxyTaskFunction(
2172       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2173       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2175   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2176   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2177   // kmp_routine_entry_t *task_entry);
2178   // Task flags. Format is taken from
2179   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2180   // description of kmp_tasking_flags struct.
2181   const unsigned TiedFlag = 0x1;
2182   const unsigned FinalFlag = 0x2;
2183   unsigned Flags = Tied ? TiedFlag : 0;
   // 'final' may be a runtime value (select) or a compile-time constant.
2186           ? CGF.Builder.CreateSelect(Final.getPointer(),
2187                                      CGF.Builder.getInt32(FinalFlag),
2188                                      CGF.Builder.getInt32(/*C=*/0))
2189           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2190   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2191   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
2192   llvm::Value *AllocArgs[] = {
2193       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
2194       KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize),
2195       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
2196                                                       KmpRoutineEntryPtrTy)};
2197   auto *NewTask = CGF.EmitRuntimeCall(
2198       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
2199   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2200       NewTask, KmpTaskTWithPrivatesPtrTy);
2201   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2202                                                KmpTaskTWithPrivatesQTy);
2204       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2205   // Fill the data in the resulting kmp_task_t record.
2206   // Copy shareds if there are any.
2207   llvm::Value *KmpTaskSharedsPtr = nullptr;
2208   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2209     KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
2210         CGF.EmitLValueForField(
2211             TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
2213     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2215   // Emit initial values for private copies (if any).
2216   bool NeedsCleanup = false;
2217   if (!Privates.empty()) {
2218     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2219     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
2220     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
2222     if (!FirstprivateVars.empty()) {
2223       SharedsBase = CGF.MakeNaturalAlignAddrLValue(
2224           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2225               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2228     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2229         cast<CapturedStmt>(*D.getAssociatedStmt()));
2230     for (auto &&Pair : Privates) {
2231       auto *VD = Pair.second.PrivateCopy;
2232       auto *Init = VD->getAnyInitializer();
2233       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
       // Non-null PrivateElemInit => firstprivate: copy from the shareds.
2235       if (auto *Elem = Pair.second.PrivateElemInit) {
2236         auto *OriginalVD = Pair.second.Original;
2237         auto *SharedField = CapturesInfo.lookup(OriginalVD);
2238         auto SharedRefLValue =
2239             CGF.EmitLValueForField(SharedsBase, SharedField);
2240         QualType Type = OriginalVD->getType();
2241         if (Type->isArrayType()) {
2242           // Initialize firstprivate array.
2243           if (!isa<CXXConstructExpr>(Init) ||
2244               CGF.isTrivialInitializer(Init)) {
2245             // Perform simple memcpy.
2246             CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2247                                     SharedRefLValue.getAddress(), Type);
2249             // Initialize firstprivate array using element-by-element
2251             CGF.EmitOMPAggregateAssign(
2252                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2253                 Type, [&CGF, Elem, Init, &CapturesInfo](
2254                           llvm::Value *DestElement, llvm::Value *SrcElement) {
2255                   // Clean up any temporaries needed by the initialization.
2256                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
2257                   InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
2260                   (void)InitScope.Privatize();
2261                   // Emit initialization for single element.
2262                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
2263                       CGF, &CapturesInfo);
2264                   CGF.EmitAnyExprToMem(Init, DestElement,
2265                                        Init->getType().getQualifiers(),
2266                                        /*IsInitializer=*/false);
         // Non-array firstprivate: privatize the source element and run the
         // copy's initializer against it.
2270           CodeGenFunction::OMPPrivateScope InitScope(CGF);
2271           InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
2272             return SharedRefLValue.getAddress();
2274           (void)InitScope.Privatize();
2275           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
2276           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2277                              /*capturedByInit=*/false);
       // Plain private with an initializer: default-initialize the copy.
2280         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
       // Any destructible field means we must emit a destructors thunk.
2283       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2287   // Provide pointer to function with destructors for privates.
2288   llvm::Value *DestructorFn =
2289       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2290                                              KmpTaskTWithPrivatesPtrQTy,
2291                                              KmpTaskTWithPrivatesQTy)
2292                    : llvm::ConstantPointerNull::get(
2293                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2294   LValue Destructor = CGF.EmitLValueForField(
2295       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2296   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2297                             DestructorFn, KmpRoutineEntryPtrTy),
2300   // Process list of dependences.
2301   llvm::Value *DependInfo = nullptr;
2302   unsigned DependencesNumber = Dependences.size();
2303   if (!Dependences.empty()) {
2304     // Dependence kind for RTL.
2305     enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
2306     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
2307     RecordDecl *KmpDependInfoRD;
     // flags field is a bool-sized unsigned integer.
2308     QualType FlagsTy = C.getIntTypeForBitwidth(
2309         C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false);
2310     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
     // Lazily build kmp_depend_info { intptr base_addr; size_t len; flags; }.
2311     if (KmpDependInfoTy.isNull()) {
2312       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
2313       KmpDependInfoRD->startDefinition();
2314       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
2315       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
2316       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
2317       KmpDependInfoRD->completeDefinition();
2318       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
2320       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
2322     // Define type kmp_depend_info[<Dependences.size()>];
2323     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
2324         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()),
2325         ArrayType::Normal, /*IndexTypeQuals=*/0);
2326     // kmp_depend_info[<Dependences.size()>] deps;
2327     DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy);
2328     for (unsigned i = 0; i < DependencesNumber; ++i) {
2329       auto Addr = CGF.EmitLValue(Dependences[i].second);
2330       auto *Size = llvm::ConstantInt::get(
2332           C.getTypeSizeInChars(Dependences[i].second->getType()).getQuantity());
2333       auto Base = CGF.MakeNaturalAlignAddrLValue(
2334           CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i),
2336       // deps[i].base_addr = &<Dependences[i].second>;
2337       auto BaseAddrLVal = CGF.EmitLValueForField(
2338           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
2339       CGF.EmitStoreOfScalar(
2340           CGF.Builder.CreatePtrToInt(Addr.getAddress(), CGF.IntPtrTy),
2342       // deps[i].len = sizeof(<Dependences[i].second>);
2343       auto LenLVal = CGF.EmitLValueForField(
2344           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
2345       CGF.EmitStoreOfScalar(Size, LenLVal);
2346       // deps[i].flags = <Dependences[i].first>;
2347       RTLDependenceKindTy DepKind;
2348       switch (Dependences[i].first) {
2349       case OMPC_DEPEND_in:
2352       case OMPC_DEPEND_out:
2355       case OMPC_DEPEND_inout:
2358       case OMPC_DEPEND_unknown:
2359         llvm_unreachable("Unknown task dependence type");
2361       auto FlagsLVal = CGF.EmitLValueForField(
2362           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
2363       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
     // Decay the deps array to a pointer to its first element.
2366     DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2367         CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0),
2371   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
2373   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2375   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2376   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2377   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
2378   // list is not empty
2379   auto *ThreadID = getThreadID(CGF, Loc);
2380   auto *UpLoc = emitUpdateLocation(CGF, Loc);
2381   llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
2382   llvm::Value *DepTaskArgs[] = {
2386       DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
2388       DependInfo ? CGF.Builder.getInt32(0) : nullptr,
2389       DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
   // Then-branch of the 'if' clause: enqueue the task normally.
2390   auto &&ThenCodeGen = [this, DependInfo, &TaskArgs,
2391                         &DepTaskArgs](CodeGenFunction &CGF) {
2392     // TODO: add check for untied tasks.
2393     CGF.EmitRuntimeCall(
2394         createRuntimeFunction(DependInfo ? OMPRTL__kmpc_omp_task_with_deps
2395                                          : OMPRTL__kmpc_omp_task),
2396         DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs));
2398   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
2400   llvm::Value *DepWaitTaskArgs[] = {
2403       DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
2405       DependInfo ? CGF.Builder.getInt32(0) : nullptr,
2406       DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
   // Else-branch of the 'if' clause: execute the task immediately, bracketed
   // by the if0 begin/complete runtime calls (complete runs as an EH cleanup).
2407   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
2408                         DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) {
2409     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
2410     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2411     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
2412     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
2415       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
2417     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
2418     // kmp_task_t *new_task);
2419     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
2421     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2422     // kmp_task_t *new_task);
2423     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
2425         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
2426         llvm::makeArrayRef(TaskArgs));
2428     // Call proxy_task_entry(gtid, new_task);
2429     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
2430     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
   // With an 'if' clause, branch between the two code paths at runtime.
2433     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
2435     CodeGenFunction::RunCleanupsScope Scope(CGF);
// Emit .omp.reduction.reduction_func: void(void *lhs, void *rhs) that casts
// both arguments to arrays of pointers, privatizes each LHS/RHS pair to the
// corresponding array slot, and replays the reduction-op expressions.
// NOTE(review): a few interior lines are elided in this extract.
2440 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2441                                           llvm::Type *ArgsType,
2442                                           ArrayRef<const Expr *> LHSExprs,
2443                                           ArrayRef<const Expr *> RHSExprs,
2444                                           ArrayRef<const Expr *> ReductionOps) {
2445   auto &C = CGM.getContext();
2447   // void reduction_func(void *LHSArg, void *RHSArg);
2448   FunctionArgList Args;
2449   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2451   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2453   Args.push_back(&LHSArg);
2454   Args.push_back(&RHSArg);
2455   FunctionType::ExtInfo EI;
2456   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2457       C.VoidTy, Args, EI, /*isVariadic=*/false);
2458   auto *Fn = llvm::Function::Create(
2459       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2460       ".omp.reduction.reduction_func", &CGM.getModule());
2461   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
2462   CodeGenFunction CGF(CGM);
2463   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2465   // Dst = (void*[n])(LHSArg);
2466   // Src = (void*[n])(RHSArg);
2467   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2468       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
2469                                     CGF.PointerAlignInBytes),
2471   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2472       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
2473                                     CGF.PointerAlignInBytes),
2477   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
   // Map each reduction variable onto its slot in the incoming arrays so the
   // reduction-op expressions below operate on the caller's storage.
2479   CodeGenFunction::OMPPrivateScope Scope(CGF);
2480   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
2482         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
2483         [&]() -> llvm::Value *{
2484           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2485               CGF.Builder.CreateAlignedLoad(
2486                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
2487                   CGM.PointerAlignInBytes),
2488               CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
2491         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
2492         [&]() -> llvm::Value *{
2493           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2494               CGF.Builder.CreateAlignedLoad(
2495                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
2496                   CGM.PointerAlignInBytes),
2497               CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
   // Replay the reduction operations against the privatized variables.
2501   for (auto *E : ReductionOps) {
2502     CGF.EmitIgnoredExpr(E);
2504   Scope.ForceCleanup();
2505   CGF.FinishFunction();
// Emits the runtime code sequence for an OpenMP 'reduction' clause:
// builds a void*[n] list of addresses of the thread-private (RHS) copies,
// emits the combiner function reduce_func, then dispatches on the result of
// __kmpc_reduce{_nowait}: case 1 combines non-atomically and releases the
// lock via __kmpc_end_reduce{_nowait}; case 2 combines atomically (or via a
// named critical region when no simple atomic form exists).
// NOTE(review): this excerpt has gaps in the original line numbering — e.g.
// the early 'return' after the SimpleReduction fast path, several closing
// braces, and the second operand of some casts are elided. Confirm against
// the upstream file before editing.
2509 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2510 ArrayRef<const Expr *> LHSExprs,
2511 ArrayRef<const Expr *> RHSExprs,
2512 ArrayRef<const Expr *> ReductionOps,
2513 bool WithNowait, bool SimpleReduction) {
2514 // Next code should be emitted for reduction:
2516 // static kmp_critical_name lock = { 0 };
2518 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2519 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2521 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2522 // *(Type<n>-1*)rhs[<n>-1]);
2526 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2527 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2528 // RedList, reduce_func, &<lock>)) {
2531 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2533 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2537 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2539 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2544 // if SimpleReduction is true, only the next code is generated:
2546 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2549 auto &C = CGM.getContext();
// Fast path: no runtime coordination needed — just emit each reduction op
// in order inside a cleanups scope. (The early 'return' that follows this
// loop is elided from this excerpt.)
2551 if (SimpleReduction) {
2552 CodeGenFunction::RunCleanupsScope Scope(CGF);
2553 for (auto *E : ReductionOps) {
2554 CGF.EmitIgnoredExpr(E);
2559 // 1. Build a list of reduction variables.
2560 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2561 llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
2562 QualType ReductionArrayTy =
2563 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2564 /*IndexTypeQuals=*/0);
2565 auto *ReductionList =
2566 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
// Store the address of each thread-private (RHS) copy, cast to void*, into
// the corresponding RedList slot.
2567 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
2568 auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
2569 CGF.Builder.CreateAlignedStore(
2570 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2571 CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
2572 Elem, CGM.PointerAlignInBytes);
2575 // 2. Emit reduce_func().
2576 auto *ReductionFn = emitReductionFunction(
2577 CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
2578 RHSExprs, ReductionOps);
2580 // 3. Create static kmp_critical_name lock = { 0 };
2581 auto *Lock = getCriticalRegionLock(".reduction");
2583 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2584 // RedList, reduce_func, &<lock>);
// OMP_ATOMIC_REDUCE flags the ident_t so the runtime knows an atomic
// fallback path (case 2) is available.
2585 auto *IdentTLoc = emitUpdateLocation(
2587 static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2588 auto *ThreadId = getThreadID(CGF, Loc);
2589 auto *ReductionArrayTySize = llvm::ConstantInt::get(
2590 CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
// NOTE(review): the destination type operand of this cast (line 2592 in the
// original) is elided from this excerpt.
2591 auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
2593 llvm::Value *Args[] = {
2594 IdentTLoc, // ident_t *<loc>
2595 ThreadId, // i32 <gtid>
2596 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2597 ReductionArrayTySize, // size_type sizeof(RedList)
2598 RL, // void *RedList
2599 ReductionFn, // void (*) (void *, void *) <reduce_func>
2600 Lock // kmp_critical_name *&<lock>
2602 auto Res = CGF.EmitRuntimeCall(
2603 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2604 : OMPRTL__kmpc_reduce),
2607 // 5. Build switch(res)
2608 auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2609 auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
// Case 1: the runtime granted exclusive access — combine values with the
// plain (non-atomic) reduction ops, then release via __kmpc_end_reduce.
2613 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2615 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2617 auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2618 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2619 CGF.EmitBlock(Case1BB);
2622 CodeGenFunction::RunCleanupsScope Scope(CGF);
2623 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2624 llvm::Value *EndArgs[] = {
2625 IdentTLoc, // ident_t *<loc>
2626 ThreadId, // i32 <gtid>
2627 Lock // kmp_critical_name *&<lock>
// Push the end-call as a cleanup so it runs on every exit path out of the
// scope, including exceptional/branching ones.
2630 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2632 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2633 : OMPRTL__kmpc_end_reduce),
2634 llvm::makeArrayRef(EndArgs));
2635 for (auto *E : ReductionOps) {
2636 CGF.EmitIgnoredExpr(E);
2640 CGF.EmitBranch(DefaultBB);
// Case 2: no exclusive access — each reduction must be applied atomically
// (or under a named critical region when no simple atomic form exists).
2644 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2647 auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2648 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2649 CGF.EmitBlock(Case2BB);
2652 CodeGenFunction::RunCleanupsScope Scope(CGF);
2654 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2655 llvm::Value *EndArgs[] = {
2656 IdentTLoc, // ident_t *<loc>
2657 ThreadId, // i32 <gtid>
2658 Lock // kmp_critical_name *&<lock>
2661 .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2663 createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2664 llvm::makeArrayRef(EndArgs));
// Walk LHSExprs in lockstep with ReductionOps ('I' advances once per op;
// the increment is elided from this excerpt).
2666 auto I = LHSExprs.begin();
2667 for (auto *E : ReductionOps) {
2668 const Expr *XExpr = nullptr;
2669 const Expr *EExpr = nullptr;
2670 const Expr *UpExpr = nullptr;
// BO_Comma acts as the "no simple atomic opcode found" sentinel —
// presumably tested by elided code below; confirm against upstream.
2671 BinaryOperatorKind BO = BO_Comma;
// Pattern-match 'lhs = op(lhs, rhs)' assignments so the update can be
// emitted as a simple atomic read-modify-write.
// NOTE(review): this inner 'BO' shadows the outer BinaryOperatorKind 'BO'
// declared just above — intentional in upstream, but easy to misread.
2672 if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2673 if (BO->getOpcode() == BO_Assign) {
2674 XExpr = BO->getLHS();
2675 UpExpr = BO->getRHS();
2678 // Try to emit update expression as a simple atomic.
2679 auto *RHSExpr = UpExpr;
2681 // Analyze RHS part of the whole expression.
2682 if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2683 RHSExpr->IgnoreParenImpCasts())) {
2684 // If this is a conditional operator, analyze its condition for
2685 // min/max reduction operator.
2686 RHSExpr = ACO->getCond();
2689 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2690 EExpr = BORHS->getRHS();
2691 BO = BORHS->getOpcode();
2695 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2696 LValue X = CGF.EmitLValue(XExpr);
2699 E = CGF.EmitAnyExpr(EExpr);
// Emit the atomic update; inside the callback the original LHS variable is
// privatized to a temp holding the just-read value so UpExpr can be
// re-evaluated against it.
2700 CGF.EmitOMPAtomicSimpleUpdateExpr(
2701 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
2702 [&CGF, UpExpr, VD](RValue XRValue) {
2703 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
2704 PrivateScope.addPrivate(
2705 VD, [&CGF, VD, XRValue]() -> llvm::Value *{
2706 auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
2707 CGF.EmitStoreThroughLValue(
2709 CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
2712 (void)PrivateScope.Privatize();
2713 return CGF.EmitAnyExpr(UpExpr);
2716 // Emit as a critical region.
// Fallback when no simple atomic form matched: run the whole reduction op
// under the ".atomic_reduction" named critical region.
2717 emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
2718 CGF.EmitIgnoredExpr(E);
2725 CGF.EmitBranch(DefaultBB);
2726 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
// Emits a call to __kmpc_omp_taskwait, which makes the current task wait for
// completion of its child tasks. Only the source location and the global
// thread id are passed; the runtime's result is discarded (see below).
2729 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
2730 SourceLocation Loc) {
2731 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
// global_tid);
2733 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2734 // Ignore return result until untied tasks are supported.
2735 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
// Emits the body of a directive inline (no outlined function). The RAII
// object presumably installs an inlined-region CapturedStmtInfo on CGF for
// the duration of the emission — see InlinedOpenMPRegionRAII's definition
// (not visible in this excerpt) to confirm.
2738 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
2739 OpenMPDirectiveKind InnerKind,
2740 const RegionCodeGenTy &CodeGen) {
2741 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind);
// S is null: the region's CodeGen callback supplies the statements.
2742 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
// Maps the directive kind named in a 'cancel'/'cancellation point' construct
// to the runtime's cncl_kind constant passed to __kmpc_cancel /
// __kmpc_cancellationpoint. Only parallel/for/sections/taskgroup are legal
// cancel regions; anything else trips the assert.
// NOTE(review): the 'else {' introducing the taskgroup branch and the final
// 'return CancelKind;' are elided from this excerpt — confirm upstream.
2755 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
2756 RTCancelKind CancelKind = CancelNoreq;
2757 if (CancelRegion == OMPD_parallel)
2758 CancelKind = CancelParallel;
2759 else if (CancelRegion == OMPD_for)
2760 CancelKind = CancelLoop;
2761 else if (CancelRegion == OMPD_sections)
2762 CancelKind = CancelSections;
2764 assert(CancelRegion == OMPD_taskgroup);
2765 CancelKind = CancelTaskgroup;
// Emits a 'cancellation point' check: calls __kmpc_cancellationpoint and, if
// it returns non-zero (cancellation requested), emits a cancellation barrier
// and branches out of the construct through its cleanups. Emission happens
// only inside a CGOpenMPRegionInfo region whose cancel destination is valid.
2770 void CGOpenMPRuntime::emitCancellationPointCall(
2771 CodeGenFunction &CGF, SourceLocation Loc,
2772 OpenMPDirectiveKind CancelRegion) {
2773 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2774 // global_tid, kmp_int32 cncl_kind);
2775 if (auto *OMPRegionInfo =
2776 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
// NOTE(review): the 'auto CancelDest =' declaration line (2777 upstream)
// that this call initializes is elided from this excerpt.
2778 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2779 if (CancelDest.isValid()) {
2780 llvm::Value *Args[] = {
2781 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2782 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2783 // Ignore return result until untied tasks are supported.
2784 auto *Result = CGF.EmitRuntimeCall(
2785 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
2786 // if (__kmpc_cancellationpoint()) {
2787 // __kmpc_cancel_barrier();
2788 // exit from construct;
2790 auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2791 auto *ContBB = CGF.createBasicBlock(".cancel.continue");
// Non-zero runtime result means cancellation was requested.
2792 auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2793 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2794 CGF.EmitBlock(ExitBB);
2795 // __kmpc_cancel_barrier();
2796 emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false);
2797 // exit from construct;
// Branch to the construct's cancel destination, running any pending
// cleanups on the way out.
2798 CGF.EmitBranchThroughCleanup(CancelDest);
2799 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2804 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
2805 OpenMPDirectiveKind CancelRegion) {
2806 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2807 // kmp_int32 cncl_kind);
2808 if (auto *OMPRegionInfo =
2809 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2811 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2812 if (CancelDest.isValid()) {
2813 llvm::Value *Args[] = {
2814 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2815 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2816 // Ignore return result until untied tasks are supported.
2818 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
2819 // if (__kmpc_cancel()) {
2820 // __kmpc_cancel_barrier();
2821 // exit from construct;
2823 auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2824 auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2825 auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2826 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2827 CGF.EmitBlock(ExitBB);
2828 // __kmpc_cancel_barrier();
2829 emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false);
2830 // exit from construct;
2831 CGF.EmitBranchThroughCleanup(CancelDest);
2832 CGF.EmitBlock(ContBB, /*IsFinished=*/true);