CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Merge clang trunk r238337 from ^/vendor/clang/dist, resolve conflicts,
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / clang / lib / CodeGen / CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26
27 using namespace clang;
28 using namespace CodeGen;
29
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44   };
45
46   CGOpenMPRegionInfo(const CapturedStmt &CS,
47                      const CGOpenMPRegionKind RegionKind,
48                      const RegionCodeGenTy &CodeGen)
49       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
50         CodeGen(CodeGen) {}
51
52   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen)
54       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
55         CodeGen(CodeGen) {}
56
57   /// \brief Get a variable or parameter for storing global thread id
58   /// inside OpenMP construct.
59   virtual const VarDecl *getThreadIDVariable() const = 0;
60
61   /// \brief Emit the captured statement body.
62   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
63
64   /// \brief Get an LValue for the current ThreadID variable.
65   /// \return LValue for thread id variable. This LValue always has type int32*.
66   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
67
68   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
69
70   static bool classof(const CGCapturedStmtInfo *Info) {
71     return Info->getKind() == CR_OpenMP;
72   }
73
74 protected:
75   CGOpenMPRegionKind RegionKind;
76   const RegionCodeGenTy &CodeGen;
77 };
78
79 /// \brief API for captured statement code generation in OpenMP constructs.
80 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
81 public:
82   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
83                              const RegionCodeGenTy &CodeGen)
84       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
85         ThreadIDVar(ThreadIDVar) {
86     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
87   }
88   /// \brief Get a variable or parameter for storing global thread id
89   /// inside OpenMP construct.
90   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
91
92   /// \brief Get the name of the capture helper.
93   StringRef getHelperName() const override { return ".omp_outlined."; }
94
95   static bool classof(const CGCapturedStmtInfo *Info) {
96     return CGOpenMPRegionInfo::classof(Info) &&
97            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
98                ParallelOutlinedRegion;
99   }
100
101 private:
102   /// \brief A variable or parameter storing global thread id for OpenMP
103   /// constructs.
104   const VarDecl *ThreadIDVar;
105 };
106
107 /// \brief API for captured statement code generation in OpenMP constructs.
108 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
109 public:
110   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
111                                  const VarDecl *ThreadIDVar,
112                                  const RegionCodeGenTy &CodeGen)
113       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
114         ThreadIDVar(ThreadIDVar) {
115     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
116   }
117   /// \brief Get a variable or parameter for storing global thread id
118   /// inside OpenMP construct.
119   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
120
121   /// \brief Get an LValue for the current ThreadID variable.
122   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
123
124   /// \brief Get the name of the capture helper.
125   StringRef getHelperName() const override { return ".omp_outlined."; }
126
127   static bool classof(const CGCapturedStmtInfo *Info) {
128     return CGOpenMPRegionInfo::classof(Info) &&
129            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
130                TaskOutlinedRegion;
131   }
132
133 private:
134   /// \brief A variable or parameter storing global thread id for OpenMP
135   /// constructs.
136   const VarDecl *ThreadIDVar;
137 };
138
139 /// \brief API for inlined captured statement code generation in OpenMP
140 /// constructs.
141 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
142 public:
143   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
144                             const RegionCodeGenTy &CodeGen)
145       : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
146         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
147   // \brief Retrieve the value of the context parameter.
148   llvm::Value *getContextValue() const override {
149     if (OuterRegionInfo)
150       return OuterRegionInfo->getContextValue();
151     llvm_unreachable("No context value for inlined OpenMP region");
152   }
153   virtual void setContextValue(llvm::Value *V) override {
154     if (OuterRegionInfo) {
155       OuterRegionInfo->setContextValue(V);
156       return;
157     }
158     llvm_unreachable("No context value for inlined OpenMP region");
159   }
160   /// \brief Lookup the captured field decl for a variable.
161   const FieldDecl *lookup(const VarDecl *VD) const override {
162     if (OuterRegionInfo)
163       return OuterRegionInfo->lookup(VD);
164     // If there is no outer outlined region,no need to lookup in a list of
165     // captured variables, we can use the original one.
166     return nullptr;
167   }
168   FieldDecl *getThisFieldDecl() const override {
169     if (OuterRegionInfo)
170       return OuterRegionInfo->getThisFieldDecl();
171     return nullptr;
172   }
173   /// \brief Get a variable or parameter for storing global thread id
174   /// inside OpenMP construct.
175   const VarDecl *getThreadIDVariable() const override {
176     if (OuterRegionInfo)
177       return OuterRegionInfo->getThreadIDVariable();
178     return nullptr;
179   }
180
181   /// \brief Get the name of the capture helper.
182   StringRef getHelperName() const override {
183     if (auto *OuterRegionInfo = getOldCSI())
184       return OuterRegionInfo->getHelperName();
185     llvm_unreachable("No helper name for inlined OpenMP construct");
186   }
187
188   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
189
190   static bool classof(const CGCapturedStmtInfo *Info) {
191     return CGOpenMPRegionInfo::classof(Info) &&
192            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
193   }
194
195 private:
196   /// \brief CodeGen info about outer OpenMP region.
197   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
198   CGOpenMPRegionInfo *OuterRegionInfo;
199 };
200
201 /// \brief RAII for emitting code of OpenMP constructs.
202 class InlinedOpenMPRegionRAII {
203   CodeGenFunction &CGF;
204
205 public:
206   /// \brief Constructs region for combined constructs.
207   /// \param CodeGen Code generation sequence for combined directives. Includes
208   /// a list of functions used for code generation of implicitly inlined
209   /// regions.
210   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
211       : CGF(CGF) {
212     // Start emission for the construct.
213     CGF.CapturedStmtInfo =
214         new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
215   }
216   ~InlinedOpenMPRegionRAII() {
217     // Restore original CapturedStmtInfo only if we're done with code emission.
218     auto *OldCSI =
219         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
220     delete CGF.CapturedStmtInfo;
221     CGF.CapturedStmtInfo = OldCSI;
222   }
223 };
224
225 } // namespace
226
227 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
228   return CGF.MakeNaturalAlignAddrLValue(
229       CGF.Builder.CreateAlignedLoad(
230           CGF.GetAddrOfLocalVar(getThreadIDVariable()),
231           CGF.PointerAlignInBytes),
232       getThreadIDVariable()
233           ->getType()
234           ->castAs<PointerType>()
235           ->getPointeeType());
236 }
237
238 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
239   // 1.2.2 OpenMP Language Terminology
240   // Structured block - An executable statement with a single entry at the
241   // top and a single exit at the bottom.
242   // The point of exit cannot be a branch out of the structured block.
243   // longjmp() and throw() must not violate the entry/exit criteria.
244   CGF.EHStack.pushTerminate();
245   {
246     CodeGenFunction::RunCleanupsScope Scope(CGF);
247     CodeGen(CGF);
248   }
249   CGF.EHStack.popTerminate();
250 }
251
252 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
253     CodeGenFunction &CGF) {
254   return CGF.MakeNaturalAlignAddrLValue(
255       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
256       getThreadIDVariable()->getType());
257 }
258
259 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
260     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
261   IdentTy = llvm::StructType::create(
262       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
263       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
264       CGM.Int8PtrTy /* psource */, nullptr);
265   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
266   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
267                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
268   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
269   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
270 }
271
272 void CGOpenMPRuntime::clear() {
273   InternalVars.clear();
274 }
275
276 llvm::Value *
277 CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
278                                               const VarDecl *ThreadIDVar,
279                                               const RegionCodeGenTy &CodeGen) {
280   assert(ThreadIDVar->getType()->isPointerType() &&
281          "thread id variable must be of type kmp_int32 *");
282   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
283   CodeGenFunction CGF(CGM, true);
284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
285   CGF.CapturedStmtInfo = &CGInfo;
286   return CGF.GenerateCapturedStmtFunction(*CS);
287 }
288
289 llvm::Value *
290 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
291                                           const VarDecl *ThreadIDVar,
292                                           const RegionCodeGenTy &CodeGen) {
293   assert(!ThreadIDVar->getType()->isPointerType() &&
294          "thread id variable must be of type kmp_int32 for tasks");
295   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
296   CodeGenFunction CGF(CGM, true);
297   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
298   CGF.CapturedStmtInfo = &CGInfo;
299   return CGF.GenerateCapturedStmtFunction(*CS);
300 }
301
302 llvm::Value *
303 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
304   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
305   if (!Entry) {
306     if (!DefaultOpenMPPSource) {
307       // Initialize default location for psource field of ident_t structure of
308       // all ident_t objects. Format is ";file;function;line;column;;".
309       // Taken from
310       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
311       DefaultOpenMPPSource =
312           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
313       DefaultOpenMPPSource =
314           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
315     }
316     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
317         CGM.getModule(), IdentTy, /*isConstant*/ true,
318         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
319     DefaultOpenMPLocation->setUnnamedAddr(true);
320
321     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
322     llvm::Constant *Values[] = {Zero,
323                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
324                                 Zero, Zero, DefaultOpenMPPSource};
325     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
326     DefaultOpenMPLocation->setInitializer(Init);
327     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
328     return DefaultOpenMPLocation;
329   }
330   return Entry;
331 }
332
333 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
334                                                  SourceLocation Loc,
335                                                  OpenMPLocationFlags Flags) {
336   // If no debug info is generated - return global default location.
337   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
338       Loc.isInvalid())
339     return getOrCreateDefaultLocation(Flags);
340
341   assert(CGF.CurFn && "No function in current CodeGenFunction.");
342
343   llvm::Value *LocValue = nullptr;
344   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
345   if (I != OpenMPLocThreadIDMap.end())
346     LocValue = I->second.DebugLoc;
347   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
348   // GetOpenMPThreadID was called before this routine.
349   if (LocValue == nullptr) {
350     // Generate "ident_t .kmpc_loc.addr;"
351     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
352     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
353     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
354     Elem.second.DebugLoc = AI;
355     LocValue = AI;
356
357     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
358     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
359     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
360                              llvm::ConstantExpr::getSizeOf(IdentTy),
361                              CGM.PointerAlignInBytes);
362   }
363
364   // char **psource = &.kmpc_loc_<flags>.addr.psource;
365   auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
366                                                          IdentField_PSource);
367
368   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
369   if (OMPDebugLoc == nullptr) {
370     SmallString<128> Buffer2;
371     llvm::raw_svector_ostream OS2(Buffer2);
372     // Build debug location
373     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
374     OS2 << ";" << PLoc.getFilename() << ";";
375     if (const FunctionDecl *FD =
376             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
377       OS2 << FD->getQualifiedNameAsString();
378     }
379     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
380     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
381     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
382   }
383   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
384   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
385
386   return LocValue;
387 }
388
389 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
390                                           SourceLocation Loc) {
391   assert(CGF.CurFn && "No function in current CodeGenFunction.");
392
393   llvm::Value *ThreadID = nullptr;
394   // Check whether we've already cached a load of the thread id in this
395   // function.
396   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
397   if (I != OpenMPLocThreadIDMap.end()) {
398     ThreadID = I->second.ThreadID;
399     if (ThreadID != nullptr)
400       return ThreadID;
401   }
402   if (auto OMPRegionInfo =
403           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
404     if (OMPRegionInfo->getThreadIDVariable()) {
405       // Check if this an outlined function with thread id passed as argument.
406       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
407       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
408       // If value loaded in entry block, cache it and use it everywhere in
409       // function.
410       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
411         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
412         Elem.second.ThreadID = ThreadID;
413       }
414       return ThreadID;
415     }
416   }
417
418   // This is not an outlined function region - need to call __kmpc_int32
419   // kmpc_global_thread_num(ident_t *loc).
420   // Generate thread id value and cache this value for use across the
421   // function.
422   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
423   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
424   ThreadID =
425       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
426                           emitUpdateLocation(CGF, Loc));
427   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
428   Elem.second.ThreadID = ThreadID;
429   return ThreadID;
430 }
431
432 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
433   assert(CGF.CurFn && "No function in current CodeGenFunction.");
434   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
435     OpenMPLocThreadIDMap.erase(CGF.CurFn);
436 }
437
438 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
439   return llvm::PointerType::getUnqual(IdentTy);
440 }
441
442 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
443   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
444 }
445
446 llvm::Constant *
447 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
448   llvm::Constant *RTLFn = nullptr;
449   switch (Function) {
450   case OMPRTL__kmpc_fork_call: {
451     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
452     // microtask, ...);
453     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
454                                 getKmpc_MicroPointerTy()};
455     llvm::FunctionType *FnTy =
456         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
457     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
458     break;
459   }
460   case OMPRTL__kmpc_global_thread_num: {
461     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
462     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
463     llvm::FunctionType *FnTy =
464         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
465     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
466     break;
467   }
468   case OMPRTL__kmpc_threadprivate_cached: {
469     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
470     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
471     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
472                                 CGM.VoidPtrTy, CGM.SizeTy,
473                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
474     llvm::FunctionType *FnTy =
475         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
476     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
477     break;
478   }
479   case OMPRTL__kmpc_critical: {
480     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
481     // kmp_critical_name *crit);
482     llvm::Type *TypeParams[] = {
483         getIdentTyPointerTy(), CGM.Int32Ty,
484         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
485     llvm::FunctionType *FnTy =
486         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
487     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
488     break;
489   }
490   case OMPRTL__kmpc_threadprivate_register: {
491     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
492     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
493     // typedef void *(*kmpc_ctor)(void *);
494     auto KmpcCtorTy =
495         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
496                                 /*isVarArg*/ false)->getPointerTo();
497     // typedef void *(*kmpc_cctor)(void *, void *);
498     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
499     auto KmpcCopyCtorTy =
500         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
501                                 /*isVarArg*/ false)->getPointerTo();
502     // typedef void (*kmpc_dtor)(void *);
503     auto KmpcDtorTy =
504         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
505             ->getPointerTo();
506     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
507                               KmpcCopyCtorTy, KmpcDtorTy};
508     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
509                                         /*isVarArg*/ false);
510     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
511     break;
512   }
513   case OMPRTL__kmpc_end_critical: {
514     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
515     // kmp_critical_name *crit);
516     llvm::Type *TypeParams[] = {
517         getIdentTyPointerTy(), CGM.Int32Ty,
518         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
519     llvm::FunctionType *FnTy =
520         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
521     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
522     break;
523   }
524   case OMPRTL__kmpc_cancel_barrier: {
525     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
526     // global_tid);
527     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
528     llvm::FunctionType *FnTy =
529         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
530     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
531     break;
532   }
533   case OMPRTL__kmpc_for_static_fini: {
534     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
535     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
536     llvm::FunctionType *FnTy =
537         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
538     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
539     break;
540   }
541   case OMPRTL__kmpc_push_num_threads: {
542     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
543     // kmp_int32 num_threads)
544     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
545                                 CGM.Int32Ty};
546     llvm::FunctionType *FnTy =
547         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
549     break;
550   }
551   case OMPRTL__kmpc_serialized_parallel: {
552     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
553     // global_tid);
554     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
555     llvm::FunctionType *FnTy =
556         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
557     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
558     break;
559   }
560   case OMPRTL__kmpc_end_serialized_parallel: {
561     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
562     // global_tid);
563     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
564     llvm::FunctionType *FnTy =
565         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
566     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
567     break;
568   }
569   case OMPRTL__kmpc_flush: {
570     // Build void __kmpc_flush(ident_t *loc);
571     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
572     llvm::FunctionType *FnTy =
573         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
574     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
575     break;
576   }
577   case OMPRTL__kmpc_master: {
578     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
579     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
580     llvm::FunctionType *FnTy =
581         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
582     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
583     break;
584   }
585   case OMPRTL__kmpc_end_master: {
586     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
587     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
588     llvm::FunctionType *FnTy =
589         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
590     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
591     break;
592   }
593   case OMPRTL__kmpc_omp_taskyield: {
594     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
595     // int end_part);
596     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
597     llvm::FunctionType *FnTy =
598         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
599     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
600     break;
601   }
602   case OMPRTL__kmpc_single: {
603     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
604     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
605     llvm::FunctionType *FnTy =
606         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
607     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
608     break;
609   }
610   case OMPRTL__kmpc_end_single: {
611     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
612     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
613     llvm::FunctionType *FnTy =
614         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
615     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
616     break;
617   }
618   case OMPRTL__kmpc_omp_task_alloc: {
619     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
620     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
621     // kmp_routine_entry_t *task_entry);
622     assert(KmpRoutineEntryPtrTy != nullptr &&
623            "Type kmp_routine_entry_t must be created.");
624     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
625                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
626     // Return void * and then cast to particular kmp_task_t type.
627     llvm::FunctionType *FnTy =
628         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
629     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
630     break;
631   }
632   case OMPRTL__kmpc_omp_task: {
633     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
634     // *new_task);
635     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
636                                 CGM.VoidPtrTy};
637     llvm::FunctionType *FnTy =
638         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
639     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
640     break;
641   }
642   case OMPRTL__kmpc_copyprivate: {
643     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
644     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
645     // kmp_int32 didit);
646     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
647     auto *CpyFnTy =
648         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
650                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
651                                 CGM.Int32Ty};
652     llvm::FunctionType *FnTy =
653         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
654     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
655     break;
656   }
657   case OMPRTL__kmpc_reduce: {
658     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
659     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
660     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
661     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
662     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
663                                                /*isVarArg=*/false);
664     llvm::Type *TypeParams[] = {
665         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
666         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
667         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
668     llvm::FunctionType *FnTy =
669         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
670     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
671     break;
672   }
673   case OMPRTL__kmpc_reduce_nowait: {
674     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
675     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
676     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
677     // *lck);
678     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
679     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
680                                                /*isVarArg=*/false);
681     llvm::Type *TypeParams[] = {
682         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
683         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
684         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
685     llvm::FunctionType *FnTy =
686         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
687     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
688     break;
689   }
690   case OMPRTL__kmpc_end_reduce: {
691     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
692     // kmp_critical_name *lck);
693     llvm::Type *TypeParams[] = {
694         getIdentTyPointerTy(), CGM.Int32Ty,
695         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
696     llvm::FunctionType *FnTy =
697         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
698     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
699     break;
700   }
701   case OMPRTL__kmpc_end_reduce_nowait: {
702     // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
703     // kmp_critical_name *lck);
704     llvm::Type *TypeParams[] = {
705         getIdentTyPointerTy(), CGM.Int32Ty,
706         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
707     llvm::FunctionType *FnTy =
708         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
709     RTLFn =
710         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
711     break;
712   }
713   case OMPRTL__kmpc_omp_task_begin_if0: {
714     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
715     // kmp_task_t *new_task);
716     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
717                                 CGM.VoidPtrTy};
718     llvm::FunctionType *FnTy =
719         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
720     RTLFn =
721         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
722     break;
723   }
724   case OMPRTL__kmpc_omp_task_complete_if0: {
725     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
726     // kmp_task_t *new_task);
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
728                                 CGM.VoidPtrTy};
729     llvm::FunctionType *FnTy =
730         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
731     RTLFn = CGM.CreateRuntimeFunction(FnTy,
732                                       /*Name=*/"__kmpc_omp_task_complete_if0");
733     break;
734   }
735   case OMPRTL__kmpc_ordered: {
736     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
737     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
738     llvm::FunctionType *FnTy =
739         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
740     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
741     break;
742   }
743   case OMPRTL__kmpc_end_ordered: {
744     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
745     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
746     llvm::FunctionType *FnTy =
747         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
748     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
749     break;
750   }
751   case OMPRTL__kmpc_omp_taskwait: {
752     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
753     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
754     llvm::FunctionType *FnTy =
755         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
756     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
757     break;
758   }
759   }
760   return RTLFn;
761 }
762
763 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
764                                                              bool IVSigned) {
765   assert((IVSize == 32 || IVSize == 64) &&
766          "IV size is not compatible with the omp runtime");
767   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
768                                        : "__kmpc_for_static_init_4u")
769                            : (IVSigned ? "__kmpc_for_static_init_8"
770                                        : "__kmpc_for_static_init_8u");
771   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
772   auto PtrTy = llvm::PointerType::getUnqual(ITy);
773   llvm::Type *TypeParams[] = {
774     getIdentTyPointerTy(),                     // loc
775     CGM.Int32Ty,                               // tid
776     CGM.Int32Ty,                               // schedtype
777     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
778     PtrTy,                                     // p_lower
779     PtrTy,                                     // p_upper
780     PtrTy,                                     // p_stride
781     ITy,                                       // incr
782     ITy                                        // chunk
783   };
784   llvm::FunctionType *FnTy =
785       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
786   return CGM.CreateRuntimeFunction(FnTy, Name);
787 }
788
789 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
790                                                             bool IVSigned) {
791   assert((IVSize == 32 || IVSize == 64) &&
792          "IV size is not compatible with the omp runtime");
793   auto Name =
794       IVSize == 32
795           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
796           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
797   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
798   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
799                                CGM.Int32Ty,           // tid
800                                CGM.Int32Ty,           // schedtype
801                                ITy,                   // lower
802                                ITy,                   // upper
803                                ITy,                   // stride
804                                ITy                    // chunk
805   };
806   llvm::FunctionType *FnTy =
807       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
808   return CGM.CreateRuntimeFunction(FnTy, Name);
809 }
810
811 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
812                                                             bool IVSigned) {
813   assert((IVSize == 32 || IVSize == 64) &&
814          "IV size is not compatible with the omp runtime");
815   auto Name =
816       IVSize == 32
817           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
818           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
819   llvm::Type *TypeParams[] = {
820       getIdentTyPointerTy(), // loc
821       CGM.Int32Ty,           // tid
822   };
823   llvm::FunctionType *FnTy =
824       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
825   return CGM.CreateRuntimeFunction(FnTy, Name);
826 }
827
828 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
829                                                             bool IVSigned) {
830   assert((IVSize == 32 || IVSize == 64) &&
831          "IV size is not compatible with the omp runtime");
832   auto Name =
833       IVSize == 32
834           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
835           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
836   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
837   auto PtrTy = llvm::PointerType::getUnqual(ITy);
838   llvm::Type *TypeParams[] = {
839     getIdentTyPointerTy(),                     // loc
840     CGM.Int32Ty,                               // tid
841     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
842     PtrTy,                                     // p_lower
843     PtrTy,                                     // p_upper
844     PtrTy                                      // p_stride
845   };
846   llvm::FunctionType *FnTy =
847       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
848   return CGM.CreateRuntimeFunction(FnTy, Name);
849 }
850
851 llvm::Constant *
852 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
853   // Lookup the entry, lazily creating it if necessary.
854   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
855                                      Twine(CGM.getMangledName(VD)) + ".cache.");
856 }
857
858 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
859                                                      const VarDecl *VD,
860                                                      llvm::Value *VDAddr,
861                                                      SourceLocation Loc) {
862   auto VarTy = VDAddr->getType()->getPointerElementType();
863   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
864                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
865                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
866                          getOrCreateThreadPrivateCache(VD)};
867   return CGF.EmitRuntimeCall(
868       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
869 }
870
871 void CGOpenMPRuntime::emitThreadPrivateVarInit(
872     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
873     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
874   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
875   // library.
876   auto OMPLoc = emitUpdateLocation(CGF, Loc);
877   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
878                       OMPLoc);
879   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
880   // to register constructor/destructor for variable.
881   llvm::Value *Args[] = {OMPLoc,
882                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
883                          Ctor, CopyCtor, Dtor};
884   CGF.EmitRuntimeCall(
885       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
886 }
887
888 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
889     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
890     bool PerformInit, CodeGenFunction *CGF) {
891   VD = VD->getDefinition(CGM.getContext());
892   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
893     ThreadPrivateWithDefinition.insert(VD);
894     QualType ASTTy = VD->getType();
895
896     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
897     auto Init = VD->getAnyInitializer();
898     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
899       // Generate function that re-emits the declaration's initializer into the
900       // threadprivate copy of the variable VD
901       CodeGenFunction CtorCGF(CGM);
902       FunctionArgList Args;
903       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
904                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
905       Args.push_back(&Dst);
906
907       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
908           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
909           /*isVariadic=*/false);
910       auto FTy = CGM.getTypes().GetFunctionType(FI);
911       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
912           FTy, ".__kmpc_global_ctor_.", Loc);
913       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
914                             Args, SourceLocation());
915       auto ArgVal = CtorCGF.EmitLoadOfScalar(
916           CtorCGF.GetAddrOfLocalVar(&Dst),
917           /*Volatile=*/false, CGM.PointerAlignInBytes,
918           CGM.getContext().VoidPtrTy, Dst.getLocation());
919       auto Arg = CtorCGF.Builder.CreatePointerCast(
920           ArgVal,
921           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
922       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
923                                /*IsInitializer=*/true);
924       ArgVal = CtorCGF.EmitLoadOfScalar(
925           CtorCGF.GetAddrOfLocalVar(&Dst),
926           /*Volatile=*/false, CGM.PointerAlignInBytes,
927           CGM.getContext().VoidPtrTy, Dst.getLocation());
928       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
929       CtorCGF.FinishFunction();
930       Ctor = Fn;
931     }
932     if (VD->getType().isDestructedType() != QualType::DK_none) {
933       // Generate function that emits destructor call for the threadprivate copy
934       // of the variable VD
935       CodeGenFunction DtorCGF(CGM);
936       FunctionArgList Args;
937       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
938                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
939       Args.push_back(&Dst);
940
941       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
942           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
943           /*isVariadic=*/false);
944       auto FTy = CGM.getTypes().GetFunctionType(FI);
945       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
946           FTy, ".__kmpc_global_dtor_.", Loc);
947       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
948                             SourceLocation());
949       auto ArgVal = DtorCGF.EmitLoadOfScalar(
950           DtorCGF.GetAddrOfLocalVar(&Dst),
951           /*Volatile=*/false, CGM.PointerAlignInBytes,
952           CGM.getContext().VoidPtrTy, Dst.getLocation());
953       DtorCGF.emitDestroy(ArgVal, ASTTy,
954                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
955                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
956       DtorCGF.FinishFunction();
957       Dtor = Fn;
958     }
959     // Do not emit init function if it is not required.
960     if (!Ctor && !Dtor)
961       return nullptr;
962
963     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
964     auto CopyCtorTy =
965         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
966                                 /*isVarArg=*/false)->getPointerTo();
967     // Copying constructor for the threadprivate variable.
968     // Must be NULL - reserved by runtime, but currently it requires that this
969     // parameter is always NULL. Otherwise it fires assertion.
970     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
971     if (Ctor == nullptr) {
972       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
973                                             /*isVarArg=*/false)->getPointerTo();
974       Ctor = llvm::Constant::getNullValue(CtorTy);
975     }
976     if (Dtor == nullptr) {
977       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
978                                             /*isVarArg=*/false)->getPointerTo();
979       Dtor = llvm::Constant::getNullValue(DtorTy);
980     }
981     if (!CGF) {
982       auto InitFunctionTy =
983           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
984       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
985           InitFunctionTy, ".__omp_threadprivate_init_.");
986       CodeGenFunction InitCGF(CGM);
987       FunctionArgList ArgList;
988       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
989                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
990                             Loc);
991       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
992       InitCGF.FinishFunction();
993       return InitFunction;
994     }
995     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
996   }
997   return nullptr;
998 }
999
1000 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1001 /// function. Here is the logic:
1002 /// if (Cond) {
1003 ///   ThenGen();
1004 /// } else {
1005 ///   ElseGen();
1006 /// }
1007 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1008                             const RegionCodeGenTy &ThenGen,
1009                             const RegionCodeGenTy &ElseGen) {
1010   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1011
1012   // If the condition constant folds and can be elided, try to avoid emitting
1013   // the condition and the dead arm of the if/else.
1014   bool CondConstant;
1015   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1016     CodeGenFunction::RunCleanupsScope Scope(CGF);
1017     if (CondConstant) {
1018       ThenGen(CGF);
1019     } else {
1020       ElseGen(CGF);
1021     }
1022     return;
1023   }
1024
1025   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1026   // emit the conditional branch.
1027   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1028   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1029   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1030   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1031
1032   // Emit the 'then' code.
1033   CGF.EmitBlock(ThenBlock);
1034   {
1035     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1036     ThenGen(CGF);
1037   }
1038   CGF.EmitBranch(ContBlock);
1039   // Emit the 'else' code if present.
1040   {
1041     // There is no need to emit line number for unconditional branch.
1042     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1043     CGF.EmitBlock(ElseBlock);
1044   }
1045   {
1046     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1047     ElseGen(CGF);
1048   }
1049   {
1050     // There is no need to emit line number for unconditional branch.
1051     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1052     CGF.EmitBranch(ContBlock);
1053   }
1054   // Emit the continuation block for code after the if.
1055   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1056 }
1057
1058 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1059                                        llvm::Value *OutlinedFn,
1060                                        llvm::Value *CapturedStruct,
1061                                        const Expr *IfCond) {
1062   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1063   auto &&ThenGen =
1064       [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) {
1065         // Build call __kmpc_fork_call(loc, 1, microtask,
1066         // captured_struct/*context*/)
1067         llvm::Value *Args[] = {
1068             RTLoc,
1069             CGF.Builder.getInt32(
1070                 1), // Number of arguments after 'microtask' argument
1071             // (there is only one additional argument - 'context')
1072             CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
1073             CGF.EmitCastToVoidPtr(CapturedStruct)};
1074         auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1075         CGF.EmitRuntimeCall(RTLFn, Args);
1076       };
1077   auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc](
1078       CodeGenFunction &CGF) {
1079     auto ThreadID = getThreadID(CGF, Loc);
1080     // Build calls:
1081     // __kmpc_serialized_parallel(&Loc, GTid);
1082     llvm::Value *Args[] = {RTLoc, ThreadID};
1083     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1084                         Args);
1085
1086     // OutlinedFn(&GTid, &zero, CapturedStruct);
1087     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1088     auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
1089                                                           /*Signed*/ true);
1090     auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
1091     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1092     llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
1093     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1094
1095     // __kmpc_end_serialized_parallel(&Loc, GTid);
1096     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1097     CGF.EmitRuntimeCall(
1098         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1099   };
1100   if (IfCond) {
1101     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1102   } else {
1103     CodeGenFunction::RunCleanupsScope Scope(CGF);
1104     ThenGen(CGF);
1105   }
1106 }
1107
1108 // If we're inside an (outlined) parallel region, use the region info's
1109 // thread-ID variable (it is passed in a first argument of the outlined function
1110 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1111 // regular serial code region, get thread ID by calling kmp_int32
1112 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1113 // return the address of that temp.
1114 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1115                                                   SourceLocation Loc) {
1116   if (auto OMPRegionInfo =
1117           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1118     if (OMPRegionInfo->getThreadIDVariable())
1119       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1120
1121   auto ThreadID = getThreadID(CGF, Loc);
1122   auto Int32Ty =
1123       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1124   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1125   CGF.EmitStoreOfScalar(ThreadID,
1126                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
1127
1128   return ThreadIDTemp;
1129 }
1130
1131 llvm::Constant *
1132 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1133                                              const llvm::Twine &Name) {
1134   SmallString<256> Buffer;
1135   llvm::raw_svector_ostream Out(Buffer);
1136   Out << Name;
1137   auto RuntimeName = Out.str();
1138   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1139   if (Elem.second) {
1140     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1141            "OMP internal variable has different type than requested");
1142     return &*Elem.second;
1143   }
1144
1145   return Elem.second = new llvm::GlobalVariable(
1146              CGM.getModule(), Ty, /*IsConstant*/ false,
1147              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1148              Elem.first());
1149 }
1150
1151 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1152   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1153   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1154 }
1155
1156 namespace {
1157 template <size_t N> class CallEndCleanup : public EHScopeStack::Cleanup {
1158   llvm::Value *Callee;
1159   llvm::Value *Args[N];
1160
1161 public:
1162   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1163       : Callee(Callee) {
1164     assert(CleanupArgs.size() == N);
1165     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1166   }
1167   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1168     CGF.EmitRuntimeCall(Callee, Args);
1169   }
1170 };
1171 } // namespace
1172
1173 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1174                                          StringRef CriticalName,
1175                                          const RegionCodeGenTy &CriticalOpGen,
1176                                          SourceLocation Loc) {
1177   // __kmpc_critical(ident_t *, gtid, Lock);
1178   // CriticalOpGen();
1179   // __kmpc_end_critical(ident_t *, gtid, Lock);
1180   // Prepare arguments and build a call to __kmpc_critical
1181   {
1182     CodeGenFunction::RunCleanupsScope Scope(CGF);
1183     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1184                            getCriticalRegionLock(CriticalName)};
1185     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1186     // Build a call to __kmpc_end_critical
1187     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1188         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1189         llvm::makeArrayRef(Args));
1190     emitInlinedDirective(CGF, CriticalOpGen);
1191   }
1192 }
1193
1194 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1195                        const RegionCodeGenTy &BodyOpGen) {
1196   llvm::Value *CallBool = CGF.EmitScalarConversion(
1197       IfCond,
1198       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1199       CGF.getContext().BoolTy);
1200
1201   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1202   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1203   // Generate the branch (If-stmt)
1204   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1205   CGF.EmitBlock(ThenBlock);
1206   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
1207   // Emit the rest of bblocks/branches
1208   CGF.EmitBranch(ContBlock);
1209   CGF.EmitBlock(ContBlock, true);
1210 }
1211
1212 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1213                                        const RegionCodeGenTy &MasterOpGen,
1214                                        SourceLocation Loc) {
1215   // if(__kmpc_master(ident_t *, gtid)) {
1216   //   MasterOpGen();
1217   //   __kmpc_end_master(ident_t *, gtid);
1218   // }
1219   // Prepare arguments and build a call to __kmpc_master
1220   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1221   auto *IsMaster =
1222       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1223   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1224       MasterCallEndCleanup;
1225   emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
1226     CodeGenFunction::RunCleanupsScope Scope(CGF);
1227     CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1228         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1229         llvm::makeArrayRef(Args));
1230     MasterOpGen(CGF);
1231   });
1232 }
1233
1234 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1235                                         SourceLocation Loc) {
1236   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1237   llvm::Value *Args[] = {
1238       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1239       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1240   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1241 }
1242
1243 static llvm::Value *emitCopyprivateCopyFunction(
1244     CodeGenModule &CGM, llvm::Type *ArgsType,
1245     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1246     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1247   auto &C = CGM.getContext();
1248   // void copy_func(void *LHSArg, void *RHSArg);
1249   FunctionArgList Args;
1250   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1251                            C.VoidPtrTy);
1252   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1253                            C.VoidPtrTy);
1254   Args.push_back(&LHSArg);
1255   Args.push_back(&RHSArg);
1256   FunctionType::ExtInfo EI;
1257   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1258       C.VoidTy, Args, EI, /*isVariadic=*/false);
1259   auto *Fn = llvm::Function::Create(
1260       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1261       ".omp.copyprivate.copy_func", &CGM.getModule());
1262   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1263   CodeGenFunction CGF(CGM);
1264   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1265   // Dest = (void*[n])(LHSArg);
1266   // Src = (void*[n])(RHSArg);
1267   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1268       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1269                                     CGF.PointerAlignInBytes),
1270       ArgsType);
1271   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1272       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1273                                     CGF.PointerAlignInBytes),
1274       ArgsType);
1275   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1276   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1277   // ...
1278   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1279   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1280     auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1281         CGF.Builder.CreateAlignedLoad(
1282             CGF.Builder.CreateStructGEP(nullptr, LHS, I),
1283             CGM.PointerAlignInBytes),
1284         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1285     auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1286         CGF.Builder.CreateAlignedLoad(
1287             CGF.Builder.CreateStructGEP(nullptr, RHS, I),
1288             CGM.PointerAlignInBytes),
1289         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1290     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1291     QualType Type = VD->getType();
1292     CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr,
1293                     cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
1294                     cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
1295                     AssignmentOps[I]);
1296   }
1297   CGF.FinishFunction();
1298   return Fn;
1299 }
1300
1301 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1302                                        const RegionCodeGenTy &SingleOpGen,
1303                                        SourceLocation Loc,
1304                                        ArrayRef<const Expr *> CopyprivateVars,
1305                                        ArrayRef<const Expr *> SrcExprs,
1306                                        ArrayRef<const Expr *> DstExprs,
1307                                        ArrayRef<const Expr *> AssignmentOps) {
1308   assert(CopyprivateVars.size() == SrcExprs.size() &&
1309          CopyprivateVars.size() == DstExprs.size() &&
1310          CopyprivateVars.size() == AssignmentOps.size());
1311   auto &C = CGM.getContext();
1312   // int32 did_it = 0;
1313   // if(__kmpc_single(ident_t *, gtid)) {
1314   //   SingleOpGen();
1315   //   __kmpc_end_single(ident_t *, gtid);
1316   //   did_it = 1;
1317   // }
1318   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1319   // <copy_func>, did_it);
1320
1321   llvm::AllocaInst *DidIt = nullptr;
1322   if (!CopyprivateVars.empty()) {
1323     // int32 did_it = 0;
1324     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1325     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1326     CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
1327                                    DidIt->getAlignment());
1328   }
1329   // Prepare arguments and build a call to __kmpc_single
1330   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1331   auto *IsSingle =
1332       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1333   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1334       SingleCallEndCleanup;
1335   emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
1336     CodeGenFunction::RunCleanupsScope Scope(CGF);
1337     CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1338         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1339         llvm::makeArrayRef(Args));
1340     SingleOpGen(CGF);
1341     if (DidIt) {
1342       // did_it = 1;
1343       CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
1344                                      DidIt->getAlignment());
1345     }
1346   });
1347   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1348   // <copy_func>, did_it);
1349   if (DidIt) {
1350     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1351     auto CopyprivateArrayTy =
1352         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1353                                /*IndexTypeQuals=*/0);
1354     // Create a list of all private variables for copyprivate.
1355     auto *CopyprivateList =
1356         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1357     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1358       auto *Elem = CGF.Builder.CreateStructGEP(
1359           CopyprivateList->getAllocatedType(), CopyprivateList, I);
1360       CGF.Builder.CreateAlignedStore(
1361           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1362               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
1363           Elem, CGM.PointerAlignInBytes);
1364     }
1365     // Build function that copies private values from single region to all other
1366     // threads in the corresponding parallel region.
1367     auto *CpyFn = emitCopyprivateCopyFunction(
1368         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1369         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1370     auto *BufSize = llvm::ConstantInt::get(
1371         CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1372     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1373                                                                CGF.VoidPtrTy);
1374     auto *DidItVal =
1375         CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
1376     llvm::Value *Args[] = {
1377         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1378         getThreadID(CGF, Loc),        // i32 <gtid>
1379         BufSize,                      // size_t <buf_size>
1380         CL,                           // void *<copyprivate list>
1381         CpyFn,                        // void (*) (void *, void *) <copy_func>
1382         DidItVal                      // i32 did_it
1383     };
1384     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1385   }
1386 }
1387
1388 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1389                                         const RegionCodeGenTy &OrderedOpGen,
1390                                         SourceLocation Loc) {
1391   // __kmpc_ordered(ident_t *, gtid);
1392   // OrderedOpGen();
1393   // __kmpc_end_ordered(ident_t *, gtid);
1394   // Prepare arguments and build a call to __kmpc_ordered
1395   {
1396     CodeGenFunction::RunCleanupsScope Scope(CGF);
1397     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1398     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1399     // Build a call to __kmpc_end_ordered
1400     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1401         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1402         llvm::makeArrayRef(Args));
1403     emitInlinedDirective(CGF, OrderedOpGen);
1404   }
1405 }
1406
1407 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1408                                       OpenMPDirectiveKind Kind) {
1409   // Build call __kmpc_cancel_barrier(loc, thread_id);
1410   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1411   if (Kind == OMPD_for) {
1412     Flags =
1413         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1414   } else if (Kind == OMPD_sections) {
1415     Flags = static_cast<OpenMPLocationFlags>(Flags |
1416                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1417   } else if (Kind == OMPD_single) {
1418     Flags =
1419         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1420   } else if (Kind == OMPD_barrier) {
1421     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1422   } else {
1423     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1424   }
1425   // Build call __kmpc_cancel_barrier(loc, thread_id);
1426   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
1427   // one provides the same functionality and adds initial support for
1428   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
1429   // is provided default by the runtime library so it safe to make such
1430   // replacement.
1431   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1432                          getThreadID(CGF, Loc)};
1433   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1434 }
1435
/// \brief Schedule types for 'omp for' loops. The numeric values are taken
/// from the enum sched_type in kmp.h and must stay in sync with it.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief Unordered static schedule with a chunk.
  OMP_sch_static_chunked = 33,
  /// \brief Unordered static schedule without a chunk.
  OMP_sch_static = 34,
  /// \brief Unordered dynamic schedule.
  OMP_sch_dynamic_chunked = 35,
  /// \brief Unordered guided schedule.
  OMP_sch_guided_chunked = 36,
  /// \brief Unordered runtime schedule.
  OMP_sch_runtime = 37,
  /// \brief Unordered auto schedule.
  OMP_sch_auto = 38,

  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Ordered static schedule with a chunk.
  OMP_ord_static_chunked = 65,
  /// \brief Ordered static schedule without a chunk.
  OMP_ord_static = 66,
  /// \brief Ordered dynamic schedule.
  OMP_ord_dynamic_chunked = 67,
  /// \brief Ordered guided schedule.
  OMP_ord_guided_chunked = 68,
  /// \brief Ordered runtime schedule.
  OMP_ord_runtime = 69,
  /// \brief Ordered auto schedule.
  OMP_ord_auto = 70,

  /// \brief Default schedule: an alias of the unordered non-chunked static
  /// schedule.
  OMP_sch_default = OMP_sch_static,
};
1457
1458 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1459 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1460                                           bool Chunked, bool Ordered) {
1461   switch (ScheduleKind) {
1462   case OMPC_SCHEDULE_static:
1463     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1464                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1465   case OMPC_SCHEDULE_dynamic:
1466     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1467   case OMPC_SCHEDULE_guided:
1468     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1469   case OMPC_SCHEDULE_runtime:
1470     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1471   case OMPC_SCHEDULE_auto:
1472     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1473   case OMPC_SCHEDULE_unknown:
1474     assert(!Chunked && "chunk was specified but schedule kind not known");
1475     return Ordered ? OMP_ord_static : OMP_sch_static;
1476   }
1477   llvm_unreachable("Unexpected runtime schedule");
1478 }
1479
1480 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1481                                          bool Chunked) const {
1482   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1483   return Schedule == OMP_sch_static;
1484 }
1485
1486 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1487   auto Schedule =
1488       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1489   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1490   return Schedule != OMP_sch_static;
1491 }
1492
1493 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1494                                   OpenMPScheduleClauseKind ScheduleKind,
1495                                   unsigned IVSize, bool IVSigned, bool Ordered,
1496                                   llvm::Value *IL, llvm::Value *LB,
1497                                   llvm::Value *UB, llvm::Value *ST,
1498                                   llvm::Value *Chunk) {
1499   OpenMPSchedType Schedule =
1500       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1501   if (Ordered ||
1502       (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1503        Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
1504     // Call __kmpc_dispatch_init(
1505     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1506     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1507     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1508
1509     // If the Chunk was not specified in the clause - use default value 1.
1510     if (Chunk == nullptr)
1511       Chunk = CGF.Builder.getIntN(IVSize, 1);
1512     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1513                             getThreadID(CGF, Loc),
1514                             CGF.Builder.getInt32(Schedule), // Schedule type
1515                             CGF.Builder.getIntN(IVSize, 0), // Lower
1516                             UB,                             // Upper
1517                             CGF.Builder.getIntN(IVSize, 1), // Stride
1518                             Chunk                           // Chunk
1519     };
1520     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1521   } else {
1522     // Call __kmpc_for_static_init(
1523     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1524     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1525     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1526     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1527     if (Chunk == nullptr) {
1528       assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1529              "expected static non-chunked schedule");
1530       // If the Chunk was not specified in the clause - use default value 1.
1531       Chunk = CGF.Builder.getIntN(IVSize, 1);
1532     } else
1533       assert((Schedule == OMP_sch_static_chunked ||
1534               Schedule == OMP_ord_static_chunked) &&
1535              "expected static chunked schedule");
1536     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1537                             getThreadID(CGF, Loc),
1538                             CGF.Builder.getInt32(Schedule), // Schedule type
1539                             IL,                             // &isLastIter
1540                             LB,                             // &LB
1541                             UB,                             // &UB
1542                             ST,                             // &Stride
1543                             CGF.Builder.getIntN(IVSize, 1), // Incr
1544                             Chunk                           // Chunk
1545     };
1546     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1547   }
1548 }
1549
1550 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1551                                           SourceLocation Loc) {
1552   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1553   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1554                          getThreadID(CGF, Loc)};
1555   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1556                       Args);
1557 }
1558
1559 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1560                                                  SourceLocation Loc,
1561                                                  unsigned IVSize,
1562                                                  bool IVSigned) {
1563   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1564   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1565                          getThreadID(CGF, Loc)};
1566   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1567 }
1568
1569 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1570                                           SourceLocation Loc, unsigned IVSize,
1571                                           bool IVSigned, llvm::Value *IL,
1572                                           llvm::Value *LB, llvm::Value *UB,
1573                                           llvm::Value *ST) {
1574   // Call __kmpc_dispatch_next(
1575   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1576   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1577   //          kmp_int[32|64] *p_stride);
1578   llvm::Value *Args[] = {
1579       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1580       IL, // &isLastIter
1581       LB, // &Lower
1582       UB, // &Upper
1583       ST  // &Stride
1584   };
1585   llvm::Value *Call =
1586       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1587   return CGF.EmitScalarConversion(
1588       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1589       CGF.getContext().BoolTy);
1590 }
1591
1592 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1593                                            llvm::Value *NumThreads,
1594                                            SourceLocation Loc) {
1595   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1596   llvm::Value *Args[] = {
1597       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1598       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1599   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1600                       Args);
1601 }
1602
1603 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1604                                 SourceLocation Loc) {
1605   // Build call void __kmpc_flush(ident_t *loc)
1606   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1607                       emitUpdateLocation(CGF, Loc));
1608 }
1609
namespace {
/// \brief Positions of the fields inside the kmp_task_t record. The values are
/// used as offsets from the record's field_begin(), so they have to follow the
/// order in which the fields are added to the record.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors = 3,
};
} // namespace
1623
1624 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1625   if (!KmpRoutineEntryPtrTy) {
1626     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1627     auto &C = CGM.getContext();
1628     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1629     FunctionProtoType::ExtProtoInfo EPI;
1630     KmpRoutineEntryPtrQTy = C.getPointerType(
1631         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1632     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1633   }
1634 }
1635
1636 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1637                                  QualType FieldTy) {
1638   auto *Field = FieldDecl::Create(
1639       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1640       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1641       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1642   Field->setAccess(AS_public);
1643   DC->addDecl(Field);
1644 }
1645
1646 namespace {
1647 struct PrivateHelpersTy {
1648   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1649                    const VarDecl *PrivateElemInit)
1650       : Original(Original), PrivateCopy(PrivateCopy),
1651         PrivateElemInit(PrivateElemInit) {}
1652   const VarDecl *Original;
1653   const VarDecl *PrivateCopy;
1654   const VarDecl *PrivateElemInit;
1655 };
1656 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
1657 } // namespace
1658
1659 static RecordDecl *
1660 createPrivatesRecordDecl(CodeGenModule &CGM,
1661                          const ArrayRef<PrivateDataTy> Privates) {
1662   if (!Privates.empty()) {
1663     auto &C = CGM.getContext();
1664     // Build struct .kmp_privates_t. {
1665     //         /*  private vars  */
1666     //       };
1667     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1668     RD->startDefinition();
1669     for (auto &&Pair : Privates) {
1670       auto Type = Pair.second.Original->getType();
1671       Type = Type.getNonReferenceType();
1672       addFieldToRecordDecl(C, RD, Type);
1673     }
1674     RD->completeDefinition();
1675     return RD;
1676   }
1677   return nullptr;
1678 }
1679
1680 static RecordDecl *
1681 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1682                          QualType KmpRoutineEntryPointerQTy) {
1683   auto &C = CGM.getContext();
1684   // Build struct kmp_task_t {
1685   //         void *              shareds;
1686   //         kmp_routine_entry_t routine;
1687   //         kmp_int32           part_id;
1688   //         kmp_routine_entry_t destructors;
1689   //       };
1690   auto *RD = C.buildImplicitRecord("kmp_task_t");
1691   RD->startDefinition();
1692   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1693   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1694   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1695   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1696   RD->completeDefinition();
1697   return RD;
1698 }
1699
1700 static RecordDecl *
1701 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
1702                                      const ArrayRef<PrivateDataTy> Privates) {
1703   auto &C = CGM.getContext();
1704   // Build struct kmp_task_t_with_privates {
1705   //         kmp_task_t task_data;
1706   //         .kmp_privates_t. privates;
1707   //       };
1708   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
1709   RD->startDefinition();
1710   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
1711   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
1712     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
1713   }
1714   RD->completeDefinition();
1715   return RD;
1716 }
1717
1718 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1719 /// argument.
1720 /// \code
1721 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1722 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
1723 ///   tt->shareds);
1724 ///   return 0;
1725 /// }
1726 /// \endcode
1727 static llvm::Value *
1728 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1729                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
1730                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
1731                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
1732                       llvm::Value *TaskPrivatesMap) {
1733   auto &C = CGM.getContext();
1734   FunctionArgList Args;
1735   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1736   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1737                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1738   Args.push_back(&GtidArg);
1739   Args.push_back(&TaskTypeArg);
1740   FunctionType::ExtInfo Info;
1741   auto &TaskEntryFnInfo =
1742       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1743                                                     /*isVariadic=*/false);
1744   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1745   auto *TaskEntry =
1746       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1747                              ".omp_task_entry.", &CGM.getModule());
1748   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1749   CodeGenFunction CGF(CGM);
1750   CGF.disableDebugInfo();
1751   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1752
1753   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
1754   // tt->task_data.shareds);
1755   auto *GtidParam = CGF.EmitLoadOfScalar(
1756       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1757       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1758   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1759       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1760   LValue TDBase =
1761       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1762   auto *KmpTaskTWithPrivatesQTyRD =
1763       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
1764   LValue Base =
1765       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
1766   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
1767   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
1768   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
1769   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
1770
1771   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
1772   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
1773   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1774       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
1775       CGF.ConvertTypeForMem(SharedsPtrTy));
1776
1777   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
1778   llvm::Value *PrivatesParam;
1779   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
1780     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
1781     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1782         PrivatesLVal.getAddress(), CGF.VoidPtrTy);
1783   } else {
1784     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
1785   }
1786
1787   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
1788                              TaskPrivatesMap, SharedsParam};
1789   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
1790   CGF.EmitStoreThroughLValue(
1791       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1792       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1793   CGF.FinishFunction();
1794   return TaskEntry;
1795 }
1796
1797 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
1798                                             SourceLocation Loc,
1799                                             QualType KmpInt32Ty,
1800                                             QualType KmpTaskTWithPrivatesPtrQTy,
1801                                             QualType KmpTaskTWithPrivatesQTy) {
1802   auto &C = CGM.getContext();
1803   FunctionArgList Args;
1804   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1805   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1806                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1807   Args.push_back(&GtidArg);
1808   Args.push_back(&TaskTypeArg);
1809   FunctionType::ExtInfo Info;
1810   auto &DestructorFnInfo =
1811       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1812                                                     /*isVariadic=*/false);
1813   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
1814   auto *DestructorFn =
1815       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
1816                              ".omp_task_destructor.", &CGM.getModule());
1817   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
1818   CodeGenFunction CGF(CGM);
1819   CGF.disableDebugInfo();
1820   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
1821                     Args);
1822
1823   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1824       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1825   LValue Base =
1826       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1827   auto *KmpTaskTWithPrivatesQTyRD =
1828       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
1829   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
1830   Base = CGF.EmitLValueForField(Base, *FI);
1831   for (auto *Field :
1832        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
1833     if (auto DtorKind = Field->getType().isDestructedType()) {
1834       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
1835       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
1836     }
1837   }
1838   CGF.FinishFunction();
1839   return DestructorFn;
1840 }
1841
1842 /// \brief Emit a privates mapping function for correct handling of private and
1843 /// firstprivate variables.
1844 /// \code
1845 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
1846 /// **noalias priv1,...,  <tyn> **noalias privn) {
1847 ///   *priv1 = &.privates.priv1;
1848 ///   ...;
1849 ///   *privn = &.privates.privn;
1850 /// }
1851 /// \endcode
1852 static llvm::Value *
1853 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
1854                                const ArrayRef<const Expr *> PrivateVars,
1855                                const ArrayRef<const Expr *> FirstprivateVars,
1856                                QualType PrivatesQTy,
1857                                const ArrayRef<PrivateDataTy> Privates) {
1858   auto &C = CGM.getContext();
1859   FunctionArgList Args;
1860   ImplicitParamDecl TaskPrivatesArg(
1861       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
1862       C.getPointerType(PrivatesQTy).withConst().withRestrict());
1863   Args.push_back(&TaskPrivatesArg);
1864   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
1865   unsigned Counter = 1;
1866   for (auto *E: PrivateVars) {
1867     Args.push_back(ImplicitParamDecl::Create(
1868         C, /*DC=*/nullptr, Loc,
1869         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
1870                             .withConst()
1871                             .withRestrict()));
1872     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1873     PrivateVarsPos[VD] = Counter;
1874     ++Counter;
1875   }
1876   for (auto *E : FirstprivateVars) {
1877     Args.push_back(ImplicitParamDecl::Create(
1878         C, /*DC=*/nullptr, Loc,
1879         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
1880                             .withConst()
1881                             .withRestrict()));
1882     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1883     PrivateVarsPos[VD] = Counter;
1884     ++Counter;
1885   }
1886   FunctionType::ExtInfo Info;
1887   auto &TaskPrivatesMapFnInfo =
1888       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
1889                                                     /*isVariadic=*/false);
1890   auto *TaskPrivatesMapTy =
1891       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
1892   auto *TaskPrivatesMap = llvm::Function::Create(
1893       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
1894       ".omp_task_privates_map.", &CGM.getModule());
1895   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
1896                                 TaskPrivatesMap);
1897   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
1898   CodeGenFunction CGF(CGM);
1899   CGF.disableDebugInfo();
1900   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
1901                     TaskPrivatesMapFnInfo, Args);
1902
1903   // *privi = &.privates.privi;
1904   auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad(
1905       CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes);
1906   LValue Base =
1907       CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy);
1908   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
1909   Counter = 0;
1910   for (auto *Field : PrivatesQTyRD->fields()) {
1911     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
1912     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
1913     auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD),
1914                                                   VD->getType());
1915     auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc);
1916     CGF.EmitStoreOfScalar(
1917         FieldLVal.getAddress(),
1918         CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(),
1919                                        RefLVal.getType()->getPointeeType()));
1920     ++Counter;
1921   }
1922   CGF.FinishFunction();
1923   return TaskPrivatesMap;
1924 }
1925
1926 static int array_pod_sort_comparator(const PrivateDataTy *P1,
1927                                      const PrivateDataTy *P2) {
1928   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
1929 }
1930
1931 void CGOpenMPRuntime::emitTaskCall(
1932     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
1933     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
1934     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
1935     const Expr *IfCond, const ArrayRef<const Expr *> PrivateVars,
1936     const ArrayRef<const Expr *> PrivateCopies,
1937     const ArrayRef<const Expr *> FirstprivateVars,
1938     const ArrayRef<const Expr *> FirstprivateCopies,
1939     const ArrayRef<const Expr *> FirstprivateInits) {
1940   auto &C = CGM.getContext();
1941   llvm::SmallVector<PrivateDataTy, 8> Privates;
1942   // Aggregate privates and sort them by the alignment.
1943   auto I = PrivateCopies.begin();
1944   for (auto *E : PrivateVars) {
1945     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1946     Privates.push_back(std::make_pair(
1947         C.getTypeAlignInChars(VD->getType()),
1948         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
1949                          /*PrivateElemInit=*/nullptr)));
1950     ++I;
1951   }
1952   I = FirstprivateCopies.begin();
1953   auto IElemInitRef = FirstprivateInits.begin();
1954   for (auto *E : FirstprivateVars) {
1955     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1956     Privates.push_back(std::make_pair(
1957         C.getTypeAlignInChars(VD->getType()),
1958         PrivateHelpersTy(
1959             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
1960             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
1961     ++I, ++IElemInitRef;
1962   }
1963   llvm::array_pod_sort(Privates.begin(), Privates.end(),
1964                        array_pod_sort_comparator);
1965   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1966   // Build type kmp_routine_entry_t (if not built yet).
1967   emitKmpRoutineEntryT(KmpInt32Ty);
1968   // Build type kmp_task_t (if not built yet).
1969   if (KmpTaskTQTy.isNull()) {
1970     KmpTaskTQTy = C.getRecordType(
1971         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
1972   }
1973   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
1974   // Build particular struct kmp_task_t for the given task.
1975   auto *KmpTaskTWithPrivatesQTyRD =
1976       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
1977   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
1978   QualType KmpTaskTWithPrivatesPtrQTy =
1979       C.getPointerType(KmpTaskTWithPrivatesQTy);
1980   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
1981   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
1982   auto KmpTaskTWithPrivatesTySize =
1983       CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy));
1984   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
1985
1986   // Emit initial values for private copies (if any).
1987   llvm::Value *TaskPrivatesMap = nullptr;
1988   auto *TaskPrivatesMapTy =
1989       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
1990                 3)
1991           ->getType();
1992   if (!Privates.empty()) {
1993     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
1994     TaskPrivatesMap = emitTaskPrivateMappingFunction(
1995         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
1996     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1997         TaskPrivatesMap, TaskPrivatesMapTy);
1998   } else {
1999     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2000         cast<llvm::PointerType>(TaskPrivatesMapTy));
2001   }
2002   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2003   // kmp_task_t *tt);
2004   auto *TaskEntry = emitProxyTaskFunction(
2005       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2006       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2007
2008   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2009   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2010   // kmp_routine_entry_t *task_entry);
2011   // Task flags. Format is taken from
2012   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2013   // description of kmp_tasking_flags struct.
2014   const unsigned TiedFlag = 0x1;
2015   const unsigned FinalFlag = 0x2;
2016   unsigned Flags = Tied ? TiedFlag : 0;
2017   auto *TaskFlags =
2018       Final.getPointer()
2019           ? CGF.Builder.CreateSelect(Final.getPointer(),
2020                                      CGF.Builder.getInt32(FinalFlag),
2021                                      CGF.Builder.getInt32(/*C=*/0))
2022           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2023   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2024   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
2025   llvm::Value *AllocArgs[] = {
2026       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
2027       KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize),
2028       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
2029                                                       KmpRoutineEntryPtrTy)};
2030   auto *NewTask = CGF.EmitRuntimeCall(
2031       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
2032   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2033       NewTask, KmpTaskTWithPrivatesPtrTy);
2034   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2035                                                KmpTaskTWithPrivatesQTy);
2036   LValue TDBase =
2037       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2038   // Fill the data in the resulting kmp_task_t record.
2039   // Copy shareds if there are any.
2040   llvm::Value *KmpTaskSharedsPtr = nullptr;
2041   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2042     KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
2043         CGF.EmitLValueForField(
2044             TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
2045         Loc);
2046     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2047   }
2048   // Emit initial values for private copies (if any).
2049   bool NeedsCleanup = false;
2050   if (!Privates.empty()) {
2051     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2052     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
2053     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
2054     LValue SharedsBase;
2055     if (!FirstprivateVars.empty()) {
2056       SharedsBase = CGF.MakeNaturalAlignAddrLValue(
2057           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2058               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2059           SharedsTy);
2060     }
2061     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2062         cast<CapturedStmt>(*D.getAssociatedStmt()));
2063     for (auto &&Pair : Privates) {
2064       auto *VD = Pair.second.PrivateCopy;
2065       auto *Init = VD->getAnyInitializer();
2066       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
2067       if (Init) {
2068         if (auto *Elem = Pair.second.PrivateElemInit) {
2069           auto *OriginalVD = Pair.second.Original;
2070           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2071           auto SharedRefLValue =
2072               CGF.EmitLValueForField(SharedsBase, SharedField);
2073           QualType Type = OriginalVD->getType();
2074           if (Type->isArrayType()) {
2075             // Initialize firstprivate array.
2076             if (!isa<CXXConstructExpr>(Init) ||
2077                 CGF.isTrivialInitializer(Init)) {
2078               // Perform simple memcpy.
2079               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2080                                       SharedRefLValue.getAddress(), Type);
2081             } else {
2082               // Initialize firstprivate array using element-by-element
2083               // initialization.
2084               CGF.EmitOMPAggregateAssign(
2085                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2086                   Type, [&CGF, Elem, Init, &CapturesInfo](
2087                             llvm::Value *DestElement, llvm::Value *SrcElement) {
2088                     // Clean up any temporaries needed by the initialization.
2089                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2090                     InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
2091                       return SrcElement;
2092                     });
2093                     (void)InitScope.Privatize();
2094                     // Emit initialization for single element.
2095                     auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
2096                     CGF.CapturedStmtInfo = &CapturesInfo;
2097                     CGF.EmitAnyExprToMem(Init, DestElement,
2098                                          Init->getType().getQualifiers(),
2099                                          /*IsInitializer=*/false);
2100                     CGF.CapturedStmtInfo = OldCapturedStmtInfo;
2101                   });
2102             }
2103           } else {
2104             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2105             InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
2106               return SharedRefLValue.getAddress();
2107             });
2108             (void)InitScope.Privatize();
2109             auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
2110             CGF.CapturedStmtInfo = &CapturesInfo;
2111             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2112                                /*capturedByInit=*/false);
2113             CGF.CapturedStmtInfo = OldCapturedStmtInfo;
2114           }
2115         } else {
2116           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2117         }
2118       }
2119       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2120       ++FI;
2121     }
2122   }
2123   // Provide pointer to function with destructors for privates.
2124   llvm::Value *DestructorFn =
2125       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2126                                              KmpTaskTWithPrivatesPtrQTy,
2127                                              KmpTaskTWithPrivatesQTy)
2128                    : llvm::ConstantPointerNull::get(
2129                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2130   LValue Destructor = CGF.EmitLValueForField(
2131       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2132   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2133                             DestructorFn, KmpRoutineEntryPtrTy),
2134                         Destructor);
2135   // NOTE: routine and part_id fields are initialized by
2136   // __kmpc_omp_task_alloc() libcall.
2137   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2138   // *new_task);
2139   auto *ThreadID = getThreadID(CGF, Loc);
2140   llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask};
2141   auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) {
2142     // TODO: add check for untied tasks.
2143     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
2144   };
2145   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
2146       IfCallEndCleanup;
2147   auto &&ElseCodeGen =
2148       [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry](
2149           CodeGenFunction &CGF) {
2150         CodeGenFunction::RunCleanupsScope LocalScope(CGF);
2151         CGF.EmitRuntimeCall(
2152             createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs);
2153         // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2154         // kmp_task_t *new_task);
2155         CGF.EHStack.pushCleanup<IfCallEndCleanup>(
2156             NormalAndEHCleanup,
2157             createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
2158             llvm::makeArrayRef(TaskArgs));
2159
2160         // Call proxy_task_entry(gtid, new_task);
2161         llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
2162         CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
2163       };
2164   if (IfCond) {
2165     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
2166   } else {
2167     CodeGenFunction::RunCleanupsScope Scope(CGF);
2168     ThenCodeGen(CGF);
2169   }
2170 }
2171
2172 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2173                                           llvm::Type *ArgsType,
2174                                           ArrayRef<const Expr *> LHSExprs,
2175                                           ArrayRef<const Expr *> RHSExprs,
2176                                           ArrayRef<const Expr *> ReductionOps) {
2177   auto &C = CGM.getContext();
2178
2179   // void reduction_func(void *LHSArg, void *RHSArg);
2180   FunctionArgList Args;
2181   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2182                            C.VoidPtrTy);
2183   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2184                            C.VoidPtrTy);
2185   Args.push_back(&LHSArg);
2186   Args.push_back(&RHSArg);
2187   FunctionType::ExtInfo EI;
2188   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2189       C.VoidTy, Args, EI, /*isVariadic=*/false);
2190   auto *Fn = llvm::Function::Create(
2191       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2192       ".omp.reduction.reduction_func", &CGM.getModule());
2193   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
2194   CodeGenFunction CGF(CGM);
2195   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2196
2197   // Dst = (void*[n])(LHSArg);
2198   // Src = (void*[n])(RHSArg);
2199   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2200       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
2201                                     CGF.PointerAlignInBytes),
2202       ArgsType);
2203   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2204       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
2205                                     CGF.PointerAlignInBytes),
2206       ArgsType);
2207
2208   //  ...
2209   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2210   //  ...
2211   CodeGenFunction::OMPPrivateScope Scope(CGF);
2212   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
2213     Scope.addPrivate(
2214         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
2215         [&]() -> llvm::Value *{
2216           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2217               CGF.Builder.CreateAlignedLoad(
2218                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
2219                   CGM.PointerAlignInBytes),
2220               CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
2221         });
2222     Scope.addPrivate(
2223         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
2224         [&]() -> llvm::Value *{
2225           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2226               CGF.Builder.CreateAlignedLoad(
2227                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
2228                   CGM.PointerAlignInBytes),
2229               CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
2230         });
2231   }
2232   Scope.Privatize();
2233   for (auto *E : ReductionOps) {
2234     CGF.EmitIgnoredExpr(E);
2235   }
2236   Scope.ForceCleanup();
2237   CGF.FinishFunction();
2238   return Fn;
2239 }
2240
2241 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2242                                     ArrayRef<const Expr *> LHSExprs,
2243                                     ArrayRef<const Expr *> RHSExprs,
2244                                     ArrayRef<const Expr *> ReductionOps,
2245                                     bool WithNowait) {
2246   // Next code should be emitted for reduction:
2247   //
2248   // static kmp_critical_name lock = { 0 };
2249   //
2250   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2251   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2252   //  ...
2253   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2254   //  *(Type<n>-1*)rhs[<n>-1]);
2255   // }
2256   //
2257   // ...
2258   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2259   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2260   // RedList, reduce_func, &<lock>)) {
2261   // case 1:
2262   //  ...
2263   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2264   //  ...
2265   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2266   // break;
2267   // case 2:
2268   //  ...
2269   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2270   //  ...
2271   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2272   // break;
2273   // default:;
2274   // }
2275
2276   auto &C = CGM.getContext();
2277
2278   // 1. Build a list of reduction variables.
2279   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2280   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
2281   QualType ReductionArrayTy =
2282       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2283                              /*IndexTypeQuals=*/0);
2284   auto *ReductionList =
2285       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2286   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
2287     auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
2288     CGF.Builder.CreateAlignedStore(
2289         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2290             CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
2291         Elem, CGM.PointerAlignInBytes);
2292   }
2293
2294   // 2. Emit reduce_func().
2295   auto *ReductionFn = emitReductionFunction(
2296       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
2297       RHSExprs, ReductionOps);
2298
2299   // 3. Create static kmp_critical_name lock = { 0 };
2300   auto *Lock = getCriticalRegionLock(".reduction");
2301
2302   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2303   // RedList, reduce_func, &<lock>);
2304   auto *IdentTLoc = emitUpdateLocation(
2305       CGF, Loc,
2306       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2307   auto *ThreadId = getThreadID(CGF, Loc);
2308   auto *ReductionArrayTySize = llvm::ConstantInt::get(
2309       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
2310   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
2311                                                              CGF.VoidPtrTy);
2312   llvm::Value *Args[] = {
2313       IdentTLoc,                             // ident_t *<loc>
2314       ThreadId,                              // i32 <gtid>
2315       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2316       ReductionArrayTySize,                  // size_type sizeof(RedList)
2317       RL,                                    // void *RedList
2318       ReductionFn, // void (*) (void *, void *) <reduce_func>
2319       Lock         // kmp_critical_name *&<lock>
2320   };
2321   auto Res = CGF.EmitRuntimeCall(
2322       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2323                                        : OMPRTL__kmpc_reduce),
2324       Args);
2325
2326   // 5. Build switch(res)
2327   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2328   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2329
2330   // 6. Build case 1:
2331   //  ...
2332   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2333   //  ...
2334   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2335   // break;
2336   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2337   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2338   CGF.EmitBlock(Case1BB);
2339
2340   {
2341     CodeGenFunction::RunCleanupsScope Scope(CGF);
2342     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2343     llvm::Value *EndArgs[] = {
2344         IdentTLoc, // ident_t *<loc>
2345         ThreadId,  // i32 <gtid>
2346         Lock       // kmp_critical_name *&<lock>
2347     };
2348     CGF.EHStack
2349         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2350             NormalAndEHCleanup,
2351             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2352                                              : OMPRTL__kmpc_end_reduce),
2353             llvm::makeArrayRef(EndArgs));
2354     for (auto *E : ReductionOps) {
2355       CGF.EmitIgnoredExpr(E);
2356     }
2357   }
2358
2359   CGF.EmitBranch(DefaultBB);
2360
2361   // 7. Build case 2:
2362   //  ...
2363   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2364   //  ...
2365   // break;
2366   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2367   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2368   CGF.EmitBlock(Case2BB);
2369
2370   {
2371     CodeGenFunction::RunCleanupsScope Scope(CGF);
2372     if (!WithNowait) {
2373       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2374       llvm::Value *EndArgs[] = {
2375           IdentTLoc, // ident_t *<loc>
2376           ThreadId,  // i32 <gtid>
2377           Lock       // kmp_critical_name *&<lock>
2378       };
2379       CGF.EHStack
2380           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2381               NormalAndEHCleanup,
2382               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2383               llvm::makeArrayRef(EndArgs));
2384     }
2385     auto I = LHSExprs.begin();
2386     for (auto *E : ReductionOps) {
2387       const Expr *XExpr = nullptr;
2388       const Expr *EExpr = nullptr;
2389       const Expr *UpExpr = nullptr;
2390       BinaryOperatorKind BO = BO_Comma;
2391       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2392         if (BO->getOpcode() == BO_Assign) {
2393           XExpr = BO->getLHS();
2394           UpExpr = BO->getRHS();
2395         }
2396       }
2397       // Try to emit update expression as a simple atomic.
2398       auto *RHSExpr = UpExpr;
2399       if (RHSExpr) {
2400         // Analyze RHS part of the whole expression.
2401         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2402                 RHSExpr->IgnoreParenImpCasts())) {
2403           // If this is a conditional operator, analyze its condition for
2404           // min/max reduction operator.
2405           RHSExpr = ACO->getCond();
2406         }
2407         if (auto *BORHS =
2408                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2409           EExpr = BORHS->getRHS();
2410           BO = BORHS->getOpcode();
2411         }
2412       }
2413       if (XExpr) {
2414         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2415         LValue X = CGF.EmitLValue(XExpr);
2416         RValue E;
2417         if (EExpr)
2418           E = CGF.EmitAnyExpr(EExpr);
2419         CGF.EmitOMPAtomicSimpleUpdateExpr(
2420             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
2421             [&CGF, UpExpr, VD](RValue XRValue) {
2422               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
2423               PrivateScope.addPrivate(
2424                   VD, [&CGF, VD, XRValue]() -> llvm::Value *{
2425                     auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
2426                     CGF.EmitStoreThroughLValue(
2427                         XRValue,
2428                         CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
2429                     return LHSTemp;
2430                   });
2431               (void)PrivateScope.Privatize();
2432               return CGF.EmitAnyExpr(UpExpr);
2433             });
2434       } else {
2435         // Emit as a critical region.
2436         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
2437           CGF.EmitIgnoredExpr(E);
2438         }, Loc);
2439       }
2440       ++I;
2441     }
2442   }
2443
2444   CGF.EmitBranch(DefaultBB);
2445   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
2446 }
2447
2448 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
2449                                        SourceLocation Loc) {
2450   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
2451   // global_tid);
2452   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2453   // Ignore return result until untied tasks are supported.
2454   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
2455 }
2456
2457 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
2458                                            const RegionCodeGenTy &CodeGen) {
2459   InlinedOpenMPRegionRAII Region(CGF, CodeGen);
2460   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
2461 }
2462