]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Update llvm/clang to r240225.
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / clang / lib / CodeGen / CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26
27 using namespace clang;
28 using namespace CodeGen;
29
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44   };
45
46   CGOpenMPRegionInfo(const CapturedStmt &CS,
47                      const CGOpenMPRegionKind RegionKind,
48                      const RegionCodeGenTy &CodeGen)
49       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
50         CodeGen(CodeGen) {}
51
52   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen)
54       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
55         CodeGen(CodeGen) {}
56
57   /// \brief Get a variable or parameter for storing global thread id
58   /// inside OpenMP construct.
59   virtual const VarDecl *getThreadIDVariable() const = 0;
60
61   /// \brief Emit the captured statement body.
62   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
63
64   /// \brief Get an LValue for the current ThreadID variable.
65   /// \return LValue for thread id variable. This LValue always has type int32*.
66   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
67
68   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
69
70   static bool classof(const CGCapturedStmtInfo *Info) {
71     return Info->getKind() == CR_OpenMP;
72   }
73
74 protected:
75   CGOpenMPRegionKind RegionKind;
76   const RegionCodeGenTy &CodeGen;
77 };
78
79 /// \brief API for captured statement code generation in OpenMP constructs.
80 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
81 public:
82   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
83                              const RegionCodeGenTy &CodeGen)
84       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen),
85         ThreadIDVar(ThreadIDVar) {
86     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
87   }
88   /// \brief Get a variable or parameter for storing global thread id
89   /// inside OpenMP construct.
90   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
91
92   /// \brief Get the name of the capture helper.
93   StringRef getHelperName() const override { return ".omp_outlined."; }
94
95   static bool classof(const CGCapturedStmtInfo *Info) {
96     return CGOpenMPRegionInfo::classof(Info) &&
97            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
98                ParallelOutlinedRegion;
99   }
100
101 private:
102   /// \brief A variable or parameter storing global thread id for OpenMP
103   /// constructs.
104   const VarDecl *ThreadIDVar;
105 };
106
107 /// \brief API for captured statement code generation in OpenMP constructs.
108 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
109 public:
110   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
111                                  const VarDecl *ThreadIDVar,
112                                  const RegionCodeGenTy &CodeGen)
113       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen),
114         ThreadIDVar(ThreadIDVar) {
115     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
116   }
117   /// \brief Get a variable or parameter for storing global thread id
118   /// inside OpenMP construct.
119   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
120
121   /// \brief Get an LValue for the current ThreadID variable.
122   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
123
124   /// \brief Get the name of the capture helper.
125   StringRef getHelperName() const override { return ".omp_outlined."; }
126
127   static bool classof(const CGCapturedStmtInfo *Info) {
128     return CGOpenMPRegionInfo::classof(Info) &&
129            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
130                TaskOutlinedRegion;
131   }
132
133 private:
134   /// \brief A variable or parameter storing global thread id for OpenMP
135   /// constructs.
136   const VarDecl *ThreadIDVar;
137 };
138
139 /// \brief API for inlined captured statement code generation in OpenMP
140 /// constructs.
141 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
142 public:
143   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
144                             const RegionCodeGenTy &CodeGen)
145       : CGOpenMPRegionInfo(InlinedRegion, CodeGen), OldCSI(OldCSI),
146         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
147   // \brief Retrieve the value of the context parameter.
148   llvm::Value *getContextValue() const override {
149     if (OuterRegionInfo)
150       return OuterRegionInfo->getContextValue();
151     llvm_unreachable("No context value for inlined OpenMP region");
152   }
153   virtual void setContextValue(llvm::Value *V) override {
154     if (OuterRegionInfo) {
155       OuterRegionInfo->setContextValue(V);
156       return;
157     }
158     llvm_unreachable("No context value for inlined OpenMP region");
159   }
160   /// \brief Lookup the captured field decl for a variable.
161   const FieldDecl *lookup(const VarDecl *VD) const override {
162     if (OuterRegionInfo)
163       return OuterRegionInfo->lookup(VD);
164     // If there is no outer outlined region,no need to lookup in a list of
165     // captured variables, we can use the original one.
166     return nullptr;
167   }
168   FieldDecl *getThisFieldDecl() const override {
169     if (OuterRegionInfo)
170       return OuterRegionInfo->getThisFieldDecl();
171     return nullptr;
172   }
173   /// \brief Get a variable or parameter for storing global thread id
174   /// inside OpenMP construct.
175   const VarDecl *getThreadIDVariable() const override {
176     if (OuterRegionInfo)
177       return OuterRegionInfo->getThreadIDVariable();
178     return nullptr;
179   }
180
181   /// \brief Get the name of the capture helper.
182   StringRef getHelperName() const override {
183     if (auto *OuterRegionInfo = getOldCSI())
184       return OuterRegionInfo->getHelperName();
185     llvm_unreachable("No helper name for inlined OpenMP construct");
186   }
187
188   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
189
190   static bool classof(const CGCapturedStmtInfo *Info) {
191     return CGOpenMPRegionInfo::classof(Info) &&
192            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
193   }
194
195 private:
196   /// \brief CodeGen info about outer OpenMP region.
197   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
198   CGOpenMPRegionInfo *OuterRegionInfo;
199 };
200
201 /// \brief RAII for emitting code of OpenMP constructs.
202 class InlinedOpenMPRegionRAII {
203   CodeGenFunction &CGF;
204
205 public:
206   /// \brief Constructs region for combined constructs.
207   /// \param CodeGen Code generation sequence for combined directives. Includes
208   /// a list of functions used for code generation of implicitly inlined
209   /// regions.
210   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen)
211       : CGF(CGF) {
212     // Start emission for the construct.
213     CGF.CapturedStmtInfo =
214         new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen);
215   }
216   ~InlinedOpenMPRegionRAII() {
217     // Restore original CapturedStmtInfo only if we're done with code emission.
218     auto *OldCSI =
219         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
220     delete CGF.CapturedStmtInfo;
221     CGF.CapturedStmtInfo = OldCSI;
222   }
223 };
224
225 } // namespace
226
227 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
228   return CGF.MakeNaturalAlignAddrLValue(
229       CGF.Builder.CreateAlignedLoad(
230           CGF.GetAddrOfLocalVar(getThreadIDVariable()),
231           CGF.PointerAlignInBytes),
232       getThreadIDVariable()
233           ->getType()
234           ->castAs<PointerType>()
235           ->getPointeeType());
236 }
237
238 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
239   // 1.2.2 OpenMP Language Terminology
240   // Structured block - An executable statement with a single entry at the
241   // top and a single exit at the bottom.
242   // The point of exit cannot be a branch out of the structured block.
243   // longjmp() and throw() must not violate the entry/exit criteria.
244   CGF.EHStack.pushTerminate();
245   {
246     CodeGenFunction::RunCleanupsScope Scope(CGF);
247     CodeGen(CGF);
248   }
249   CGF.EHStack.popTerminate();
250 }
251
252 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
253     CodeGenFunction &CGF) {
254   return CGF.MakeNaturalAlignAddrLValue(
255       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
256       getThreadIDVariable()->getType());
257 }
258
259 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
260     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
261   IdentTy = llvm::StructType::create(
262       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
263       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
264       CGM.Int8PtrTy /* psource */, nullptr);
265   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
266   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
267                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
268   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
269   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
270 }
271
272 void CGOpenMPRuntime::clear() {
273   InternalVars.clear();
274 }
275
276 llvm::Value *
277 CGOpenMPRuntime::emitParallelOutlinedFunction(const OMPExecutableDirective &D,
278                                               const VarDecl *ThreadIDVar,
279                                               const RegionCodeGenTy &CodeGen) {
280   assert(ThreadIDVar->getType()->isPointerType() &&
281          "thread id variable must be of type kmp_int32 *");
282   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
283   CodeGenFunction CGF(CGM, true);
284   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
285   CGF.CapturedStmtInfo = &CGInfo;
286   return CGF.GenerateCapturedStmtFunction(*CS);
287 }
288
289 llvm::Value *
290 CGOpenMPRuntime::emitTaskOutlinedFunction(const OMPExecutableDirective &D,
291                                           const VarDecl *ThreadIDVar,
292                                           const RegionCodeGenTy &CodeGen) {
293   assert(!ThreadIDVar->getType()->isPointerType() &&
294          "thread id variable must be of type kmp_int32 for tasks");
295   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
296   CodeGenFunction CGF(CGM, true);
297   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen);
298   CGF.CapturedStmtInfo = &CGInfo;
299   return CGF.GenerateCapturedStmtFunction(*CS);
300 }
301
302 llvm::Value *
303 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
304   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
305   if (!Entry) {
306     if (!DefaultOpenMPPSource) {
307       // Initialize default location for psource field of ident_t structure of
308       // all ident_t objects. Format is ";file;function;line;column;;".
309       // Taken from
310       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
311       DefaultOpenMPPSource =
312           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
313       DefaultOpenMPPSource =
314           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
315     }
316     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
317         CGM.getModule(), IdentTy, /*isConstant*/ true,
318         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
319     DefaultOpenMPLocation->setUnnamedAddr(true);
320
321     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
322     llvm::Constant *Values[] = {Zero,
323                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
324                                 Zero, Zero, DefaultOpenMPPSource};
325     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
326     DefaultOpenMPLocation->setInitializer(Init);
327     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
328     return DefaultOpenMPLocation;
329   }
330   return Entry;
331 }
332
333 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
334                                                  SourceLocation Loc,
335                                                  OpenMPLocationFlags Flags) {
336   // If no debug info is generated - return global default location.
337   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
338       Loc.isInvalid())
339     return getOrCreateDefaultLocation(Flags);
340
341   assert(CGF.CurFn && "No function in current CodeGenFunction.");
342
343   llvm::Value *LocValue = nullptr;
344   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
345   if (I != OpenMPLocThreadIDMap.end())
346     LocValue = I->second.DebugLoc;
347   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
348   // GetOpenMPThreadID was called before this routine.
349   if (LocValue == nullptr) {
350     // Generate "ident_t .kmpc_loc.addr;"
351     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
352     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
353     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
354     Elem.second.DebugLoc = AI;
355     LocValue = AI;
356
357     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
358     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
359     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
360                              llvm::ConstantExpr::getSizeOf(IdentTy),
361                              CGM.PointerAlignInBytes);
362   }
363
364   // char **psource = &.kmpc_loc_<flags>.addr.psource;
365   auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
366                                                          IdentField_PSource);
367
368   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
369   if (OMPDebugLoc == nullptr) {
370     SmallString<128> Buffer2;
371     llvm::raw_svector_ostream OS2(Buffer2);
372     // Build debug location
373     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
374     OS2 << ";" << PLoc.getFilename() << ";";
375     if (const FunctionDecl *FD =
376             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
377       OS2 << FD->getQualifiedNameAsString();
378     }
379     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
380     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
381     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
382   }
383   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
384   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
385
386   return LocValue;
387 }
388
389 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
390                                           SourceLocation Loc) {
391   assert(CGF.CurFn && "No function in current CodeGenFunction.");
392
393   llvm::Value *ThreadID = nullptr;
394   // Check whether we've already cached a load of the thread id in this
395   // function.
396   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
397   if (I != OpenMPLocThreadIDMap.end()) {
398     ThreadID = I->second.ThreadID;
399     if (ThreadID != nullptr)
400       return ThreadID;
401   }
402   if (auto OMPRegionInfo =
403           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
404     if (OMPRegionInfo->getThreadIDVariable()) {
405       // Check if this an outlined function with thread id passed as argument.
406       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
407       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
408       // If value loaded in entry block, cache it and use it everywhere in
409       // function.
410       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
411         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
412         Elem.second.ThreadID = ThreadID;
413       }
414       return ThreadID;
415     }
416   }
417
418   // This is not an outlined function region - need to call __kmpc_int32
419   // kmpc_global_thread_num(ident_t *loc).
420   // Generate thread id value and cache this value for use across the
421   // function.
422   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
423   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
424   ThreadID =
425       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
426                           emitUpdateLocation(CGF, Loc));
427   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
428   Elem.second.ThreadID = ThreadID;
429   return ThreadID;
430 }
431
432 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
433   assert(CGF.CurFn && "No function in current CodeGenFunction.");
434   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
435     OpenMPLocThreadIDMap.erase(CGF.CurFn);
436 }
437
438 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
439   return llvm::PointerType::getUnqual(IdentTy);
440 }
441
442 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
443   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
444 }
445
446 llvm::Constant *
447 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
448   llvm::Constant *RTLFn = nullptr;
449   switch (Function) {
450   case OMPRTL__kmpc_fork_call: {
451     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
452     // microtask, ...);
453     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
454                                 getKmpc_MicroPointerTy()};
455     llvm::FunctionType *FnTy =
456         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
457     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
458     break;
459   }
460   case OMPRTL__kmpc_global_thread_num: {
461     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
462     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
463     llvm::FunctionType *FnTy =
464         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
465     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
466     break;
467   }
468   case OMPRTL__kmpc_threadprivate_cached: {
469     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
470     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
471     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
472                                 CGM.VoidPtrTy, CGM.SizeTy,
473                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
474     llvm::FunctionType *FnTy =
475         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
476     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
477     break;
478   }
479   case OMPRTL__kmpc_critical: {
480     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
481     // kmp_critical_name *crit);
482     llvm::Type *TypeParams[] = {
483         getIdentTyPointerTy(), CGM.Int32Ty,
484         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
485     llvm::FunctionType *FnTy =
486         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
487     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
488     break;
489   }
490   case OMPRTL__kmpc_threadprivate_register: {
491     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
492     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
493     // typedef void *(*kmpc_ctor)(void *);
494     auto KmpcCtorTy =
495         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
496                                 /*isVarArg*/ false)->getPointerTo();
497     // typedef void *(*kmpc_cctor)(void *, void *);
498     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
499     auto KmpcCopyCtorTy =
500         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
501                                 /*isVarArg*/ false)->getPointerTo();
502     // typedef void (*kmpc_dtor)(void *);
503     auto KmpcDtorTy =
504         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
505             ->getPointerTo();
506     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
507                               KmpcCopyCtorTy, KmpcDtorTy};
508     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
509                                         /*isVarArg*/ false);
510     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
511     break;
512   }
513   case OMPRTL__kmpc_end_critical: {
514     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
515     // kmp_critical_name *crit);
516     llvm::Type *TypeParams[] = {
517         getIdentTyPointerTy(), CGM.Int32Ty,
518         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
519     llvm::FunctionType *FnTy =
520         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
521     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
522     break;
523   }
524   case OMPRTL__kmpc_cancel_barrier: {
525     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
526     // global_tid);
527     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
528     llvm::FunctionType *FnTy =
529         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
530     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
531     break;
532   }
533   case OMPRTL__kmpc_for_static_fini: {
534     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
535     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
536     llvm::FunctionType *FnTy =
537         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
538     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
539     break;
540   }
541   case OMPRTL__kmpc_push_num_threads: {
542     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
543     // kmp_int32 num_threads)
544     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
545                                 CGM.Int32Ty};
546     llvm::FunctionType *FnTy =
547         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
549     break;
550   }
551   case OMPRTL__kmpc_serialized_parallel: {
552     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
553     // global_tid);
554     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
555     llvm::FunctionType *FnTy =
556         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
557     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
558     break;
559   }
560   case OMPRTL__kmpc_end_serialized_parallel: {
561     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
562     // global_tid);
563     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
564     llvm::FunctionType *FnTy =
565         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
566     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
567     break;
568   }
569   case OMPRTL__kmpc_flush: {
570     // Build void __kmpc_flush(ident_t *loc);
571     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
572     llvm::FunctionType *FnTy =
573         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
574     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
575     break;
576   }
577   case OMPRTL__kmpc_master: {
578     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
579     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
580     llvm::FunctionType *FnTy =
581         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
582     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
583     break;
584   }
585   case OMPRTL__kmpc_end_master: {
586     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
587     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
588     llvm::FunctionType *FnTy =
589         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
590     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
591     break;
592   }
593   case OMPRTL__kmpc_omp_taskyield: {
594     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
595     // int end_part);
596     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
597     llvm::FunctionType *FnTy =
598         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
599     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
600     break;
601   }
602   case OMPRTL__kmpc_single: {
603     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
604     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
605     llvm::FunctionType *FnTy =
606         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
607     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
608     break;
609   }
610   case OMPRTL__kmpc_end_single: {
611     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
612     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
613     llvm::FunctionType *FnTy =
614         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
615     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
616     break;
617   }
618   case OMPRTL__kmpc_omp_task_alloc: {
619     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
620     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
621     // kmp_routine_entry_t *task_entry);
622     assert(KmpRoutineEntryPtrTy != nullptr &&
623            "Type kmp_routine_entry_t must be created.");
624     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
625                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
626     // Return void * and then cast to particular kmp_task_t type.
627     llvm::FunctionType *FnTy =
628         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
629     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
630     break;
631   }
632   case OMPRTL__kmpc_omp_task: {
633     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
634     // *new_task);
635     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
636                                 CGM.VoidPtrTy};
637     llvm::FunctionType *FnTy =
638         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
639     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
640     break;
641   }
642   case OMPRTL__kmpc_copyprivate: {
643     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
644     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
645     // kmp_int32 didit);
646     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
647     auto *CpyFnTy =
648         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
650                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
651                                 CGM.Int32Ty};
652     llvm::FunctionType *FnTy =
653         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
654     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
655     break;
656   }
657   case OMPRTL__kmpc_reduce: {
658     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
659     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
660     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
661     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
662     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
663                                                /*isVarArg=*/false);
664     llvm::Type *TypeParams[] = {
665         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
666         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
667         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
668     llvm::FunctionType *FnTy =
669         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
670     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
671     break;
672   }
673   case OMPRTL__kmpc_reduce_nowait: {
674     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
675     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
676     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
677     // *lck);
678     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
679     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
680                                                /*isVarArg=*/false);
681     llvm::Type *TypeParams[] = {
682         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
683         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
684         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
685     llvm::FunctionType *FnTy =
686         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
687     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
688     break;
689   }
690   case OMPRTL__kmpc_end_reduce: {
691     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
692     // kmp_critical_name *lck);
693     llvm::Type *TypeParams[] = {
694         getIdentTyPointerTy(), CGM.Int32Ty,
695         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
696     llvm::FunctionType *FnTy =
697         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
698     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
699     break;
700   }
701   case OMPRTL__kmpc_end_reduce_nowait: {
702     // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
703     // kmp_critical_name *lck);
704     llvm::Type *TypeParams[] = {
705         getIdentTyPointerTy(), CGM.Int32Ty,
706         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
707     llvm::FunctionType *FnTy =
708         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
709     RTLFn =
710         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
711     break;
712   }
713   case OMPRTL__kmpc_omp_task_begin_if0: {
714     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
715     // kmp_task_t *new_task);
716     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
717                                 CGM.VoidPtrTy};
718     llvm::FunctionType *FnTy =
719         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
720     RTLFn =
721         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
722     break;
723   }
724   case OMPRTL__kmpc_omp_task_complete_if0: {
725     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
726     // kmp_task_t *new_task);
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
728                                 CGM.VoidPtrTy};
729     llvm::FunctionType *FnTy =
730         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
731     RTLFn = CGM.CreateRuntimeFunction(FnTy,
732                                       /*Name=*/"__kmpc_omp_task_complete_if0");
733     break;
734   }
735   case OMPRTL__kmpc_ordered: {
736     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
737     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
738     llvm::FunctionType *FnTy =
739         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
740     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
741     break;
742   }
743   case OMPRTL__kmpc_end_ordered: {
744     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
745     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
746     llvm::FunctionType *FnTy =
747         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
748     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
749     break;
750   }
751   case OMPRTL__kmpc_omp_taskwait: {
752     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
753     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
754     llvm::FunctionType *FnTy =
755         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
756     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
757     break;
758   }
759   case OMPRTL__kmpc_taskgroup: {
760     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
761     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
762     llvm::FunctionType *FnTy =
763         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
764     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
765     break;
766   }
767   case OMPRTL__kmpc_end_taskgroup: {
768     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
769     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
770     llvm::FunctionType *FnTy =
771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
773     break;
774   }
775   case OMPRTL__kmpc_push_proc_bind: {
776     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
777     // int proc_bind)
778     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
779     llvm::FunctionType *FnTy =
780         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
781     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
782     break;
783   }
784   }
785   return RTLFn;
786 }
787
788 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
789                                                              bool IVSigned) {
790   assert((IVSize == 32 || IVSize == 64) &&
791          "IV size is not compatible with the omp runtime");
792   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
793                                        : "__kmpc_for_static_init_4u")
794                            : (IVSigned ? "__kmpc_for_static_init_8"
795                                        : "__kmpc_for_static_init_8u");
796   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
797   auto PtrTy = llvm::PointerType::getUnqual(ITy);
798   llvm::Type *TypeParams[] = {
799     getIdentTyPointerTy(),                     // loc
800     CGM.Int32Ty,                               // tid
801     CGM.Int32Ty,                               // schedtype
802     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
803     PtrTy,                                     // p_lower
804     PtrTy,                                     // p_upper
805     PtrTy,                                     // p_stride
806     ITy,                                       // incr
807     ITy                                        // chunk
808   };
809   llvm::FunctionType *FnTy =
810       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
811   return CGM.CreateRuntimeFunction(FnTy, Name);
812 }
813
814 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
815                                                             bool IVSigned) {
816   assert((IVSize == 32 || IVSize == 64) &&
817          "IV size is not compatible with the omp runtime");
818   auto Name =
819       IVSize == 32
820           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
821           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
822   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
823   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
824                                CGM.Int32Ty,           // tid
825                                CGM.Int32Ty,           // schedtype
826                                ITy,                   // lower
827                                ITy,                   // upper
828                                ITy,                   // stride
829                                ITy                    // chunk
830   };
831   llvm::FunctionType *FnTy =
832       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
833   return CGM.CreateRuntimeFunction(FnTy, Name);
834 }
835
836 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
837                                                             bool IVSigned) {
838   assert((IVSize == 32 || IVSize == 64) &&
839          "IV size is not compatible with the omp runtime");
840   auto Name =
841       IVSize == 32
842           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
843           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
844   llvm::Type *TypeParams[] = {
845       getIdentTyPointerTy(), // loc
846       CGM.Int32Ty,           // tid
847   };
848   llvm::FunctionType *FnTy =
849       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
850   return CGM.CreateRuntimeFunction(FnTy, Name);
851 }
852
853 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
854                                                             bool IVSigned) {
855   assert((IVSize == 32 || IVSize == 64) &&
856          "IV size is not compatible with the omp runtime");
857   auto Name =
858       IVSize == 32
859           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
860           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
861   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
862   auto PtrTy = llvm::PointerType::getUnqual(ITy);
863   llvm::Type *TypeParams[] = {
864     getIdentTyPointerTy(),                     // loc
865     CGM.Int32Ty,                               // tid
866     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
867     PtrTy,                                     // p_lower
868     PtrTy,                                     // p_upper
869     PtrTy                                      // p_stride
870   };
871   llvm::FunctionType *FnTy =
872       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
873   return CGM.CreateRuntimeFunction(FnTy, Name);
874 }
875
876 llvm::Constant *
877 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
878   // Lookup the entry, lazily creating it if necessary.
879   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
880                                      Twine(CGM.getMangledName(VD)) + ".cache.");
881 }
882
883 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
884                                                      const VarDecl *VD,
885                                                      llvm::Value *VDAddr,
886                                                      SourceLocation Loc) {
887   auto VarTy = VDAddr->getType()->getPointerElementType();
888   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
889                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
890                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
891                          getOrCreateThreadPrivateCache(VD)};
892   return CGF.EmitRuntimeCall(
893       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
894 }
895
896 void CGOpenMPRuntime::emitThreadPrivateVarInit(
897     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
898     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
899   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
900   // library.
901   auto OMPLoc = emitUpdateLocation(CGF, Loc);
902   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
903                       OMPLoc);
904   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
905   // to register constructor/destructor for variable.
906   llvm::Value *Args[] = {OMPLoc,
907                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
908                          Ctor, CopyCtor, Dtor};
909   CGF.EmitRuntimeCall(
910       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
911 }
912
913 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
914     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
915     bool PerformInit, CodeGenFunction *CGF) {
916   VD = VD->getDefinition(CGM.getContext());
917   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
918     ThreadPrivateWithDefinition.insert(VD);
919     QualType ASTTy = VD->getType();
920
921     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
922     auto Init = VD->getAnyInitializer();
923     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
924       // Generate function that re-emits the declaration's initializer into the
925       // threadprivate copy of the variable VD
926       CodeGenFunction CtorCGF(CGM);
927       FunctionArgList Args;
928       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
929                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
930       Args.push_back(&Dst);
931
932       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
933           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
934           /*isVariadic=*/false);
935       auto FTy = CGM.getTypes().GetFunctionType(FI);
936       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
937           FTy, ".__kmpc_global_ctor_.", Loc);
938       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
939                             Args, SourceLocation());
940       auto ArgVal = CtorCGF.EmitLoadOfScalar(
941           CtorCGF.GetAddrOfLocalVar(&Dst),
942           /*Volatile=*/false, CGM.PointerAlignInBytes,
943           CGM.getContext().VoidPtrTy, Dst.getLocation());
944       auto Arg = CtorCGF.Builder.CreatePointerCast(
945           ArgVal,
946           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
947       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
948                                /*IsInitializer=*/true);
949       ArgVal = CtorCGF.EmitLoadOfScalar(
950           CtorCGF.GetAddrOfLocalVar(&Dst),
951           /*Volatile=*/false, CGM.PointerAlignInBytes,
952           CGM.getContext().VoidPtrTy, Dst.getLocation());
953       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
954       CtorCGF.FinishFunction();
955       Ctor = Fn;
956     }
957     if (VD->getType().isDestructedType() != QualType::DK_none) {
958       // Generate function that emits destructor call for the threadprivate copy
959       // of the variable VD
960       CodeGenFunction DtorCGF(CGM);
961       FunctionArgList Args;
962       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
963                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
964       Args.push_back(&Dst);
965
966       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
967           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
968           /*isVariadic=*/false);
969       auto FTy = CGM.getTypes().GetFunctionType(FI);
970       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
971           FTy, ".__kmpc_global_dtor_.", Loc);
972       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
973                             SourceLocation());
974       auto ArgVal = DtorCGF.EmitLoadOfScalar(
975           DtorCGF.GetAddrOfLocalVar(&Dst),
976           /*Volatile=*/false, CGM.PointerAlignInBytes,
977           CGM.getContext().VoidPtrTy, Dst.getLocation());
978       DtorCGF.emitDestroy(ArgVal, ASTTy,
979                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
980                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
981       DtorCGF.FinishFunction();
982       Dtor = Fn;
983     }
984     // Do not emit init function if it is not required.
985     if (!Ctor && !Dtor)
986       return nullptr;
987
988     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
989     auto CopyCtorTy =
990         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
991                                 /*isVarArg=*/false)->getPointerTo();
992     // Copying constructor for the threadprivate variable.
993     // Must be NULL - reserved by runtime, but currently it requires that this
994     // parameter is always NULL. Otherwise it fires assertion.
995     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
996     if (Ctor == nullptr) {
997       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
998                                             /*isVarArg=*/false)->getPointerTo();
999       Ctor = llvm::Constant::getNullValue(CtorTy);
1000     }
1001     if (Dtor == nullptr) {
1002       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1003                                             /*isVarArg=*/false)->getPointerTo();
1004       Dtor = llvm::Constant::getNullValue(DtorTy);
1005     }
1006     if (!CGF) {
1007       auto InitFunctionTy =
1008           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1009       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1010           InitFunctionTy, ".__omp_threadprivate_init_.");
1011       CodeGenFunction InitCGF(CGM);
1012       FunctionArgList ArgList;
1013       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1014                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1015                             Loc);
1016       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1017       InitCGF.FinishFunction();
1018       return InitFunction;
1019     }
1020     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1021   }
1022   return nullptr;
1023 }
1024
1025 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1026 /// function. Here is the logic:
1027 /// if (Cond) {
1028 ///   ThenGen();
1029 /// } else {
1030 ///   ElseGen();
1031 /// }
1032 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1033                             const RegionCodeGenTy &ThenGen,
1034                             const RegionCodeGenTy &ElseGen) {
1035   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1036
1037   // If the condition constant folds and can be elided, try to avoid emitting
1038   // the condition and the dead arm of the if/else.
1039   bool CondConstant;
1040   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1041     CodeGenFunction::RunCleanupsScope Scope(CGF);
1042     if (CondConstant) {
1043       ThenGen(CGF);
1044     } else {
1045       ElseGen(CGF);
1046     }
1047     return;
1048   }
1049
1050   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1051   // emit the conditional branch.
1052   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1053   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1054   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1055   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1056
1057   // Emit the 'then' code.
1058   CGF.EmitBlock(ThenBlock);
1059   {
1060     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1061     ThenGen(CGF);
1062   }
1063   CGF.EmitBranch(ContBlock);
1064   // Emit the 'else' code if present.
1065   {
1066     // There is no need to emit line number for unconditional branch.
1067     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1068     CGF.EmitBlock(ElseBlock);
1069   }
1070   {
1071     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1072     ElseGen(CGF);
1073   }
1074   {
1075     // There is no need to emit line number for unconditional branch.
1076     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1077     CGF.EmitBranch(ContBlock);
1078   }
1079   // Emit the continuation block for code after the if.
1080   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1081 }
1082
1083 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1084                                        llvm::Value *OutlinedFn,
1085                                        llvm::Value *CapturedStruct,
1086                                        const Expr *IfCond) {
1087   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1088   auto &&ThenGen =
1089       [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) {
1090         // Build call __kmpc_fork_call(loc, 1, microtask,
1091         // captured_struct/*context*/)
1092         llvm::Value *Args[] = {
1093             RTLoc,
1094             CGF.Builder.getInt32(
1095                 1), // Number of arguments after 'microtask' argument
1096             // (there is only one additional argument - 'context')
1097             CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
1098             CGF.EmitCastToVoidPtr(CapturedStruct)};
1099         auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1100         CGF.EmitRuntimeCall(RTLFn, Args);
1101       };
1102   auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc](
1103       CodeGenFunction &CGF) {
1104     auto ThreadID = getThreadID(CGF, Loc);
1105     // Build calls:
1106     // __kmpc_serialized_parallel(&Loc, GTid);
1107     llvm::Value *Args[] = {RTLoc, ThreadID};
1108     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1109                         Args);
1110
1111     // OutlinedFn(&GTid, &zero, CapturedStruct);
1112     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1113     auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
1114                                                           /*Signed*/ true);
1115     auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
1116     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1117     llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
1118     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1119
1120     // __kmpc_end_serialized_parallel(&Loc, GTid);
1121     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1122     CGF.EmitRuntimeCall(
1123         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1124   };
1125   if (IfCond) {
1126     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1127   } else {
1128     CodeGenFunction::RunCleanupsScope Scope(CGF);
1129     ThenGen(CGF);
1130   }
1131 }
1132
1133 // If we're inside an (outlined) parallel region, use the region info's
1134 // thread-ID variable (it is passed in a first argument of the outlined function
1135 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1136 // regular serial code region, get thread ID by calling kmp_int32
1137 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1138 // return the address of that temp.
1139 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1140                                                   SourceLocation Loc) {
1141   if (auto OMPRegionInfo =
1142           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1143     if (OMPRegionInfo->getThreadIDVariable())
1144       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1145
1146   auto ThreadID = getThreadID(CGF, Loc);
1147   auto Int32Ty =
1148       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1149   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1150   CGF.EmitStoreOfScalar(ThreadID,
1151                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
1152
1153   return ThreadIDTemp;
1154 }
1155
1156 llvm::Constant *
1157 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1158                                              const llvm::Twine &Name) {
1159   SmallString<256> Buffer;
1160   llvm::raw_svector_ostream Out(Buffer);
1161   Out << Name;
1162   auto RuntimeName = Out.str();
1163   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1164   if (Elem.second) {
1165     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1166            "OMP internal variable has different type than requested");
1167     return &*Elem.second;
1168   }
1169
1170   return Elem.second = new llvm::GlobalVariable(
1171              CGM.getModule(), Ty, /*IsConstant*/ false,
1172              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1173              Elem.first());
1174 }
1175
1176 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1177   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1178   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1179 }
1180
1181 namespace {
1182 template <size_t N> class CallEndCleanup : public EHScopeStack::Cleanup {
1183   llvm::Value *Callee;
1184   llvm::Value *Args[N];
1185
1186 public:
1187   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1188       : Callee(Callee) {
1189     assert(CleanupArgs.size() == N);
1190     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1191   }
1192   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1193     CGF.EmitRuntimeCall(Callee, Args);
1194   }
1195 };
1196 } // namespace
1197
1198 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1199                                          StringRef CriticalName,
1200                                          const RegionCodeGenTy &CriticalOpGen,
1201                                          SourceLocation Loc) {
1202   // __kmpc_critical(ident_t *, gtid, Lock);
1203   // CriticalOpGen();
1204   // __kmpc_end_critical(ident_t *, gtid, Lock);
1205   // Prepare arguments and build a call to __kmpc_critical
1206   {
1207     CodeGenFunction::RunCleanupsScope Scope(CGF);
1208     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1209                            getCriticalRegionLock(CriticalName)};
1210     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1211     // Build a call to __kmpc_end_critical
1212     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1213         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1214         llvm::makeArrayRef(Args));
1215     emitInlinedDirective(CGF, CriticalOpGen);
1216   }
1217 }
1218
1219 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1220                        const RegionCodeGenTy &BodyOpGen) {
1221   llvm::Value *CallBool = CGF.EmitScalarConversion(
1222       IfCond,
1223       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1224       CGF.getContext().BoolTy);
1225
1226   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1227   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1228   // Generate the branch (If-stmt)
1229   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1230   CGF.EmitBlock(ThenBlock);
1231   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, BodyOpGen);
1232   // Emit the rest of bblocks/branches
1233   CGF.EmitBranch(ContBlock);
1234   CGF.EmitBlock(ContBlock, true);
1235 }
1236
1237 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1238                                        const RegionCodeGenTy &MasterOpGen,
1239                                        SourceLocation Loc) {
1240   // if(__kmpc_master(ident_t *, gtid)) {
1241   //   MasterOpGen();
1242   //   __kmpc_end_master(ident_t *, gtid);
1243   // }
1244   // Prepare arguments and build a call to __kmpc_master
1245   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1246   auto *IsMaster =
1247       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1248   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1249       MasterCallEndCleanup;
1250   emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
1251     CodeGenFunction::RunCleanupsScope Scope(CGF);
1252     CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1253         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1254         llvm::makeArrayRef(Args));
1255     MasterOpGen(CGF);
1256   });
1257 }
1258
1259 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1260                                         SourceLocation Loc) {
1261   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1262   llvm::Value *Args[] = {
1263       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1264       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1265   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1266 }
1267
1268 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1269                                           const RegionCodeGenTy &TaskgroupOpGen,
1270                                           SourceLocation Loc) {
1271   // __kmpc_taskgroup(ident_t *, gtid);
1272   // TaskgroupOpGen();
1273   // __kmpc_end_taskgroup(ident_t *, gtid);
1274   // Prepare arguments and build a call to __kmpc_taskgroup
1275   {
1276     CodeGenFunction::RunCleanupsScope Scope(CGF);
1277     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1278     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1279     // Build a call to __kmpc_end_taskgroup
1280     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1281         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1282         llvm::makeArrayRef(Args));
1283     emitInlinedDirective(CGF, TaskgroupOpGen);
1284   }
1285 }
1286
1287 static llvm::Value *emitCopyprivateCopyFunction(
1288     CodeGenModule &CGM, llvm::Type *ArgsType,
1289     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1290     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1291   auto &C = CGM.getContext();
1292   // void copy_func(void *LHSArg, void *RHSArg);
1293   FunctionArgList Args;
1294   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1295                            C.VoidPtrTy);
1296   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1297                            C.VoidPtrTy);
1298   Args.push_back(&LHSArg);
1299   Args.push_back(&RHSArg);
1300   FunctionType::ExtInfo EI;
1301   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1302       C.VoidTy, Args, EI, /*isVariadic=*/false);
1303   auto *Fn = llvm::Function::Create(
1304       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1305       ".omp.copyprivate.copy_func", &CGM.getModule());
1306   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1307   CodeGenFunction CGF(CGM);
1308   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1309   // Dest = (void*[n])(LHSArg);
1310   // Src = (void*[n])(RHSArg);
1311   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1312       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1313                                     CGF.PointerAlignInBytes),
1314       ArgsType);
1315   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1316       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1317                                     CGF.PointerAlignInBytes),
1318       ArgsType);
1319   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1320   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1321   // ...
1322   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1323   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1324     auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1325         CGF.Builder.CreateAlignedLoad(
1326             CGF.Builder.CreateStructGEP(nullptr, LHS, I),
1327             CGM.PointerAlignInBytes),
1328         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1329     auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1330         CGF.Builder.CreateAlignedLoad(
1331             CGF.Builder.CreateStructGEP(nullptr, RHS, I),
1332             CGM.PointerAlignInBytes),
1333         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1334     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1335     QualType Type = VD->getType();
1336     CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr,
1337                     cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
1338                     cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
1339                     AssignmentOps[I]);
1340   }
1341   CGF.FinishFunction();
1342   return Fn;
1343 }
1344
1345 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1346                                        const RegionCodeGenTy &SingleOpGen,
1347                                        SourceLocation Loc,
1348                                        ArrayRef<const Expr *> CopyprivateVars,
1349                                        ArrayRef<const Expr *> SrcExprs,
1350                                        ArrayRef<const Expr *> DstExprs,
1351                                        ArrayRef<const Expr *> AssignmentOps) {
1352   assert(CopyprivateVars.size() == SrcExprs.size() &&
1353          CopyprivateVars.size() == DstExprs.size() &&
1354          CopyprivateVars.size() == AssignmentOps.size());
1355   auto &C = CGM.getContext();
1356   // int32 did_it = 0;
1357   // if(__kmpc_single(ident_t *, gtid)) {
1358   //   SingleOpGen();
1359   //   __kmpc_end_single(ident_t *, gtid);
1360   //   did_it = 1;
1361   // }
1362   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1363   // <copy_func>, did_it);
1364
1365   llvm::AllocaInst *DidIt = nullptr;
1366   if (!CopyprivateVars.empty()) {
1367     // int32 did_it = 0;
1368     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1369     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1370     CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
1371                                    DidIt->getAlignment());
1372   }
1373   // Prepare arguments and build a call to __kmpc_single
1374   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1375   auto *IsSingle =
1376       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1377   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1378       SingleCallEndCleanup;
1379   emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
1380     CodeGenFunction::RunCleanupsScope Scope(CGF);
1381     CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1382         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1383         llvm::makeArrayRef(Args));
1384     SingleOpGen(CGF);
1385     if (DidIt) {
1386       // did_it = 1;
1387       CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
1388                                      DidIt->getAlignment());
1389     }
1390   });
1391   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1392   // <copy_func>, did_it);
1393   if (DidIt) {
1394     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1395     auto CopyprivateArrayTy =
1396         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1397                                /*IndexTypeQuals=*/0);
1398     // Create a list of all private variables for copyprivate.
1399     auto *CopyprivateList =
1400         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1401     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1402       auto *Elem = CGF.Builder.CreateStructGEP(
1403           CopyprivateList->getAllocatedType(), CopyprivateList, I);
1404       CGF.Builder.CreateAlignedStore(
1405           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1406               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
1407           Elem, CGM.PointerAlignInBytes);
1408     }
1409     // Build function that copies private values from single region to all other
1410     // threads in the corresponding parallel region.
1411     auto *CpyFn = emitCopyprivateCopyFunction(
1412         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1413         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1414     auto *BufSize = llvm::ConstantInt::get(
1415         CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1416     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1417                                                                CGF.VoidPtrTy);
1418     auto *DidItVal =
1419         CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
1420     llvm::Value *Args[] = {
1421         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1422         getThreadID(CGF, Loc),        // i32 <gtid>
1423         BufSize,                      // size_t <buf_size>
1424         CL,                           // void *<copyprivate list>
1425         CpyFn,                        // void (*) (void *, void *) <copy_func>
1426         DidItVal                      // i32 did_it
1427     };
1428     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1429   }
1430 }
1431
1432 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1433                                         const RegionCodeGenTy &OrderedOpGen,
1434                                         SourceLocation Loc) {
1435   // __kmpc_ordered(ident_t *, gtid);
1436   // OrderedOpGen();
1437   // __kmpc_end_ordered(ident_t *, gtid);
1438   // Prepare arguments and build a call to __kmpc_ordered
1439   {
1440     CodeGenFunction::RunCleanupsScope Scope(CGF);
1441     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1442     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1443     // Build a call to __kmpc_end_ordered
1444     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1445         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1446         llvm::makeArrayRef(Args));
1447     emitInlinedDirective(CGF, OrderedOpGen);
1448   }
1449 }
1450
1451 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1452                                       OpenMPDirectiveKind Kind) {
1453   // Build call __kmpc_cancel_barrier(loc, thread_id);
1454   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1455   if (Kind == OMPD_for) {
1456     Flags =
1457         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1458   } else if (Kind == OMPD_sections) {
1459     Flags = static_cast<OpenMPLocationFlags>(Flags |
1460                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1461   } else if (Kind == OMPD_single) {
1462     Flags =
1463         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1464   } else if (Kind == OMPD_barrier) {
1465     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1466   } else {
1467     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1468   }
1469   // Build call __kmpc_cancel_barrier(loc, thread_id);
1470   // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this
1471   // one provides the same functionality and adds initial support for
1472   // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier()
1473   // is provided default by the runtime library so it safe to make such
1474   // replacement.
1475   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1476                          getThreadID(CGF, Loc)};
1477   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1478 }
1479
/// \brief Runtime schedule kinds for 'omp for' loops. The numeric values are
/// taken from the enum sched_type in kmp.h and are passed to the runtime
/// as-is, so they must not be changed.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief Unordered static schedule with a chunk size.
  OMP_sch_static_chunked = 33,
  /// \brief Unordered static schedule without a chunk size.
  OMP_sch_static = 34,
  /// \brief Unordered dynamic schedule.
  OMP_sch_dynamic_chunked = 35,
  /// \brief Unordered guided schedule.
  OMP_sch_guided_chunked = 36,
  /// \brief Unordered schedule selected at run time.
  OMP_sch_runtime = 37,
  /// \brief Unordered 'auto' schedule.
  OMP_sch_auto = 38,

  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Ordered static schedule with a chunk size.
  OMP_ord_static_chunked = 65,
  /// \brief Ordered static schedule without a chunk size.
  OMP_ord_static = 66,
  /// \brief Ordered dynamic schedule.
  OMP_ord_dynamic_chunked = 67,
  /// \brief Ordered guided schedule.
  OMP_ord_guided_chunked = 68,
  /// \brief Ordered schedule selected at run time.
  OMP_ord_runtime = 69,
  /// \brief Ordered 'auto' schedule.
  OMP_ord_auto = 70,

  /// \brief Default schedule: an alias for the unordered, non-chunked static
  /// schedule.
  OMP_sch_default = OMP_sch_static,
};
1501
1502 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1503 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1504                                           bool Chunked, bool Ordered) {
1505   switch (ScheduleKind) {
1506   case OMPC_SCHEDULE_static:
1507     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1508                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1509   case OMPC_SCHEDULE_dynamic:
1510     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1511   case OMPC_SCHEDULE_guided:
1512     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1513   case OMPC_SCHEDULE_runtime:
1514     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1515   case OMPC_SCHEDULE_auto:
1516     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1517   case OMPC_SCHEDULE_unknown:
1518     assert(!Chunked && "chunk was specified but schedule kind not known");
1519     return Ordered ? OMP_ord_static : OMP_sch_static;
1520   }
1521   llvm_unreachable("Unexpected runtime schedule");
1522 }
1523
1524 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1525                                          bool Chunked) const {
1526   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1527   return Schedule == OMP_sch_static;
1528 }
1529
1530 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1531   auto Schedule =
1532       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1533   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1534   return Schedule != OMP_sch_static;
1535 }
1536
1537 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1538                                   OpenMPScheduleClauseKind ScheduleKind,
1539                                   unsigned IVSize, bool IVSigned, bool Ordered,
1540                                   llvm::Value *IL, llvm::Value *LB,
1541                                   llvm::Value *UB, llvm::Value *ST,
1542                                   llvm::Value *Chunk) {
1543   OpenMPSchedType Schedule =
1544       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1545   if (Ordered ||
1546       (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1547        Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
1548     // Call __kmpc_dispatch_init(
1549     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1550     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1551     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1552
1553     // If the Chunk was not specified in the clause - use default value 1.
1554     if (Chunk == nullptr)
1555       Chunk = CGF.Builder.getIntN(IVSize, 1);
1556     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1557                             getThreadID(CGF, Loc),
1558                             CGF.Builder.getInt32(Schedule), // Schedule type
1559                             CGF.Builder.getIntN(IVSize, 0), // Lower
1560                             UB,                             // Upper
1561                             CGF.Builder.getIntN(IVSize, 1), // Stride
1562                             Chunk                           // Chunk
1563     };
1564     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1565   } else {
1566     // Call __kmpc_for_static_init(
1567     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1568     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1569     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1570     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1571     if (Chunk == nullptr) {
1572       assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1573              "expected static non-chunked schedule");
1574       // If the Chunk was not specified in the clause - use default value 1.
1575       Chunk = CGF.Builder.getIntN(IVSize, 1);
1576     } else
1577       assert((Schedule == OMP_sch_static_chunked ||
1578               Schedule == OMP_ord_static_chunked) &&
1579              "expected static chunked schedule");
1580     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1581                             getThreadID(CGF, Loc),
1582                             CGF.Builder.getInt32(Schedule), // Schedule type
1583                             IL,                             // &isLastIter
1584                             LB,                             // &LB
1585                             UB,                             // &UB
1586                             ST,                             // &Stride
1587                             CGF.Builder.getIntN(IVSize, 1), // Incr
1588                             Chunk                           // Chunk
1589     };
1590     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1591   }
1592 }
1593
1594 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1595                                           SourceLocation Loc) {
1596   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1597   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1598                          getThreadID(CGF, Loc)};
1599   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1600                       Args);
1601 }
1602
1603 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1604                                                  SourceLocation Loc,
1605                                                  unsigned IVSize,
1606                                                  bool IVSigned) {
1607   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1608   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1609                          getThreadID(CGF, Loc)};
1610   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1611 }
1612
1613 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1614                                           SourceLocation Loc, unsigned IVSize,
1615                                           bool IVSigned, llvm::Value *IL,
1616                                           llvm::Value *LB, llvm::Value *UB,
1617                                           llvm::Value *ST) {
1618   // Call __kmpc_dispatch_next(
1619   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1620   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1621   //          kmp_int[32|64] *p_stride);
1622   llvm::Value *Args[] = {
1623       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1624       IL, // &isLastIter
1625       LB, // &Lower
1626       UB, // &Upper
1627       ST  // &Stride
1628   };
1629   llvm::Value *Call =
1630       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1631   return CGF.EmitScalarConversion(
1632       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1633       CGF.getContext().BoolTy);
1634 }
1635
1636 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1637                                            llvm::Value *NumThreads,
1638                                            SourceLocation Loc) {
1639   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1640   llvm::Value *Args[] = {
1641       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1642       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1643   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1644                       Args);
1645 }
1646
1647 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1648                                          OpenMPProcBindClauseKind ProcBind,
1649                                          SourceLocation Loc) {
1650   // Constants for proc bind value accepted by the runtime.
1651   enum ProcBindTy {
1652     ProcBindFalse = 0,
1653     ProcBindTrue,
1654     ProcBindMaster,
1655     ProcBindClose,
1656     ProcBindSpread,
1657     ProcBindIntel,
1658     ProcBindDefault
1659   } RuntimeProcBind;
1660   switch (ProcBind) {
1661   case OMPC_PROC_BIND_master:
1662     RuntimeProcBind = ProcBindMaster;
1663     break;
1664   case OMPC_PROC_BIND_close:
1665     RuntimeProcBind = ProcBindClose;
1666     break;
1667   case OMPC_PROC_BIND_spread:
1668     RuntimeProcBind = ProcBindSpread;
1669     break;
1670   case OMPC_PROC_BIND_unknown:
1671     llvm_unreachable("Unsupported proc_bind value.");
1672   }
1673   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1674   llvm::Value *Args[] = {
1675       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1676       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1677   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1678 }
1679
1680 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1681                                 SourceLocation Loc) {
1682   // Build call void __kmpc_flush(ident_t *loc)
1683   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1684                       emitUpdateLocation(CGF, Loc));
1685 }
1686
namespace {
/// \brief Positions of the fields inside the record built for kmp_task_t.
/// The values are used as offsets from field_begin() of that record, so the
/// order here must match the order in which createKmpTaskTRecordDecl adds the
/// fields.
enum KmpTaskTFields {
  /// \brief Pointer to the block of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Entry point of the task.
  KmpTaskTRoutine,
  /// \brief Partition id (used for untied tasks).
  KmpTaskTPartId,
  /// \brief Routine that calls the destructors of the private variables.
  KmpTaskTDestructors,
};
} // namespace
1700
1701 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1702   if (!KmpRoutineEntryPtrTy) {
1703     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1704     auto &C = CGM.getContext();
1705     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1706     FunctionProtoType::ExtProtoInfo EPI;
1707     KmpRoutineEntryPtrQTy = C.getPointerType(
1708         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1709     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1710   }
1711 }
1712
1713 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1714                                  QualType FieldTy) {
1715   auto *Field = FieldDecl::Create(
1716       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1717       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1718       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1719   Field->setAccess(AS_public);
1720   DC->addDecl(Field);
1721 }
1722
1723 namespace {
1724 struct PrivateHelpersTy {
1725   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1726                    const VarDecl *PrivateElemInit)
1727       : Original(Original), PrivateCopy(PrivateCopy),
1728         PrivateElemInit(PrivateElemInit) {}
1729   const VarDecl *Original;
1730   const VarDecl *PrivateCopy;
1731   const VarDecl *PrivateElemInit;
1732 };
1733 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
1734 } // namespace
1735
1736 static RecordDecl *
1737 createPrivatesRecordDecl(CodeGenModule &CGM,
1738                          const ArrayRef<PrivateDataTy> Privates) {
1739   if (!Privates.empty()) {
1740     auto &C = CGM.getContext();
1741     // Build struct .kmp_privates_t. {
1742     //         /*  private vars  */
1743     //       };
1744     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1745     RD->startDefinition();
1746     for (auto &&Pair : Privates) {
1747       auto Type = Pair.second.Original->getType();
1748       Type = Type.getNonReferenceType();
1749       addFieldToRecordDecl(C, RD, Type);
1750     }
1751     RD->completeDefinition();
1752     return RD;
1753   }
1754   return nullptr;
1755 }
1756
1757 static RecordDecl *
1758 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1759                          QualType KmpRoutineEntryPointerQTy) {
1760   auto &C = CGM.getContext();
1761   // Build struct kmp_task_t {
1762   //         void *              shareds;
1763   //         kmp_routine_entry_t routine;
1764   //         kmp_int32           part_id;
1765   //         kmp_routine_entry_t destructors;
1766   //       };
1767   auto *RD = C.buildImplicitRecord("kmp_task_t");
1768   RD->startDefinition();
1769   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1770   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1771   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1772   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1773   RD->completeDefinition();
1774   return RD;
1775 }
1776
1777 static RecordDecl *
1778 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
1779                                      const ArrayRef<PrivateDataTy> Privates) {
1780   auto &C = CGM.getContext();
1781   // Build struct kmp_task_t_with_privates {
1782   //         kmp_task_t task_data;
1783   //         .kmp_privates_t. privates;
1784   //       };
1785   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
1786   RD->startDefinition();
1787   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
1788   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
1789     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
1790   }
1791   RD->completeDefinition();
1792   return RD;
1793 }
1794
1795 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1796 /// argument.
1797 /// \code
1798 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1799 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
1800 ///   tt->shareds);
1801 ///   return 0;
1802 /// }
1803 /// \endcode
1804 static llvm::Value *
1805 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1806                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
1807                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
1808                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
1809                       llvm::Value *TaskPrivatesMap) {
1810   auto &C = CGM.getContext();
1811   FunctionArgList Args;
1812   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1813   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1814                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1815   Args.push_back(&GtidArg);
1816   Args.push_back(&TaskTypeArg);
1817   FunctionType::ExtInfo Info;
1818   auto &TaskEntryFnInfo =
1819       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1820                                                     /*isVariadic=*/false);
1821   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1822   auto *TaskEntry =
1823       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1824                              ".omp_task_entry.", &CGM.getModule());
1825   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1826   CodeGenFunction CGF(CGM);
1827   CGF.disableDebugInfo();
1828   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1829
1830   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
1831   // tt->task_data.shareds);
1832   auto *GtidParam = CGF.EmitLoadOfScalar(
1833       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1834       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1835   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1836       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1837   LValue TDBase =
1838       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1839   auto *KmpTaskTWithPrivatesQTyRD =
1840       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
1841   LValue Base =
1842       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
1843   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
1844   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
1845   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
1846   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
1847
1848   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
1849   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
1850   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1851       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
1852       CGF.ConvertTypeForMem(SharedsPtrTy));
1853
1854   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
1855   llvm::Value *PrivatesParam;
1856   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
1857     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
1858     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1859         PrivatesLVal.getAddress(), CGF.VoidPtrTy);
1860   } else {
1861     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
1862   }
1863
1864   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
1865                              TaskPrivatesMap, SharedsParam};
1866   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
1867   CGF.EmitStoreThroughLValue(
1868       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1869       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1870   CGF.FinishFunction();
1871   return TaskEntry;
1872 }
1873
1874 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
1875                                             SourceLocation Loc,
1876                                             QualType KmpInt32Ty,
1877                                             QualType KmpTaskTWithPrivatesPtrQTy,
1878                                             QualType KmpTaskTWithPrivatesQTy) {
1879   auto &C = CGM.getContext();
1880   FunctionArgList Args;
1881   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1882   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1883                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1884   Args.push_back(&GtidArg);
1885   Args.push_back(&TaskTypeArg);
1886   FunctionType::ExtInfo Info;
1887   auto &DestructorFnInfo =
1888       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1889                                                     /*isVariadic=*/false);
1890   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
1891   auto *DestructorFn =
1892       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
1893                              ".omp_task_destructor.", &CGM.getModule());
1894   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
1895   CodeGenFunction CGF(CGM);
1896   CGF.disableDebugInfo();
1897   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
1898                     Args);
1899
1900   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1901       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1902   LValue Base =
1903       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1904   auto *KmpTaskTWithPrivatesQTyRD =
1905       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
1906   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
1907   Base = CGF.EmitLValueForField(Base, *FI);
1908   for (auto *Field :
1909        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
1910     if (auto DtorKind = Field->getType().isDestructedType()) {
1911       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
1912       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
1913     }
1914   }
1915   CGF.FinishFunction();
1916   return DestructorFn;
1917 }
1918
1919 /// \brief Emit a privates mapping function for correct handling of private and
1920 /// firstprivate variables.
1921 /// \code
1922 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
1923 /// **noalias priv1,...,  <tyn> **noalias privn) {
1924 ///   *priv1 = &.privates.priv1;
1925 ///   ...;
1926 ///   *privn = &.privates.privn;
1927 /// }
1928 /// \endcode
1929 static llvm::Value *
1930 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
1931                                const ArrayRef<const Expr *> PrivateVars,
1932                                const ArrayRef<const Expr *> FirstprivateVars,
1933                                QualType PrivatesQTy,
1934                                const ArrayRef<PrivateDataTy> Privates) {
1935   auto &C = CGM.getContext();
1936   FunctionArgList Args;
1937   ImplicitParamDecl TaskPrivatesArg(
1938       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
1939       C.getPointerType(PrivatesQTy).withConst().withRestrict());
1940   Args.push_back(&TaskPrivatesArg);
1941   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
1942   unsigned Counter = 1;
1943   for (auto *E: PrivateVars) {
1944     Args.push_back(ImplicitParamDecl::Create(
1945         C, /*DC=*/nullptr, Loc,
1946         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
1947                             .withConst()
1948                             .withRestrict()));
1949     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1950     PrivateVarsPos[VD] = Counter;
1951     ++Counter;
1952   }
1953   for (auto *E : FirstprivateVars) {
1954     Args.push_back(ImplicitParamDecl::Create(
1955         C, /*DC=*/nullptr, Loc,
1956         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
1957                             .withConst()
1958                             .withRestrict()));
1959     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1960     PrivateVarsPos[VD] = Counter;
1961     ++Counter;
1962   }
1963   FunctionType::ExtInfo Info;
1964   auto &TaskPrivatesMapFnInfo =
1965       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
1966                                                     /*isVariadic=*/false);
1967   auto *TaskPrivatesMapTy =
1968       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
1969   auto *TaskPrivatesMap = llvm::Function::Create(
1970       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
1971       ".omp_task_privates_map.", &CGM.getModule());
1972   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
1973                                 TaskPrivatesMap);
1974   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
1975   CodeGenFunction CGF(CGM);
1976   CGF.disableDebugInfo();
1977   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
1978                     TaskPrivatesMapFnInfo, Args);
1979
1980   // *privi = &.privates.privi;
1981   auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad(
1982       CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes);
1983   LValue Base =
1984       CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy);
1985   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
1986   Counter = 0;
1987   for (auto *Field : PrivatesQTyRD->fields()) {
1988     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
1989     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
1990     auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD),
1991                                                   VD->getType());
1992     auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc);
1993     CGF.EmitStoreOfScalar(
1994         FieldLVal.getAddress(),
1995         CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(),
1996                                        RefLVal.getType()->getPointeeType()));
1997     ++Counter;
1998   }
1999   CGF.FinishFunction();
2000   return TaskPrivatesMap;
2001 }
2002
/// \brief Three-way comparator over PrivateDataTy entries; emitTaskCall passes
/// it to llvm::array_pod_sort.
///
/// Only the \c first member of each entry is compared. The result is 1 when
/// \a P1's key is less than \a P2's, -1 when it is greater, and 0 when neither
/// key is less than the other. Note the inverted sign: assuming array_pod_sort
/// follows the qsort convention, entries end up ordered from the largest key
/// to the smallest.
2003 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2004                                      const PrivateDataTy *P2) {
2005   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2006 }
2007
/// \brief Emits the code that allocates, initializes and launches a task.
///
/// In order, this function
///  - collects the private and firstprivate variables into \c Privates, keyed
///    by the alignment of the original variable's type, and sorts them with
///    array_pod_sort_comparator;
///  - builds the kmp_task_t type (only if it was not built yet) and a per-task
///    record type that wraps it together with the privates;
///  - emits the privates mapping function (or a null pointer when there are no
///    privates) and the proxy task entry function;
///  - calls __kmpc_omp_task_alloc, copies the shareds into the new task and
///    initializes the private copies stored in it;
///  - stores the destructors function (or a null pointer) into the task
///    descriptor;
///  - emits __kmpc_omp_task, or, on the else branch of the 'if' clause, a
///    direct call of the task entry bracketed by __kmpc_omp_task_begin_if0 and
///    a __kmpc_omp_task_complete_if0 cleanup.
///
/// \param CGF Function the code is emitted into.
/// \param Loc Location passed to emitUpdateLocation/getThreadID and to the
/// helper function emitters.
/// \param D Directive whose associated CapturedStmt is used to look up the
/// captured fields of firstprivate originals.
/// \param Tied When true, bit 0x1 (TiedFlag) is set in the task flags.
/// \param Final If the pointer part is non-null it is used as the condition of
/// a select between FinalFlag (0x2) and 0; otherwise the integer part decides
/// whether FinalFlag is set.
/// \param TaskFunction Outlined task function. It is cast to llvm::Function
/// and the type of its fourth argument is used as the type of the privates
/// mapping function pointer.
/// \param SharedsTy Type of the shareds; accessed through
/// getAsStructureType(), so a structure type is expected.
/// \param Shareds Source of the aggregate copy into the task's shareds area.
/// \param IfCond Condition of the 'if' clause, or null if there is none.
/// \param PrivateVars DeclRefExprs of the original private variables.
/// \param PrivateCopies DeclRefExprs of the private copies, walked in lockstep
/// with \a PrivateVars.
/// \param FirstprivateVars DeclRefExprs of the original firstprivate
/// variables.
/// \param FirstprivateCopies DeclRefExprs of the firstprivate copies, walked
/// in lockstep with \a FirstprivateVars.
/// \param FirstprivateInits DeclRefExprs of the per-element init helper
/// variables, walked in lockstep with \a FirstprivateVars.
2008 void CGOpenMPRuntime::emitTaskCall(
2009     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2010     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2011     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
2012     const Expr *IfCond, const ArrayRef<const Expr *> PrivateVars,
2013     const ArrayRef<const Expr *> PrivateCopies,
2014     const ArrayRef<const Expr *> FirstprivateVars,
2015     const ArrayRef<const Expr *> FirstprivateCopies,
2016     const ArrayRef<const Expr *> FirstprivateInits) {
2017   auto &C = CGM.getContext();
2018   llvm::SmallVector<PrivateDataTy, 8> Privates;
2019   // Aggregate privates and sort them by the alignment.
2020   auto I = PrivateCopies.begin();
2021   for (auto *E : PrivateVars) {
2022     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2023     Privates.push_back(std::make_pair(
2024         C.getTypeAlignInChars(VD->getType()),
2025         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2026                          /*PrivateElemInit=*/nullptr)));
2027     ++I;
2028   }
2029   I = FirstprivateCopies.begin();
2030   auto IElemInitRef = FirstprivateInits.begin();
2031   for (auto *E : FirstprivateVars) {
2032     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2033     Privates.push_back(std::make_pair(
2034         C.getTypeAlignInChars(VD->getType()),
2035         PrivateHelpersTy(
2036             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2037             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2038     ++I, ++IElemInitRef;
2039   }
2040   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2041                        array_pod_sort_comparator);
2042   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2043   // Build type kmp_routine_entry_t (if not built yet).
2044   emitKmpRoutineEntryT(KmpInt32Ty);
2045   // Build type kmp_task_t (if not built yet).
2046   if (KmpTaskTQTy.isNull()) {
2047     KmpTaskTQTy = C.getRecordType(
2048         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2049   }
2050   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2051   // Build particular struct kmp_task_t for the given task.
2052   auto *KmpTaskTWithPrivatesQTyRD =
2053       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2054   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2055   QualType KmpTaskTWithPrivatesPtrQTy =
2056       C.getPointerType(KmpTaskTWithPrivatesQTy);
2057   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2058   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2059   auto KmpTaskTWithPrivatesTySize =
2060       CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy));
2061   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2062
2063   // Emit the privates mapping function (if there are any privates).
2064   llvm::Value *TaskPrivatesMap = nullptr;
       // The expected pointer type is taken from the fourth argument of the
       // outlined task function.
2065   auto *TaskPrivatesMapTy =
2066       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2067                 3)
2068           ->getType();
2069   if (!Privates.empty()) {
         // The second field of the per-task record supplies the privates type.
2070     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2071     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2072         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2073     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2074         TaskPrivatesMap, TaskPrivatesMapTy);
2075   } else {
2076     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2077         cast<llvm::PointerType>(TaskPrivatesMapTy));
2078   }
2079   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2080   // kmp_task_t *tt);
2081   auto *TaskEntry = emitProxyTaskFunction(
2082       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2083       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2084
2085   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2086   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2087   // kmp_routine_entry_t *task_entry);
2088   // Task flags. Format is taken from
2089   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2090   // description of kmp_tasking_flags struct.
2091   const unsigned TiedFlag = 0x1;
2092   const unsigned FinalFlag = 0x2;
2093   unsigned Flags = Tied ? TiedFlag : 0;
       // The 'final' bit is either selected on a value carried by Final's
       // pointer part or folded to a constant from its integer part.
2094   auto *TaskFlags =
2095       Final.getPointer()
2096           ? CGF.Builder.CreateSelect(Final.getPointer(),
2097                                      CGF.Builder.getInt32(FinalFlag),
2098                                      CGF.Builder.getInt32(/*C=*/0))
2099           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2100   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2101   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
2102   llvm::Value *AllocArgs[] = {
2103       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
2104       KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize),
2105       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
2106                                                       KmpRoutineEntryPtrTy)};
2107   auto *NewTask = CGF.EmitRuntimeCall(
2108       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
2109   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2110       NewTask, KmpTaskTWithPrivatesPtrTy);
2111   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2112                                                KmpTaskTWithPrivatesQTy);
       // TDBase addresses the first field of the per-task record.
2113   LValue TDBase =
2114       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2115   // Fill the data in the resulting kmp_task_t record.
2116   // Copy shareds if there are any.
2117   llvm::Value *KmpTaskSharedsPtr = nullptr;
2118   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2119     KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
2120         CGF.EmitLValueForField(
2121             TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
2122         Loc);
2123     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2124   }
2125   // Emit initial values for private copies (if any).
2126   bool NeedsCleanup = false;
2127   if (!Privates.empty()) {
2128     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2129     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
         // From here on FI walks the fields of the privates record, advanced
         // once per entry of Privates.
2130     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
2131     LValue SharedsBase;
2132     if (!FirstprivateVars.empty()) {
           // NOTE(review): KmpTaskSharedsPtr is only set above when SharedsTy
           // has fields; this presumably always holds when firstprivates
           // exist - confirm.
2133       SharedsBase = CGF.MakeNaturalAlignAddrLValue(
2134           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2135               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2136           SharedsTy);
2137     }
2138     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2139         cast<CapturedStmt>(*D.getAssociatedStmt()));
2140     for (auto &&Pair : Privates) {
2141       auto *VD = Pair.second.PrivateCopy;
2142       auto *Init = VD->getAnyInitializer();
2143       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
2144       if (Init) {
             // A non-null PrivateElemInit marks a firstprivate entry (plain
             // privates are registered with nullptr above).
2145         if (auto *Elem = Pair.second.PrivateElemInit) {
2146           auto *OriginalVD = Pair.second.Original;
2147           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2148           auto SharedRefLValue =
2149               CGF.EmitLValueForField(SharedsBase, SharedField);
2150           QualType Type = OriginalVD->getType();
2151           if (Type->isArrayType()) {
2152             // Initialize firstprivate array.
2153             if (!isa<CXXConstructExpr>(Init) ||
2154                 CGF.isTrivialInitializer(Init)) {
2155               // Perform simple memcpy.
2156               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2157                                       SharedRefLValue.getAddress(), Type);
2158             } else {
2159               // Initialize firstprivate array using element-by-element
2160               // initialization.
2161               CGF.EmitOMPAggregateAssign(
2162                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2163                   Type, [&CGF, Elem, Init, &CapturesInfo](
2164                             llvm::Value *DestElement, llvm::Value *SrcElement) {
2165                     // Clean up any temporaries needed by the initialization.
2166                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2167                     InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
2168                       return SrcElement;
2169                     });
2170                     (void)InitScope.Privatize();
2171                     // Emit initialization for single element.
2172                     auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
2173                     CGF.CapturedStmtInfo = &CapturesInfo;
2174                     CGF.EmitAnyExprToMem(Init, DestElement,
2175                                          Init->getType().getQualifiers(),
2176                                          /*IsInitializer=*/false);
2177                     CGF.CapturedStmtInfo = OldCapturedStmtInfo;
2178                   });
2179             }
2180           } else {
                 // Non-array firstprivate: map Elem to the shared original and
                 // emit the initializer with CapturesInfo temporarily
                 // installed as the captured statement info.
2181             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2182             InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
2183               return SharedRefLValue.getAddress();
2184             });
2185             (void)InitScope.Privatize();
2186             auto *OldCapturedStmtInfo = CGF.CapturedStmtInfo;
2187             CGF.CapturedStmtInfo = &CapturesInfo;
2188             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2189                                /*capturedByInit=*/false);
2190             CGF.CapturedStmtInfo = OldCapturedStmtInfo;
2191           }
2192         } else {
2193           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2194         }
2195       }
           // A destructors function is needed as soon as one private field has
           // a destructed type.
2196       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2197       ++FI;
2198     }
2199   }
2200   // Provide pointer to function with destructors for privates.
2201   llvm::Value *DestructorFn =
2202       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2203                                              KmpTaskTWithPrivatesPtrQTy,
2204                                              KmpTaskTWithPrivatesQTy)
2205                    : llvm::ConstantPointerNull::get(
2206                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2207   LValue Destructor = CGF.EmitLValueForField(
2208       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2209   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2210                             DestructorFn, KmpRoutineEntryPtrTy),
2211                         Destructor);
2212   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
2213   // libcall.
2214   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2215   // *new_task);
2216   auto *ThreadID = getThreadID(CGF, Loc);
2217   llvm::Value *TaskArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID, NewTask};
2218   auto &&ThenCodeGen = [this, &TaskArgs](CodeGenFunction &CGF) {
2219     // TODO: add check for untied tasks.
2220     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
2221   };
2222   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
2223       IfCallEndCleanup;
2224   auto &&ElseCodeGen =
2225       [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry](
2226           CodeGenFunction &CGF) {
2227         CodeGenFunction::RunCleanupsScope LocalScope(CGF);
2228         CGF.EmitRuntimeCall(
2229             createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs);
2230         // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2231         // kmp_task_t *new_task);
2232         CGF.EHStack.pushCleanup<IfCallEndCleanup>(
2233             NormalAndEHCleanup,
2234             createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
2235             llvm::makeArrayRef(TaskArgs));
2236
2237         // Call proxy_task_entry(gtid, new_task);
2238         llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
2239         CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
2240       };
2241   if (IfCond) {
2242     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
2243   } else {
2244     CodeGenFunction::RunCleanupsScope Scope(CGF);
2245     ThenCodeGen(CGF);
2246   }
2247 }
2248
/// \brief Emits the internal helper
/// 'void .omp.reduction.reduction_func(void *LHSArg, void *RHSArg)'.
///
/// Both arguments are loaded and cast to \a ArgsType. For every reduction
/// operation I, the variables referenced by \a LHSExprs[I] and \a RHSExprs[I]
/// are privatized to the pointer loaded from element I of the corresponding
/// argument, and then each expression in \a ReductionOps is emitted with its
/// result ignored.
///
/// \param CGM Module the function is created in.
/// \param ArgsType LLVM type both 'void *' arguments are cast to; element I is
/// addressed with a struct GEP and loaded as a pointer.
/// \param LHSExprs DeclRefExprs naming the LHS helper variables.
/// \param RHSExprs DeclRefExprs naming the RHS helper variables.
/// \param ReductionOps Reduction expressions, emitted in order.
/// \returns The created llvm::Function.
2249 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2250                                           llvm::Type *ArgsType,
2251                                           ArrayRef<const Expr *> LHSExprs,
2252                                           ArrayRef<const Expr *> RHSExprs,
2253                                           ArrayRef<const Expr *> ReductionOps) {
2254   auto &C = CGM.getContext();
2255
2256   // void reduction_func(void *LHSArg, void *RHSArg);
2257   FunctionArgList Args;
2258   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2259                            C.VoidPtrTy);
2260   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2261                            C.VoidPtrTy);
2262   Args.push_back(&LHSArg);
2263   Args.push_back(&RHSArg);
2264   FunctionType::ExtInfo EI;
2265   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2266       C.VoidTy, Args, EI, /*isVariadic=*/false);
2267   auto *Fn = llvm::Function::Create(
2268       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2269       ".omp.reduction.reduction_func", &CGM.getModule());
2270   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
2271   CodeGenFunction CGF(CGM);
2272   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2273
2274   // Dst = (void*[n])(LHSArg);
2275   // Src = (void*[n])(RHSArg);
2276   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2277       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
2278                                     CGF.PointerAlignInBytes),
2279       ArgsType);
2280   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2281       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
2282                                     CGF.PointerAlignInBytes),
2283       ArgsType);
2284
2285   //  ...
2286   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2287   //  ...
2288   CodeGenFunction::OMPPrivateScope Scope(CGF);
       // NOTE(review): the lambdas below capture the loop index I by reference,
       // so this relies on addPrivate invoking the generator before the next
       // iteration - confirm against OMPPrivateScope::addPrivate.
2289   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
2290     Scope.addPrivate(
2291         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
2292         [&]() -> llvm::Value *{
2293           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2294               CGF.Builder.CreateAlignedLoad(
2295                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
2296                   CGM.PointerAlignInBytes),
2297               CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
2298         });
2299     Scope.addPrivate(
2300         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
2301         [&]() -> llvm::Value *{
2302           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2303               CGF.Builder.CreateAlignedLoad(
2304                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
2305                   CGM.PointerAlignInBytes),
2306               CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
2307         });
2308   }
2309   Scope.Privatize();
2310   for (auto *E : ReductionOps) {
2311     CGF.EmitIgnoredExpr(E);
2312   }
2313   Scope.ForceCleanup();
2314   CGF.FinishFunction();
2315   return Fn;
2316 }
2317
/// \brief Emits the code that combines reduction variables at the end of a
/// region.
///
/// Unless \a SimpleReduction is set, this builds an array holding the
/// addresses of the \a RHSExprs, emits the reduce_func helper via
/// emitReductionFunction, calls __kmpc_reduce{_nowait} and switches on the
/// result: case 1 emits the reduction operations directly, case 2 emits them
/// as atomic updates or, where that form does not apply, inside a critical
/// region; any other value branches to the default block.
///
/// \param CGF Function the code is emitted into.
/// \param Loc Location used for the runtime calls, the atomic updates and the
/// critical regions.
/// \param LHSExprs DeclRefExprs naming the LHS helper variables, one per
/// reduction operation.
/// \param RHSExprs Expressions whose addresses are stored in the reduction
/// list, one per reduction operation.
/// \param ReductionOps Reduction expressions.
/// \param WithNowait Selects the _nowait runtime entry points; when set, case
/// 2 does not call __kmpc_end_reduce.
/// \param SimpleReduction When set, only the reduction operations are emitted
/// (inside a cleanups scope) and no runtime call is generated.
2318 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2319                                     ArrayRef<const Expr *> LHSExprs,
2320                                     ArrayRef<const Expr *> RHSExprs,
2321                                     ArrayRef<const Expr *> ReductionOps,
2322                                     bool WithNowait, bool SimpleReduction) {
2323   // Next code should be emitted for reduction:
2324   //
2325   // static kmp_critical_name lock = { 0 };
2326   //
2327   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2328   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2329   //  ...
2330   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2331   //  *(Type<n>-1*)rhs[<n>-1]);
2332   // }
2333   //
2334   // ...
2335   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2336   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2337   // RedList, reduce_func, &<lock>)) {
2338   // case 1:
2339   //  ...
2340   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2341   //  ...
2342   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2343   // break;
2344   // case 2:
2345   //  ...
2346   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2347   //  ...
2348   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2349   // break;
2350   // default:;
2351   // }
2352   //
2353   // if SimpleReduction is true, only the next code is generated:
2354   //  ...
2355   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2356   //  ...
2357
2358   auto &C = CGM.getContext();
2359
2360   if (SimpleReduction) {
2361     CodeGenFunction::RunCleanupsScope Scope(CGF);
2362     for (auto *E : ReductionOps) {
2363       CGF.EmitIgnoredExpr(E);
2364     }
2365     return;
2366   }
2367
2368   // 1. Build a list of reduction variables.
2369   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2370   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
2371   QualType ReductionArrayTy =
2372       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2373                              /*IndexTypeQuals=*/0);
2374   auto *ReductionList =
2375       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2376   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
2377     auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
2378     CGF.Builder.CreateAlignedStore(
2379         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2380             CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
2381         Elem, CGM.PointerAlignInBytes);
2382   }
2383
2384   // 2. Emit reduce_func().
2385   auto *ReductionFn = emitReductionFunction(
2386       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
2387       RHSExprs, ReductionOps);
2388
2389   // 3. Create static kmp_critical_name lock = { 0 };
2390   auto *Lock = getCriticalRegionLock(".reduction");
2391
2392   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2393   // RedList, reduce_func, &<lock>);
2394   auto *IdentTLoc = emitUpdateLocation(
2395       CGF, Loc,
2396       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2397   auto *ThreadId = getThreadID(CGF, Loc);
2398   auto *ReductionArrayTySize = llvm::ConstantInt::get(
2399       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
2400   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
2401                                                              CGF.VoidPtrTy);
2402   llvm::Value *Args[] = {
2403       IdentTLoc,                             // ident_t *<loc>
2404       ThreadId,                              // i32 <gtid>
2405       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2406       ReductionArrayTySize,                  // size_type sizeof(RedList)
2407       RL,                                    // void *RedList
2408       ReductionFn, // void (*) (void *, void *) <reduce_func>
2409       Lock         // kmp_critical_name *&<lock>
2410   };
2411   auto Res = CGF.EmitRuntimeCall(
2412       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2413                                        : OMPRTL__kmpc_reduce),
2414       Args);
2415
2416   // 5. Build switch(res)
2417   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2418   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2419
2420   // 6. Build case 1:
2421   //  ...
2422   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2423   //  ...
2424   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2425   // break;
2426   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2427   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2428   CGF.EmitBlock(Case1BB);
2429
2430   {
2431     CodeGenFunction::RunCleanupsScope Scope(CGF);
2432     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2433     llvm::Value *EndArgs[] = {
2434         IdentTLoc, // ident_t *<loc>
2435         ThreadId,  // i32 <gtid>
2436         Lock       // kmp_critical_name *&<lock>
2437     };
2438     CGF.EHStack
2439         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2440             NormalAndEHCleanup,
2441             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2442                                              : OMPRTL__kmpc_end_reduce),
2443             llvm::makeArrayRef(EndArgs));
2444     for (auto *E : ReductionOps) {
2445       CGF.EmitIgnoredExpr(E);
2446     }
2447   }
2448
2449   CGF.EmitBranch(DefaultBB);
2450
2451   // 7. Build case 2:
2452   //  ...
2453   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2454   //  ...
2455   // break;
2456   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2457   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2458   CGF.EmitBlock(Case2BB);
2459
2460   {
2461     CodeGenFunction::RunCleanupsScope Scope(CGF);
2462     if (!WithNowait) {
2463       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2464       llvm::Value *EndArgs[] = {
2465           IdentTLoc, // ident_t *<loc>
2466           ThreadId,  // i32 <gtid>
2467           Lock       // kmp_critical_name *&<lock>
2468       };
2469       CGF.EHStack
2470           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2471               NormalAndEHCleanup,
2472               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2473               llvm::makeArrayRef(EndArgs));
2474     }
         // I walks LHSExprs in lockstep with ReductionOps.
2475     auto I = LHSExprs.begin();
2476     for (auto *E : ReductionOps) {
2477       const Expr *XExpr = nullptr;
2478       const Expr *EExpr = nullptr;
2479       const Expr *UpExpr = nullptr;
2480       BinaryOperatorKind BO = BO_Comma;
           // Only a plain assignment 'x = <update>' is a candidate for the
           // atomic form; anything else leaves XExpr null and is emitted in a
           // critical region below. The pointer named BO in the condition
           // shadows the BinaryOperatorKind declared above within this 'if'.
2481       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2482         if (BO->getOpcode() == BO_Assign) {
2483           XExpr = BO->getLHS();
2484           UpExpr = BO->getRHS();
2485         }
2486       }
2487       // Try to emit update expression as a simple atomic.
2488       auto *RHSExpr = UpExpr;
2489       if (RHSExpr) {
2490         // Analyze RHS part of the whole expression.
2491         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2492                 RHSExpr->IgnoreParenImpCasts())) {
2493           // If this is a conditional operator, analyze its condition for
2494           // min/max reduction operator.
2495           RHSExpr = ACO->getCond();
2496         }
2497         if (auto *BORHS =
2498                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2499           EExpr = BORHS->getRHS();
2500           BO = BORHS->getOpcode();
2501         }
2502       }
2503       if (XExpr) {
2504         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2505         LValue X = CGF.EmitLValue(XExpr);
             // This RValue shadows the loop variable E inside this branch
             // only; it stays default-constructed when no EExpr was found.
2506         RValue E;
2507         if (EExpr)
2508           E = CGF.EmitAnyExpr(EExpr);
             // The callback privatizes the LHS helper variable to a temporary
             // holding the value it is given and then emits the whole update
             // expression.
2509         CGF.EmitOMPAtomicSimpleUpdateExpr(
2510             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
2511             [&CGF, UpExpr, VD](RValue XRValue) {
2512               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
2513               PrivateScope.addPrivate(
2514                   VD, [&CGF, VD, XRValue]() -> llvm::Value *{
2515                     auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
2516                     CGF.EmitStoreThroughLValue(
2517                         XRValue,
2518                         CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
2519                     return LHSTemp;
2520                   });
2521               (void)PrivateScope.Privatize();
2522               return CGF.EmitAnyExpr(UpExpr);
2523             });
2524       } else {
2525         // Emit as a critical region.
2526         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
2527           CGF.EmitIgnoredExpr(E);
2528         }, Loc);
2529       }
2530       ++I;
2531     }
2532   }
2533
2534   CGF.EmitBranch(DefaultBB);
2535   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
2536 }
2537
/// \brief Emits a call to __kmpc_omp_taskwait(loc, global_tid).
///
/// The value returned by the runtime call is not used.
///
/// \param CGF Function the call is emitted into.
/// \param Loc Location used to build the ident_t argument and to obtain the
/// thread id.
2538 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
2539                                        SourceLocation Loc) {
2540   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
2541   // global_tid);
2542   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2543   // Ignore return result until untied tasks are supported.
2544   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
2545 }
2546
/// \brief Emits the body of a directive in place, without outlining.
///
/// An InlinedOpenMPRegionRAII object is created from \a CGF and \a CodeGen for
/// the duration of the call, and EmitBody is then invoked on
/// CGF.CapturedStmtInfo with a null statement.
/// NOTE(review): presumably the RAII object installs the region info that
/// CGF.CapturedStmtInfo points to here - its definition is outside this view.
///
/// \param CGF Function the body is emitted into.
/// \param CodeGen Code generation callback handed to the RAII region object.
2547 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
2548                                            const RegionCodeGenTy &CodeGen) {
2549   InlinedOpenMPRegionRAII Region(CGF, CodeGen);
2550   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
2551 }
2552