]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Update llvm/clang to r242221.
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / clang / lib / CodeGen / CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26
27 using namespace clang;
28 using namespace CodeGen;
29
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44   };
45
46   CGOpenMPRegionInfo(const CapturedStmt &CS,
47                      const CGOpenMPRegionKind RegionKind,
48                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind)
49       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
50         CodeGen(CodeGen), Kind(Kind) {}
51
52   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind)
54       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
55         Kind(Kind) {}
56
57   /// \brief Get a variable or parameter for storing global thread id
58   /// inside OpenMP construct.
59   virtual const VarDecl *getThreadIDVariable() const = 0;
60
61   /// \brief Emit the captured statement body.
62   virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
63
64   /// \brief Get an LValue for the current ThreadID variable.
65   /// \return LValue for thread id variable. This LValue always has type int32*.
66   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
67
68   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
69
70   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
71
72   static bool classof(const CGCapturedStmtInfo *Info) {
73     return Info->getKind() == CR_OpenMP;
74   }
75
76 protected:
77   CGOpenMPRegionKind RegionKind;
78   const RegionCodeGenTy &CodeGen;
79   OpenMPDirectiveKind Kind;
80 };
81
82 /// \brief API for captured statement code generation in OpenMP constructs.
83 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
84 public:
85   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
86                              const RegionCodeGenTy &CodeGen,
87                              OpenMPDirectiveKind Kind)
88       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind),
89         ThreadIDVar(ThreadIDVar) {
90     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
91   }
92   /// \brief Get a variable or parameter for storing global thread id
93   /// inside OpenMP construct.
94   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
95
96   /// \brief Get the name of the capture helper.
97   StringRef getHelperName() const override { return ".omp_outlined."; }
98
99   static bool classof(const CGCapturedStmtInfo *Info) {
100     return CGOpenMPRegionInfo::classof(Info) &&
101            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
102                ParallelOutlinedRegion;
103   }
104
105 private:
106   /// \brief A variable or parameter storing global thread id for OpenMP
107   /// constructs.
108   const VarDecl *ThreadIDVar;
109 };
110
111 /// \brief API for captured statement code generation in OpenMP constructs.
112 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
113 public:
114   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
115                                  const VarDecl *ThreadIDVar,
116                                  const RegionCodeGenTy &CodeGen,
117                                  OpenMPDirectiveKind Kind)
118       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind),
119         ThreadIDVar(ThreadIDVar) {
120     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
121   }
122   /// \brief Get a variable or parameter for storing global thread id
123   /// inside OpenMP construct.
124   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
125
126   /// \brief Get an LValue for the current ThreadID variable.
127   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
128
129   /// \brief Get the name of the capture helper.
130   StringRef getHelperName() const override { return ".omp_outlined."; }
131
132   static bool classof(const CGCapturedStmtInfo *Info) {
133     return CGOpenMPRegionInfo::classof(Info) &&
134            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
135                TaskOutlinedRegion;
136   }
137
138 private:
139   /// \brief A variable or parameter storing global thread id for OpenMP
140   /// constructs.
141   const VarDecl *ThreadIDVar;
142 };
143
144 /// \brief API for inlined captured statement code generation in OpenMP
145 /// constructs.
146 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
147 public:
148   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
149                             const RegionCodeGenTy &CodeGen,
150                             OpenMPDirectiveKind Kind)
151       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind), OldCSI(OldCSI),
152         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
153   // \brief Retrieve the value of the context parameter.
154   llvm::Value *getContextValue() const override {
155     if (OuterRegionInfo)
156       return OuterRegionInfo->getContextValue();
157     llvm_unreachable("No context value for inlined OpenMP region");
158   }
159   virtual void setContextValue(llvm::Value *V) override {
160     if (OuterRegionInfo) {
161       OuterRegionInfo->setContextValue(V);
162       return;
163     }
164     llvm_unreachable("No context value for inlined OpenMP region");
165   }
166   /// \brief Lookup the captured field decl for a variable.
167   const FieldDecl *lookup(const VarDecl *VD) const override {
168     if (OuterRegionInfo)
169       return OuterRegionInfo->lookup(VD);
170     // If there is no outer outlined region,no need to lookup in a list of
171     // captured variables, we can use the original one.
172     return nullptr;
173   }
174   FieldDecl *getThisFieldDecl() const override {
175     if (OuterRegionInfo)
176       return OuterRegionInfo->getThisFieldDecl();
177     return nullptr;
178   }
179   /// \brief Get a variable or parameter for storing global thread id
180   /// inside OpenMP construct.
181   const VarDecl *getThreadIDVariable() const override {
182     if (OuterRegionInfo)
183       return OuterRegionInfo->getThreadIDVariable();
184     return nullptr;
185   }
186
187   /// \brief Get the name of the capture helper.
188   StringRef getHelperName() const override {
189     if (auto *OuterRegionInfo = getOldCSI())
190       return OuterRegionInfo->getHelperName();
191     llvm_unreachable("No helper name for inlined OpenMP construct");
192   }
193
194   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
195
196   static bool classof(const CGCapturedStmtInfo *Info) {
197     return CGOpenMPRegionInfo::classof(Info) &&
198            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
199   }
200
201 private:
202   /// \brief CodeGen info about outer OpenMP region.
203   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
204   CGOpenMPRegionInfo *OuterRegionInfo;
205 };
206
207 /// \brief RAII for emitting code of OpenMP constructs.
208 class InlinedOpenMPRegionRAII {
209   CodeGenFunction &CGF;
210
211 public:
212   /// \brief Constructs region for combined constructs.
213   /// \param CodeGen Code generation sequence for combined directives. Includes
214   /// a list of functions used for code generation of implicitly inlined
215   /// regions.
216   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
217                           OpenMPDirectiveKind Kind)
218       : CGF(CGF) {
219     // Start emission for the construct.
220     CGF.CapturedStmtInfo =
221         new CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, CodeGen, Kind);
222   }
223   ~InlinedOpenMPRegionRAII() {
224     // Restore original CapturedStmtInfo only if we're done with code emission.
225     auto *OldCSI =
226         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
227     delete CGF.CapturedStmtInfo;
228     CGF.CapturedStmtInfo = OldCSI;
229   }
230 };
231
232 } // namespace
233
234 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
235   return CGF.MakeNaturalAlignAddrLValue(
236       CGF.Builder.CreateAlignedLoad(
237           CGF.GetAddrOfLocalVar(getThreadIDVariable()),
238           CGF.PointerAlignInBytes),
239       getThreadIDVariable()
240           ->getType()
241           ->castAs<PointerType>()
242           ->getPointeeType());
243 }
244
245 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
246   // 1.2.2 OpenMP Language Terminology
247   // Structured block - An executable statement with a single entry at the
248   // top and a single exit at the bottom.
249   // The point of exit cannot be a branch out of the structured block.
250   // longjmp() and throw() must not violate the entry/exit criteria.
251   CGF.EHStack.pushTerminate();
252   {
253     CodeGenFunction::RunCleanupsScope Scope(CGF);
254     CodeGen(CGF);
255   }
256   CGF.EHStack.popTerminate();
257 }
258
259 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
260     CodeGenFunction &CGF) {
261   return CGF.MakeNaturalAlignAddrLValue(
262       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
263       getThreadIDVariable()->getType());
264 }
265
266 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
267     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
268   IdentTy = llvm::StructType::create(
269       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
270       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
271       CGM.Int8PtrTy /* psource */, nullptr);
272   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
273   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
274                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
275   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
276   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
277 }
278
279 void CGOpenMPRuntime::clear() {
280   InternalVars.clear();
281 }
282
283 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
284     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
285     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
286   assert(ThreadIDVar->getType()->isPointerType() &&
287          "thread id variable must be of type kmp_int32 *");
288   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
289   CodeGenFunction CGF(CGM, true);
290   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind);
291   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
292   return CGF.GenerateCapturedStmtFunction(*CS);
293 }
294
295 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
296     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
297     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
298   assert(!ThreadIDVar->getType()->isPointerType() &&
299          "thread id variable must be of type kmp_int32 for tasks");
300   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
301   CodeGenFunction CGF(CGM, true);
302   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
303                                         InnermostKind);
304   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
305   return CGF.GenerateCapturedStmtFunction(*CS);
306 }
307
308 llvm::Value *
309 CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
310   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
311   if (!Entry) {
312     if (!DefaultOpenMPPSource) {
313       // Initialize default location for psource field of ident_t structure of
314       // all ident_t objects. Format is ";file;function;line;column;;".
315       // Taken from
316       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
317       DefaultOpenMPPSource =
318           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
319       DefaultOpenMPPSource =
320           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
321     }
322     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
323         CGM.getModule(), IdentTy, /*isConstant*/ true,
324         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
325     DefaultOpenMPLocation->setUnnamedAddr(true);
326
327     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
328     llvm::Constant *Values[] = {Zero,
329                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
330                                 Zero, Zero, DefaultOpenMPPSource};
331     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
332     DefaultOpenMPLocation->setInitializer(Init);
333     OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
334     return DefaultOpenMPLocation;
335   }
336   return Entry;
337 }
338
339 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
340                                                  SourceLocation Loc,
341                                                  OpenMPLocationFlags Flags) {
342   // If no debug info is generated - return global default location.
343   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
344       Loc.isInvalid())
345     return getOrCreateDefaultLocation(Flags);
346
347   assert(CGF.CurFn && "No function in current CodeGenFunction.");
348
349   llvm::Value *LocValue = nullptr;
350   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
351   if (I != OpenMPLocThreadIDMap.end())
352     LocValue = I->second.DebugLoc;
353   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
354   // GetOpenMPThreadID was called before this routine.
355   if (LocValue == nullptr) {
356     // Generate "ident_t .kmpc_loc.addr;"
357     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
358     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
359     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
360     Elem.second.DebugLoc = AI;
361     LocValue = AI;
362
363     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
364     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
365     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
366                              llvm::ConstantExpr::getSizeOf(IdentTy),
367                              CGM.PointerAlignInBytes);
368   }
369
370   // char **psource = &.kmpc_loc_<flags>.addr.psource;
371   auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
372                                                          IdentField_PSource);
373
374   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
375   if (OMPDebugLoc == nullptr) {
376     SmallString<128> Buffer2;
377     llvm::raw_svector_ostream OS2(Buffer2);
378     // Build debug location
379     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
380     OS2 << ";" << PLoc.getFilename() << ";";
381     if (const FunctionDecl *FD =
382             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
383       OS2 << FD->getQualifiedNameAsString();
384     }
385     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
386     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
387     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
388   }
389   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
390   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
391
392   return LocValue;
393 }
394
395 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
396                                           SourceLocation Loc) {
397   assert(CGF.CurFn && "No function in current CodeGenFunction.");
398
399   llvm::Value *ThreadID = nullptr;
400   // Check whether we've already cached a load of the thread id in this
401   // function.
402   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
403   if (I != OpenMPLocThreadIDMap.end()) {
404     ThreadID = I->second.ThreadID;
405     if (ThreadID != nullptr)
406       return ThreadID;
407   }
408   if (auto OMPRegionInfo =
409           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
410     if (OMPRegionInfo->getThreadIDVariable()) {
411       // Check if this an outlined function with thread id passed as argument.
412       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
413       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
414       // If value loaded in entry block, cache it and use it everywhere in
415       // function.
416       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
417         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
418         Elem.second.ThreadID = ThreadID;
419       }
420       return ThreadID;
421     }
422   }
423
424   // This is not an outlined function region - need to call __kmpc_int32
425   // kmpc_global_thread_num(ident_t *loc).
426   // Generate thread id value and cache this value for use across the
427   // function.
428   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
429   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
430   ThreadID =
431       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
432                           emitUpdateLocation(CGF, Loc));
433   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
434   Elem.second.ThreadID = ThreadID;
435   return ThreadID;
436 }
437
438 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
439   assert(CGF.CurFn && "No function in current CodeGenFunction.");
440   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
441     OpenMPLocThreadIDMap.erase(CGF.CurFn);
442 }
443
444 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
445   return llvm::PointerType::getUnqual(IdentTy);
446 }
447
448 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
449   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
450 }
451
452 llvm::Constant *
453 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
454   llvm::Constant *RTLFn = nullptr;
455   switch (Function) {
456   case OMPRTL__kmpc_fork_call: {
457     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
458     // microtask, ...);
459     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
460                                 getKmpc_MicroPointerTy()};
461     llvm::FunctionType *FnTy =
462         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
463     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
464     break;
465   }
466   case OMPRTL__kmpc_global_thread_num: {
467     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
468     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
469     llvm::FunctionType *FnTy =
470         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
471     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
472     break;
473   }
474   case OMPRTL__kmpc_threadprivate_cached: {
475     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
476     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
477     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
478                                 CGM.VoidPtrTy, CGM.SizeTy,
479                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
480     llvm::FunctionType *FnTy =
481         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
482     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
483     break;
484   }
485   case OMPRTL__kmpc_critical: {
486     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
487     // kmp_critical_name *crit);
488     llvm::Type *TypeParams[] = {
489         getIdentTyPointerTy(), CGM.Int32Ty,
490         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
491     llvm::FunctionType *FnTy =
492         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
493     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
494     break;
495   }
496   case OMPRTL__kmpc_threadprivate_register: {
497     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
498     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
499     // typedef void *(*kmpc_ctor)(void *);
500     auto KmpcCtorTy =
501         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
502                                 /*isVarArg*/ false)->getPointerTo();
503     // typedef void *(*kmpc_cctor)(void *, void *);
504     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
505     auto KmpcCopyCtorTy =
506         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
507                                 /*isVarArg*/ false)->getPointerTo();
508     // typedef void (*kmpc_dtor)(void *);
509     auto KmpcDtorTy =
510         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
511             ->getPointerTo();
512     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
513                               KmpcCopyCtorTy, KmpcDtorTy};
514     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
515                                         /*isVarArg*/ false);
516     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
517     break;
518   }
519   case OMPRTL__kmpc_end_critical: {
520     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
521     // kmp_critical_name *crit);
522     llvm::Type *TypeParams[] = {
523         getIdentTyPointerTy(), CGM.Int32Ty,
524         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
525     llvm::FunctionType *FnTy =
526         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
527     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
528     break;
529   }
530   case OMPRTL__kmpc_cancel_barrier: {
531     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
532     // global_tid);
533     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
534     llvm::FunctionType *FnTy =
535         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
536     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
537     break;
538   }
539   case OMPRTL__kmpc_barrier: {
540     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
541     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
542     llvm::FunctionType *FnTy =
543         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
544     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
545     break;
546   }
547   case OMPRTL__kmpc_for_static_fini: {
548     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
549     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
550     llvm::FunctionType *FnTy =
551         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
552     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
553     break;
554   }
555   case OMPRTL__kmpc_push_num_threads: {
556     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
557     // kmp_int32 num_threads)
558     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
559                                 CGM.Int32Ty};
560     llvm::FunctionType *FnTy =
561         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
562     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
563     break;
564   }
565   case OMPRTL__kmpc_serialized_parallel: {
566     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
567     // global_tid);
568     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
569     llvm::FunctionType *FnTy =
570         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
571     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
572     break;
573   }
574   case OMPRTL__kmpc_end_serialized_parallel: {
575     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
576     // global_tid);
577     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
578     llvm::FunctionType *FnTy =
579         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
580     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
581     break;
582   }
583   case OMPRTL__kmpc_flush: {
584     // Build void __kmpc_flush(ident_t *loc);
585     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
586     llvm::FunctionType *FnTy =
587         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
588     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
589     break;
590   }
591   case OMPRTL__kmpc_master: {
592     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
593     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
594     llvm::FunctionType *FnTy =
595         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
596     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
597     break;
598   }
599   case OMPRTL__kmpc_end_master: {
600     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
601     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
602     llvm::FunctionType *FnTy =
603         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
604     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
605     break;
606   }
607   case OMPRTL__kmpc_omp_taskyield: {
608     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
609     // int end_part);
610     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
611     llvm::FunctionType *FnTy =
612         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
613     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
614     break;
615   }
616   case OMPRTL__kmpc_single: {
617     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
618     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
619     llvm::FunctionType *FnTy =
620         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
621     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
622     break;
623   }
624   case OMPRTL__kmpc_end_single: {
625     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
626     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
627     llvm::FunctionType *FnTy =
628         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
629     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
630     break;
631   }
632   case OMPRTL__kmpc_omp_task_alloc: {
633     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
634     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
635     // kmp_routine_entry_t *task_entry);
636     assert(KmpRoutineEntryPtrTy != nullptr &&
637            "Type kmp_routine_entry_t must be created.");
638     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
639                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
640     // Return void * and then cast to particular kmp_task_t type.
641     llvm::FunctionType *FnTy =
642         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
643     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
644     break;
645   }
646   case OMPRTL__kmpc_omp_task: {
647     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
648     // *new_task);
649     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
650                                 CGM.VoidPtrTy};
651     llvm::FunctionType *FnTy =
652         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
653     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
654     break;
655   }
656   case OMPRTL__kmpc_copyprivate: {
657     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
658     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
659     // kmp_int32 didit);
660     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
661     auto *CpyFnTy =
662         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
663     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
664                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
665                                 CGM.Int32Ty};
666     llvm::FunctionType *FnTy =
667         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
668     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
669     break;
670   }
671   case OMPRTL__kmpc_reduce: {
672     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
673     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
674     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
675     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
676     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
677                                                /*isVarArg=*/false);
678     llvm::Type *TypeParams[] = {
679         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
680         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
681         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
682     llvm::FunctionType *FnTy =
683         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
684     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
685     break;
686   }
687   case OMPRTL__kmpc_reduce_nowait: {
688     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
689     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
690     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
691     // *lck);
692     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
693     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
694                                                /*isVarArg=*/false);
695     llvm::Type *TypeParams[] = {
696         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
697         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
698         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
699     llvm::FunctionType *FnTy =
700         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
701     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
702     break;
703   }
704   case OMPRTL__kmpc_end_reduce: {
705     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
706     // kmp_critical_name *lck);
707     llvm::Type *TypeParams[] = {
708         getIdentTyPointerTy(), CGM.Int32Ty,
709         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
710     llvm::FunctionType *FnTy =
711         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
712     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
713     break;
714   }
715   case OMPRTL__kmpc_end_reduce_nowait: {
716     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
717     // kmp_critical_name *lck);
718     llvm::Type *TypeParams[] = {
719         getIdentTyPointerTy(), CGM.Int32Ty,
720         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
721     llvm::FunctionType *FnTy =
722         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
723     RTLFn =
724         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
725     break;
726   }
727   case OMPRTL__kmpc_omp_task_begin_if0: {
728     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
729     // *new_task);
730     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
731                                 CGM.VoidPtrTy};
732     llvm::FunctionType *FnTy =
733         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
734     RTLFn =
735         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
736     break;
737   }
738   case OMPRTL__kmpc_omp_task_complete_if0: {
739     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
740     // *new_task);
741     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
742                                 CGM.VoidPtrTy};
743     llvm::FunctionType *FnTy =
744         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
745     RTLFn = CGM.CreateRuntimeFunction(FnTy,
746                                       /*Name=*/"__kmpc_omp_task_complete_if0");
747     break;
748   }
749   case OMPRTL__kmpc_ordered: {
750     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
751     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
752     llvm::FunctionType *FnTy =
753         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
754     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
755     break;
756   }
757   case OMPRTL__kmpc_end_ordered: {
758     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
759     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
760     llvm::FunctionType *FnTy =
761         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
762     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
763     break;
764   }
765   case OMPRTL__kmpc_omp_taskwait: {
766     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
767     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
768     llvm::FunctionType *FnTy =
769         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
770     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
771     break;
772   }
773   case OMPRTL__kmpc_taskgroup: {
774     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
775     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
776     llvm::FunctionType *FnTy =
777         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
778     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
779     break;
780   }
781   case OMPRTL__kmpc_end_taskgroup: {
782     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
783     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
784     llvm::FunctionType *FnTy =
785         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
786     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
787     break;
788   }
789   case OMPRTL__kmpc_push_proc_bind: {
790     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
791     // int proc_bind)
792     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
793     llvm::FunctionType *FnTy =
794         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
795     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
796     break;
797   }
798   case OMPRTL__kmpc_omp_task_with_deps: {
799     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
800     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
801     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
802     llvm::Type *TypeParams[] = {
803         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
804         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
805     llvm::FunctionType *FnTy =
806         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
807     RTLFn =
808         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
809     break;
810   }
811   case OMPRTL__kmpc_omp_wait_deps: {
812     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
813     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
814     // kmp_depend_info_t *noalias_dep_list);
815     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
816                                 CGM.Int32Ty,           CGM.VoidPtrTy,
817                                 CGM.Int32Ty,           CGM.VoidPtrTy};
818     llvm::FunctionType *FnTy =
819         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
820     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
821     break;
822   }
823   case OMPRTL__kmpc_cancellationpoint: {
824     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
825     // global_tid, kmp_int32 cncl_kind)
826     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
827     llvm::FunctionType *FnTy =
828         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
829     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
830     break;
831   }
832   case OMPRTL__kmpc_cancel: {
833     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
834     // kmp_int32 cncl_kind)
835     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
836     llvm::FunctionType *FnTy =
837         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
838     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
839     break;
840   }
841   }
842   return RTLFn;
843 }
844
845 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
846                                                              bool IVSigned) {
847   assert((IVSize == 32 || IVSize == 64) &&
848          "IV size is not compatible with the omp runtime");
849   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
850                                        : "__kmpc_for_static_init_4u")
851                            : (IVSigned ? "__kmpc_for_static_init_8"
852                                        : "__kmpc_for_static_init_8u");
853   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
854   auto PtrTy = llvm::PointerType::getUnqual(ITy);
855   llvm::Type *TypeParams[] = {
856     getIdentTyPointerTy(),                     // loc
857     CGM.Int32Ty,                               // tid
858     CGM.Int32Ty,                               // schedtype
859     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
860     PtrTy,                                     // p_lower
861     PtrTy,                                     // p_upper
862     PtrTy,                                     // p_stride
863     ITy,                                       // incr
864     ITy                                        // chunk
865   };
866   llvm::FunctionType *FnTy =
867       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
868   return CGM.CreateRuntimeFunction(FnTy, Name);
869 }
870
871 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
872                                                             bool IVSigned) {
873   assert((IVSize == 32 || IVSize == 64) &&
874          "IV size is not compatible with the omp runtime");
875   auto Name =
876       IVSize == 32
877           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
878           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
879   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
880   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
881                                CGM.Int32Ty,           // tid
882                                CGM.Int32Ty,           // schedtype
883                                ITy,                   // lower
884                                ITy,                   // upper
885                                ITy,                   // stride
886                                ITy                    // chunk
887   };
888   llvm::FunctionType *FnTy =
889       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
890   return CGM.CreateRuntimeFunction(FnTy, Name);
891 }
892
893 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
894                                                             bool IVSigned) {
895   assert((IVSize == 32 || IVSize == 64) &&
896          "IV size is not compatible with the omp runtime");
897   auto Name =
898       IVSize == 32
899           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
900           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
901   llvm::Type *TypeParams[] = {
902       getIdentTyPointerTy(), // loc
903       CGM.Int32Ty,           // tid
904   };
905   llvm::FunctionType *FnTy =
906       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
907   return CGM.CreateRuntimeFunction(FnTy, Name);
908 }
909
910 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
911                                                             bool IVSigned) {
912   assert((IVSize == 32 || IVSize == 64) &&
913          "IV size is not compatible with the omp runtime");
914   auto Name =
915       IVSize == 32
916           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
917           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
918   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
919   auto PtrTy = llvm::PointerType::getUnqual(ITy);
920   llvm::Type *TypeParams[] = {
921     getIdentTyPointerTy(),                     // loc
922     CGM.Int32Ty,                               // tid
923     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
924     PtrTy,                                     // p_lower
925     PtrTy,                                     // p_upper
926     PtrTy                                      // p_stride
927   };
928   llvm::FunctionType *FnTy =
929       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
930   return CGM.CreateRuntimeFunction(FnTy, Name);
931 }
932
933 llvm::Constant *
934 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
935   assert(!CGM.getLangOpts().OpenMPUseTLS ||
936          !CGM.getContext().getTargetInfo().isTLSSupported());
937   // Lookup the entry, lazily creating it if necessary.
938   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
939                                      Twine(CGM.getMangledName(VD)) + ".cache.");
940 }
941
942 llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
943                                                      const VarDecl *VD,
944                                                      llvm::Value *VDAddr,
945                                                      SourceLocation Loc) {
946   if (CGM.getLangOpts().OpenMPUseTLS &&
947       CGM.getContext().getTargetInfo().isTLSSupported())
948     return VDAddr;
949
950   auto VarTy = VDAddr->getType()->getPointerElementType();
951   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
952                          CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
953                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
954                          getOrCreateThreadPrivateCache(VD)};
955   return CGF.EmitRuntimeCall(
956       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
957 }
958
959 void CGOpenMPRuntime::emitThreadPrivateVarInit(
960     CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
961     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
962   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
963   // library.
964   auto OMPLoc = emitUpdateLocation(CGF, Loc);
965   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
966                       OMPLoc);
967   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
968   // to register constructor/destructor for variable.
969   llvm::Value *Args[] = {OMPLoc,
970                          CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
971                          Ctor, CopyCtor, Dtor};
972   CGF.EmitRuntimeCall(
973       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
974 }
975
976 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
977     const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
978     bool PerformInit, CodeGenFunction *CGF) {
979   if (CGM.getLangOpts().OpenMPUseTLS &&
980       CGM.getContext().getTargetInfo().isTLSSupported())
981     return nullptr;
982
983   VD = VD->getDefinition(CGM.getContext());
984   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
985     ThreadPrivateWithDefinition.insert(VD);
986     QualType ASTTy = VD->getType();
987
988     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
989     auto Init = VD->getAnyInitializer();
990     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
991       // Generate function that re-emits the declaration's initializer into the
992       // threadprivate copy of the variable VD
993       CodeGenFunction CtorCGF(CGM);
994       FunctionArgList Args;
995       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
996                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
997       Args.push_back(&Dst);
998
999       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1000           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1001           /*isVariadic=*/false);
1002       auto FTy = CGM.getTypes().GetFunctionType(FI);
1003       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1004           FTy, ".__kmpc_global_ctor_.", Loc);
1005       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1006                             Args, SourceLocation());
1007       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1008           CtorCGF.GetAddrOfLocalVar(&Dst),
1009           /*Volatile=*/false, CGM.PointerAlignInBytes,
1010           CGM.getContext().VoidPtrTy, Dst.getLocation());
1011       auto Arg = CtorCGF.Builder.CreatePointerCast(
1012           ArgVal,
1013           CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
1014       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1015                                /*IsInitializer=*/true);
1016       ArgVal = CtorCGF.EmitLoadOfScalar(
1017           CtorCGF.GetAddrOfLocalVar(&Dst),
1018           /*Volatile=*/false, CGM.PointerAlignInBytes,
1019           CGM.getContext().VoidPtrTy, Dst.getLocation());
1020       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1021       CtorCGF.FinishFunction();
1022       Ctor = Fn;
1023     }
1024     if (VD->getType().isDestructedType() != QualType::DK_none) {
1025       // Generate function that emits destructor call for the threadprivate copy
1026       // of the variable VD
1027       CodeGenFunction DtorCGF(CGM);
1028       FunctionArgList Args;
1029       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1030                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1031       Args.push_back(&Dst);
1032
1033       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1034           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1035           /*isVariadic=*/false);
1036       auto FTy = CGM.getTypes().GetFunctionType(FI);
1037       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1038           FTy, ".__kmpc_global_dtor_.", Loc);
1039       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1040                             SourceLocation());
1041       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1042           DtorCGF.GetAddrOfLocalVar(&Dst),
1043           /*Volatile=*/false, CGM.PointerAlignInBytes,
1044           CGM.getContext().VoidPtrTy, Dst.getLocation());
1045       DtorCGF.emitDestroy(ArgVal, ASTTy,
1046                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1047                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1048       DtorCGF.FinishFunction();
1049       Dtor = Fn;
1050     }
1051     // Do not emit init function if it is not required.
1052     if (!Ctor && !Dtor)
1053       return nullptr;
1054
1055     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1056     auto CopyCtorTy =
1057         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1058                                 /*isVarArg=*/false)->getPointerTo();
1059     // Copying constructor for the threadprivate variable.
1060     // Must be NULL - reserved by runtime, but currently it requires that this
1061     // parameter is always NULL. Otherwise it fires assertion.
1062     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1063     if (Ctor == nullptr) {
1064       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1065                                             /*isVarArg=*/false)->getPointerTo();
1066       Ctor = llvm::Constant::getNullValue(CtorTy);
1067     }
1068     if (Dtor == nullptr) {
1069       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1070                                             /*isVarArg=*/false)->getPointerTo();
1071       Dtor = llvm::Constant::getNullValue(DtorTy);
1072     }
1073     if (!CGF) {
1074       auto InitFunctionTy =
1075           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1076       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1077           InitFunctionTy, ".__omp_threadprivate_init_.");
1078       CodeGenFunction InitCGF(CGM);
1079       FunctionArgList ArgList;
1080       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1081                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1082                             Loc);
1083       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1084       InitCGF.FinishFunction();
1085       return InitFunction;
1086     }
1087     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1088   }
1089   return nullptr;
1090 }
1091
1092 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1093 /// function. Here is the logic:
1094 /// if (Cond) {
1095 ///   ThenGen();
1096 /// } else {
1097 ///   ElseGen();
1098 /// }
1099 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1100                             const RegionCodeGenTy &ThenGen,
1101                             const RegionCodeGenTy &ElseGen) {
1102   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1103
1104   // If the condition constant folds and can be elided, try to avoid emitting
1105   // the condition and the dead arm of the if/else.
1106   bool CondConstant;
1107   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1108     CodeGenFunction::RunCleanupsScope Scope(CGF);
1109     if (CondConstant) {
1110       ThenGen(CGF);
1111     } else {
1112       ElseGen(CGF);
1113     }
1114     return;
1115   }
1116
1117   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1118   // emit the conditional branch.
1119   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1120   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1121   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1122   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1123
1124   // Emit the 'then' code.
1125   CGF.EmitBlock(ThenBlock);
1126   {
1127     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1128     ThenGen(CGF);
1129   }
1130   CGF.EmitBranch(ContBlock);
1131   // Emit the 'else' code if present.
1132   {
1133     // There is no need to emit line number for unconditional branch.
1134     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1135     CGF.EmitBlock(ElseBlock);
1136   }
1137   {
1138     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1139     ElseGen(CGF);
1140   }
1141   {
1142     // There is no need to emit line number for unconditional branch.
1143     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1144     CGF.EmitBranch(ContBlock);
1145   }
1146   // Emit the continuation block for code after the if.
1147   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1148 }
1149
1150 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1151                                        llvm::Value *OutlinedFn,
1152                                        llvm::Value *CapturedStruct,
1153                                        const Expr *IfCond) {
1154   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1155   auto &&ThenGen =
1156       [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) {
1157         // Build call __kmpc_fork_call(loc, 1, microtask,
1158         // captured_struct/*context*/)
1159         llvm::Value *Args[] = {
1160             RTLoc,
1161             CGF.Builder.getInt32(
1162                 1), // Number of arguments after 'microtask' argument
1163             // (there is only one additional argument - 'context')
1164             CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
1165             CGF.EmitCastToVoidPtr(CapturedStruct)};
1166         auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1167         CGF.EmitRuntimeCall(RTLFn, Args);
1168       };
1169   auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc](
1170       CodeGenFunction &CGF) {
1171     auto ThreadID = getThreadID(CGF, Loc);
1172     // Build calls:
1173     // __kmpc_serialized_parallel(&Loc, GTid);
1174     llvm::Value *Args[] = {RTLoc, ThreadID};
1175     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1176                         Args);
1177
1178     // OutlinedFn(&GTid, &zero, CapturedStruct);
1179     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1180     auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
1181                                                           /*Signed*/ true);
1182     auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
1183     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1184     llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
1185     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1186
1187     // __kmpc_end_serialized_parallel(&Loc, GTid);
1188     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1189     CGF.EmitRuntimeCall(
1190         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1191   };
1192   if (IfCond) {
1193     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1194   } else {
1195     CodeGenFunction::RunCleanupsScope Scope(CGF);
1196     ThenGen(CGF);
1197   }
1198 }
1199
1200 // If we're inside an (outlined) parallel region, use the region info's
1201 // thread-ID variable (it is passed in a first argument of the outlined function
1202 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1203 // regular serial code region, get thread ID by calling kmp_int32
1204 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1205 // return the address of that temp.
1206 llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1207                                                   SourceLocation Loc) {
1208   if (auto OMPRegionInfo =
1209           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1210     if (OMPRegionInfo->getThreadIDVariable())
1211       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1212
1213   auto ThreadID = getThreadID(CGF, Loc);
1214   auto Int32Ty =
1215       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1216   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1217   CGF.EmitStoreOfScalar(ThreadID,
1218                         CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
1219
1220   return ThreadIDTemp;
1221 }
1222
1223 llvm::Constant *
1224 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1225                                              const llvm::Twine &Name) {
1226   SmallString<256> Buffer;
1227   llvm::raw_svector_ostream Out(Buffer);
1228   Out << Name;
1229   auto RuntimeName = Out.str();
1230   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1231   if (Elem.second) {
1232     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1233            "OMP internal variable has different type than requested");
1234     return &*Elem.second;
1235   }
1236
1237   return Elem.second = new llvm::GlobalVariable(
1238              CGM.getModule(), Ty, /*IsConstant*/ false,
1239              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1240              Elem.first());
1241 }
1242
1243 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1244   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1245   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1246 }
1247
1248 namespace {
1249 template <size_t N> class CallEndCleanup : public EHScopeStack::Cleanup {
1250   llvm::Value *Callee;
1251   llvm::Value *Args[N];
1252
1253 public:
1254   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1255       : Callee(Callee) {
1256     assert(CleanupArgs.size() == N);
1257     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1258   }
1259   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1260     CGF.EmitRuntimeCall(Callee, Args);
1261   }
1262 };
1263 } // namespace
1264
1265 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1266                                          StringRef CriticalName,
1267                                          const RegionCodeGenTy &CriticalOpGen,
1268                                          SourceLocation Loc) {
1269   // __kmpc_critical(ident_t *, gtid, Lock);
1270   // CriticalOpGen();
1271   // __kmpc_end_critical(ident_t *, gtid, Lock);
1272   // Prepare arguments and build a call to __kmpc_critical
1273   {
1274     CodeGenFunction::RunCleanupsScope Scope(CGF);
1275     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1276                            getCriticalRegionLock(CriticalName)};
1277     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1278     // Build a call to __kmpc_end_critical
1279     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1280         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1281         llvm::makeArrayRef(Args));
1282     emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1283   }
1284 }
1285
1286 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1287                        OpenMPDirectiveKind Kind,
1288                        const RegionCodeGenTy &BodyOpGen) {
1289   llvm::Value *CallBool = CGF.EmitScalarConversion(
1290       IfCond,
1291       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1292       CGF.getContext().BoolTy);
1293
1294   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1295   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1296   // Generate the branch (If-stmt)
1297   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1298   CGF.EmitBlock(ThenBlock);
1299   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1300   // Emit the rest of bblocks/branches
1301   CGF.EmitBranch(ContBlock);
1302   CGF.EmitBlock(ContBlock, true);
1303 }
1304
1305 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1306                                        const RegionCodeGenTy &MasterOpGen,
1307                                        SourceLocation Loc) {
1308   // if(__kmpc_master(ident_t *, gtid)) {
1309   //   MasterOpGen();
1310   //   __kmpc_end_master(ident_t *, gtid);
1311   // }
1312   // Prepare arguments and build a call to __kmpc_master
1313   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1314   auto *IsMaster =
1315       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1316   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1317       MasterCallEndCleanup;
1318   emitIfStmt(CGF, IsMaster, OMPD_master, [&](CodeGenFunction &CGF) -> void {
1319     CodeGenFunction::RunCleanupsScope Scope(CGF);
1320     CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1321         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1322         llvm::makeArrayRef(Args));
1323     MasterOpGen(CGF);
1324   });
1325 }
1326
1327 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1328                                         SourceLocation Loc) {
1329   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1330   llvm::Value *Args[] = {
1331       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1332       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1333   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1334 }
1335
1336 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1337                                           const RegionCodeGenTy &TaskgroupOpGen,
1338                                           SourceLocation Loc) {
1339   // __kmpc_taskgroup(ident_t *, gtid);
1340   // TaskgroupOpGen();
1341   // __kmpc_end_taskgroup(ident_t *, gtid);
1342   // Prepare arguments and build a call to __kmpc_taskgroup
1343   {
1344     CodeGenFunction::RunCleanupsScope Scope(CGF);
1345     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1346     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1347     // Build a call to __kmpc_end_taskgroup
1348     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1349         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1350         llvm::makeArrayRef(Args));
1351     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1352   }
1353 }
1354
1355 static llvm::Value *emitCopyprivateCopyFunction(
1356     CodeGenModule &CGM, llvm::Type *ArgsType,
1357     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1358     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1359   auto &C = CGM.getContext();
1360   // void copy_func(void *LHSArg, void *RHSArg);
1361   FunctionArgList Args;
1362   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1363                            C.VoidPtrTy);
1364   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1365                            C.VoidPtrTy);
1366   Args.push_back(&LHSArg);
1367   Args.push_back(&RHSArg);
1368   FunctionType::ExtInfo EI;
1369   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1370       C.VoidTy, Args, EI, /*isVariadic=*/false);
1371   auto *Fn = llvm::Function::Create(
1372       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1373       ".omp.copyprivate.copy_func", &CGM.getModule());
1374   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
1375   CodeGenFunction CGF(CGM);
1376   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1377   // Dest = (void*[n])(LHSArg);
1378   // Src = (void*[n])(RHSArg);
1379   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1380       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
1381                                     CGF.PointerAlignInBytes),
1382       ArgsType);
1383   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1384       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
1385                                     CGF.PointerAlignInBytes),
1386       ArgsType);
1387   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1388   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1389   // ...
1390   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1391   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1392     auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1393         CGF.Builder.CreateAlignedLoad(
1394             CGF.Builder.CreateStructGEP(nullptr, LHS, I),
1395             CGM.PointerAlignInBytes),
1396         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1397     auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1398         CGF.Builder.CreateAlignedLoad(
1399             CGF.Builder.CreateStructGEP(nullptr, RHS, I),
1400             CGM.PointerAlignInBytes),
1401         CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
1402     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1403     QualType Type = VD->getType();
1404     CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr,
1405                     cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
1406                     cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
1407                     AssignmentOps[I]);
1408   }
1409   CGF.FinishFunction();
1410   return Fn;
1411 }
1412
1413 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1414                                        const RegionCodeGenTy &SingleOpGen,
1415                                        SourceLocation Loc,
1416                                        ArrayRef<const Expr *> CopyprivateVars,
1417                                        ArrayRef<const Expr *> SrcExprs,
1418                                        ArrayRef<const Expr *> DstExprs,
1419                                        ArrayRef<const Expr *> AssignmentOps) {
1420   assert(CopyprivateVars.size() == SrcExprs.size() &&
1421          CopyprivateVars.size() == DstExprs.size() &&
1422          CopyprivateVars.size() == AssignmentOps.size());
1423   auto &C = CGM.getContext();
1424   // int32 did_it = 0;
1425   // if(__kmpc_single(ident_t *, gtid)) {
1426   //   SingleOpGen();
1427   //   __kmpc_end_single(ident_t *, gtid);
1428   //   did_it = 1;
1429   // }
1430   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1431   // <copy_func>, did_it);
1432
1433   llvm::AllocaInst *DidIt = nullptr;
1434   if (!CopyprivateVars.empty()) {
1435     // int32 did_it = 0;
1436     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1437     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1438     CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
1439                                    DidIt->getAlignment());
1440   }
1441   // Prepare arguments and build a call to __kmpc_single
1442   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1443   auto *IsSingle =
1444       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1445   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1446       SingleCallEndCleanup;
1447   emitIfStmt(CGF, IsSingle, OMPD_single, [&](CodeGenFunction &CGF) -> void {
1448     CodeGenFunction::RunCleanupsScope Scope(CGF);
1449     CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1450         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1451         llvm::makeArrayRef(Args));
1452     SingleOpGen(CGF);
1453     if (DidIt) {
1454       // did_it = 1;
1455       CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
1456                                      DidIt->getAlignment());
1457     }
1458   });
1459   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1460   // <copy_func>, did_it);
1461   if (DidIt) {
1462     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1463     auto CopyprivateArrayTy =
1464         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1465                                /*IndexTypeQuals=*/0);
1466     // Create a list of all private variables for copyprivate.
1467     auto *CopyprivateList =
1468         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1469     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1470       auto *Elem = CGF.Builder.CreateStructGEP(
1471           CopyprivateList->getAllocatedType(), CopyprivateList, I);
1472       CGF.Builder.CreateAlignedStore(
1473           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1474               CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
1475           Elem, CGM.PointerAlignInBytes);
1476     }
1477     // Build function that copies private values from single region to all other
1478     // threads in the corresponding parallel region.
1479     auto *CpyFn = emitCopyprivateCopyFunction(
1480         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1481         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1482     auto *BufSize = llvm::ConstantInt::get(
1483         CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
1484     auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1485                                                                CGF.VoidPtrTy);
1486     auto *DidItVal =
1487         CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
1488     llvm::Value *Args[] = {
1489         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1490         getThreadID(CGF, Loc),        // i32 <gtid>
1491         BufSize,                      // size_t <buf_size>
1492         CL,                           // void *<copyprivate list>
1493         CpyFn,                        // void (*) (void *, void *) <copy_func>
1494         DidItVal                      // i32 did_it
1495     };
1496     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1497   }
1498 }
1499
1500 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1501                                         const RegionCodeGenTy &OrderedOpGen,
1502                                         SourceLocation Loc) {
1503   // __kmpc_ordered(ident_t *, gtid);
1504   // OrderedOpGen();
1505   // __kmpc_end_ordered(ident_t *, gtid);
1506   // Prepare arguments and build a call to __kmpc_ordered
1507   {
1508     CodeGenFunction::RunCleanupsScope Scope(CGF);
1509     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1510     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1511     // Build a call to __kmpc_end_ordered
1512     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1513         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1514         llvm::makeArrayRef(Args));
1515     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1516   }
1517 }
1518
1519 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1520                                       OpenMPDirectiveKind Kind,
1521                                       bool CheckForCancel) {
1522   // Build call __kmpc_cancel_barrier(loc, thread_id);
1523   // Build call __kmpc_barrier(loc, thread_id);
1524   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1525   if (Kind == OMPD_for) {
1526     Flags =
1527         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1528   } else if (Kind == OMPD_sections) {
1529     Flags = static_cast<OpenMPLocationFlags>(Flags |
1530                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1531   } else if (Kind == OMPD_single) {
1532     Flags =
1533         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1534   } else if (Kind == OMPD_barrier) {
1535     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1536   } else {
1537     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1538   }
1539   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1540   // thread_id);
1541   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1542                          getThreadID(CGF, Loc)};
1543   if (auto *OMPRegionInfo =
1544           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1545     auto CancelDestination =
1546         CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1547     if (CancelDestination.isValid()) {
1548       auto *Result = CGF.EmitRuntimeCall(
1549           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1550       if (CheckForCancel) {
1551         // if (__kmpc_cancel_barrier()) {
1552         //   exit from construct;
1553         // }
1554         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1555         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1556         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1557         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1558         CGF.EmitBlock(ExitBB);
1559         //   exit from construct;
1560         CGF.EmitBranchThroughCleanup(CancelDestination);
1561         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1562       }
1563       return;
1564     }
1565   }
1566   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1567 }
1568
/// \brief Schedule kinds handed to the runtime for 'omp for' loops.
///
/// The enumerators (names and numeric values) are taken from the enum
/// sched_type in kmp.h, which is why every value is spelled out explicitly.
/// Each 'OMP_ord_*' enumerator is the 'ordered' counterpart of the
/// 'OMP_sch_*' enumerator with the same suffix.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  /// \brief 'static' schedule with a chunk.
  OMP_sch_static_chunked = 33,
  /// \brief 'static' schedule without a chunk.
  OMP_sch_static = 34,
  /// \brief 'dynamic' schedule.
  OMP_sch_dynamic_chunked = 35,
  /// \brief 'guided' schedule.
  OMP_sch_guided_chunked = 36,
  /// \brief 'runtime' schedule.
  OMP_sch_runtime = 37,
  /// \brief 'auto' schedule.
  OMP_sch_auto = 38,

  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  /// \brief Ordered 'static' schedule with a chunk.
  OMP_ord_static_chunked = 65,
  /// \brief Ordered 'static' schedule without a chunk.
  OMP_ord_static = 66,
  /// \brief Ordered 'dynamic' schedule.
  OMP_ord_dynamic_chunked = 67,
  /// \brief Ordered 'guided' schedule.
  OMP_ord_guided_chunked = 68,
  /// \brief Ordered 'runtime' schedule.
  OMP_ord_runtime = 69,
  /// \brief Ordered 'auto' schedule.
  OMP_ord_auto = 70,

  /// \brief Alias for the default schedule (unordered, non-chunked static).
  OMP_sch_default = OMP_sch_static,
};
1590
1591 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1592 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1593                                           bool Chunked, bool Ordered) {
1594   switch (ScheduleKind) {
1595   case OMPC_SCHEDULE_static:
1596     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1597                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1598   case OMPC_SCHEDULE_dynamic:
1599     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1600   case OMPC_SCHEDULE_guided:
1601     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1602   case OMPC_SCHEDULE_runtime:
1603     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1604   case OMPC_SCHEDULE_auto:
1605     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1606   case OMPC_SCHEDULE_unknown:
1607     assert(!Chunked && "chunk was specified but schedule kind not known");
1608     return Ordered ? OMP_ord_static : OMP_sch_static;
1609   }
1610   llvm_unreachable("Unexpected runtime schedule");
1611 }
1612
1613 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1614                                          bool Chunked) const {
1615   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1616   return Schedule == OMP_sch_static;
1617 }
1618
1619 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1620   auto Schedule =
1621       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1622   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1623   return Schedule != OMP_sch_static;
1624 }
1625
1626 void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
1627                                   OpenMPScheduleClauseKind ScheduleKind,
1628                                   unsigned IVSize, bool IVSigned, bool Ordered,
1629                                   llvm::Value *IL, llvm::Value *LB,
1630                                   llvm::Value *UB, llvm::Value *ST,
1631                                   llvm::Value *Chunk) {
1632   OpenMPSchedType Schedule =
1633       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1634   if (Ordered ||
1635       (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1636        Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
1637     // Call __kmpc_dispatch_init(
1638     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1639     //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1640     //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1641
1642     // If the Chunk was not specified in the clause - use default value 1.
1643     if (Chunk == nullptr)
1644       Chunk = CGF.Builder.getIntN(IVSize, 1);
1645     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1646                             getThreadID(CGF, Loc),
1647                             CGF.Builder.getInt32(Schedule), // Schedule type
1648                             CGF.Builder.getIntN(IVSize, 0), // Lower
1649                             UB,                             // Upper
1650                             CGF.Builder.getIntN(IVSize, 1), // Stride
1651                             Chunk                           // Chunk
1652     };
1653     CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1654   } else {
1655     // Call __kmpc_for_static_init(
1656     //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1657     //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1658     //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1659     //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1660     if (Chunk == nullptr) {
1661       assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1662              "expected static non-chunked schedule");
1663       // If the Chunk was not specified in the clause - use default value 1.
1664       Chunk = CGF.Builder.getIntN(IVSize, 1);
1665     } else
1666       assert((Schedule == OMP_sch_static_chunked ||
1667               Schedule == OMP_ord_static_chunked) &&
1668              "expected static chunked schedule");
1669     llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1670                             getThreadID(CGF, Loc),
1671                             CGF.Builder.getInt32(Schedule), // Schedule type
1672                             IL,                             // &isLastIter
1673                             LB,                             // &LB
1674                             UB,                             // &UB
1675                             ST,                             // &Stride
1676                             CGF.Builder.getIntN(IVSize, 1), // Incr
1677                             Chunk                           // Chunk
1678     };
1679     CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1680   }
1681 }
1682
1683 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1684                                           SourceLocation Loc) {
1685   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1686   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1687                          getThreadID(CGF, Loc)};
1688   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1689                       Args);
1690 }
1691
1692 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1693                                                  SourceLocation Loc,
1694                                                  unsigned IVSize,
1695                                                  bool IVSigned) {
1696   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1697   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1698                          getThreadID(CGF, Loc)};
1699   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1700 }
1701
1702 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1703                                           SourceLocation Loc, unsigned IVSize,
1704                                           bool IVSigned, llvm::Value *IL,
1705                                           llvm::Value *LB, llvm::Value *UB,
1706                                           llvm::Value *ST) {
1707   // Call __kmpc_dispatch_next(
1708   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1709   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1710   //          kmp_int[32|64] *p_stride);
1711   llvm::Value *Args[] = {
1712       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1713       IL, // &isLastIter
1714       LB, // &Lower
1715       UB, // &Upper
1716       ST  // &Stride
1717   };
1718   llvm::Value *Call =
1719       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1720   return CGF.EmitScalarConversion(
1721       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1722       CGF.getContext().BoolTy);
1723 }
1724
1725 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1726                                            llvm::Value *NumThreads,
1727                                            SourceLocation Loc) {
1728   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1729   llvm::Value *Args[] = {
1730       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1731       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1732   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1733                       Args);
1734 }
1735
1736 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1737                                          OpenMPProcBindClauseKind ProcBind,
1738                                          SourceLocation Loc) {
1739   // Constants for proc bind value accepted by the runtime.
1740   enum ProcBindTy {
1741     ProcBindFalse = 0,
1742     ProcBindTrue,
1743     ProcBindMaster,
1744     ProcBindClose,
1745     ProcBindSpread,
1746     ProcBindIntel,
1747     ProcBindDefault
1748   } RuntimeProcBind;
1749   switch (ProcBind) {
1750   case OMPC_PROC_BIND_master:
1751     RuntimeProcBind = ProcBindMaster;
1752     break;
1753   case OMPC_PROC_BIND_close:
1754     RuntimeProcBind = ProcBindClose;
1755     break;
1756   case OMPC_PROC_BIND_spread:
1757     RuntimeProcBind = ProcBindSpread;
1758     break;
1759   case OMPC_PROC_BIND_unknown:
1760     llvm_unreachable("Unsupported proc_bind value.");
1761   }
1762   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1763   llvm::Value *Args[] = {
1764       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1765       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1766   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1767 }
1768
1769 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1770                                 SourceLocation Loc) {
1771   // Build call void __kmpc_flush(ident_t *loc)
1772   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1773                       emitUpdateLocation(CGF, Loc));
1774 }
1775
namespace {
/// \brief Positions of the fields inside the kmp_task_t record. The values
/// follow the order in which createKmpTaskTRecordDecl adds the fields and are
/// used as offsets from the record's field_begin().
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors = 3,
};
} // namespace
1789
1790 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1791   if (!KmpRoutineEntryPtrTy) {
1792     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1793     auto &C = CGM.getContext();
1794     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1795     FunctionProtoType::ExtProtoInfo EPI;
1796     KmpRoutineEntryPtrQTy = C.getPointerType(
1797         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1798     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1799   }
1800 }
1801
1802 static void addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1803                                  QualType FieldTy) {
1804   auto *Field = FieldDecl::Create(
1805       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1806       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1807       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1808   Field->setAccess(AS_public);
1809   DC->addDecl(Field);
1810 }
1811
1812 namespace {
1813 struct PrivateHelpersTy {
1814   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1815                    const VarDecl *PrivateElemInit)
1816       : Original(Original), PrivateCopy(PrivateCopy),
1817         PrivateElemInit(PrivateElemInit) {}
1818   const VarDecl *Original;
1819   const VarDecl *PrivateCopy;
1820   const VarDecl *PrivateElemInit;
1821 };
1822 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
1823 } // namespace
1824
1825 static RecordDecl *
1826 createPrivatesRecordDecl(CodeGenModule &CGM,
1827                          const ArrayRef<PrivateDataTy> Privates) {
1828   if (!Privates.empty()) {
1829     auto &C = CGM.getContext();
1830     // Build struct .kmp_privates_t. {
1831     //         /*  private vars  */
1832     //       };
1833     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
1834     RD->startDefinition();
1835     for (auto &&Pair : Privates) {
1836       auto Type = Pair.second.Original->getType();
1837       Type = Type.getNonReferenceType();
1838       addFieldToRecordDecl(C, RD, Type);
1839     }
1840     RD->completeDefinition();
1841     return RD;
1842   }
1843   return nullptr;
1844 }
1845
1846 static RecordDecl *
1847 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
1848                          QualType KmpRoutineEntryPointerQTy) {
1849   auto &C = CGM.getContext();
1850   // Build struct kmp_task_t {
1851   //         void *              shareds;
1852   //         kmp_routine_entry_t routine;
1853   //         kmp_int32           part_id;
1854   //         kmp_routine_entry_t destructors;
1855   //       };
1856   auto *RD = C.buildImplicitRecord("kmp_task_t");
1857   RD->startDefinition();
1858   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1859   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1860   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1861   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
1862   RD->completeDefinition();
1863   return RD;
1864 }
1865
1866 static RecordDecl *
1867 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
1868                                      const ArrayRef<PrivateDataTy> Privates) {
1869   auto &C = CGM.getContext();
1870   // Build struct kmp_task_t_with_privates {
1871   //         kmp_task_t task_data;
1872   //         .kmp_privates_t. privates;
1873   //       };
1874   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
1875   RD->startDefinition();
1876   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
1877   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
1878     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
1879   }
1880   RD->completeDefinition();
1881   return RD;
1882 }
1883
1884 /// \brief Emit a proxy function which accepts kmp_task_t as the second
1885 /// argument.
1886 /// \code
1887 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
1888 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
1889 ///   tt->shareds);
1890 ///   return 0;
1891 /// }
1892 /// \endcode
1893 static llvm::Value *
1894 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
1895                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
1896                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
1897                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
1898                       llvm::Value *TaskPrivatesMap) {
1899   auto &C = CGM.getContext();
1900   FunctionArgList Args;
1901   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1902   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1903                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1904   Args.push_back(&GtidArg);
1905   Args.push_back(&TaskTypeArg);
1906   FunctionType::ExtInfo Info;
1907   auto &TaskEntryFnInfo =
1908       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1909                                                     /*isVariadic=*/false);
1910   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
1911   auto *TaskEntry =
1912       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
1913                              ".omp_task_entry.", &CGM.getModule());
1914   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskEntryFnInfo, TaskEntry);
1915   CodeGenFunction CGF(CGM);
1916   CGF.disableDebugInfo();
1917   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
1918
1919   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
1920   // tt->task_data.shareds);
1921   auto *GtidParam = CGF.EmitLoadOfScalar(
1922       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
1923       C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
1924   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1925       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1926   LValue TDBase =
1927       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1928   auto *KmpTaskTWithPrivatesQTyRD =
1929       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
1930   LValue Base =
1931       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
1932   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
1933   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
1934   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
1935   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
1936
1937   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
1938   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
1939   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1940       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
1941       CGF.ConvertTypeForMem(SharedsPtrTy));
1942
1943   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
1944   llvm::Value *PrivatesParam;
1945   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
1946     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
1947     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1948         PrivatesLVal.getAddress(), CGF.VoidPtrTy);
1949   } else {
1950     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
1951   }
1952
1953   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
1954                              TaskPrivatesMap, SharedsParam};
1955   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
1956   CGF.EmitStoreThroughLValue(
1957       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
1958       CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
1959   CGF.FinishFunction();
1960   return TaskEntry;
1961 }
1962
1963 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
1964                                             SourceLocation Loc,
1965                                             QualType KmpInt32Ty,
1966                                             QualType KmpTaskTWithPrivatesPtrQTy,
1967                                             QualType KmpTaskTWithPrivatesQTy) {
1968   auto &C = CGM.getContext();
1969   FunctionArgList Args;
1970   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
1971   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
1972                                 /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
1973   Args.push_back(&GtidArg);
1974   Args.push_back(&TaskTypeArg);
1975   FunctionType::ExtInfo Info;
1976   auto &DestructorFnInfo =
1977       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
1978                                                     /*isVariadic=*/false);
1979   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
1980   auto *DestructorFn =
1981       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
1982                              ".omp_task_destructor.", &CGM.getModule());
1983   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, DestructorFnInfo, DestructorFn);
1984   CodeGenFunction CGF(CGM);
1985   CGF.disableDebugInfo();
1986   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
1987                     Args);
1988
1989   auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
1990       CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
1991   LValue Base =
1992       CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
1993   auto *KmpTaskTWithPrivatesQTyRD =
1994       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
1995   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
1996   Base = CGF.EmitLValueForField(Base, *FI);
1997   for (auto *Field :
1998        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
1999     if (auto DtorKind = Field->getType().isDestructedType()) {
2000       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2001       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2002     }
2003   }
2004   CGF.FinishFunction();
2005   return DestructorFn;
2006 }
2007
2008 /// \brief Emit a privates mapping function for correct handling of private and
2009 /// firstprivate variables.
2010 /// \code
2011 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2012 /// **noalias priv1,...,  <tyn> **noalias privn) {
2013 ///   *priv1 = &.privates.priv1;
2014 ///   ...;
2015 ///   *privn = &.privates.privn;
2016 /// }
2017 /// \endcode
2018 static llvm::Value *
2019 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2020                                const ArrayRef<const Expr *> PrivateVars,
2021                                const ArrayRef<const Expr *> FirstprivateVars,
2022                                QualType PrivatesQTy,
2023                                const ArrayRef<PrivateDataTy> Privates) {
2024   auto &C = CGM.getContext();
2025   FunctionArgList Args;
2026   ImplicitParamDecl TaskPrivatesArg(
2027       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2028       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2029   Args.push_back(&TaskPrivatesArg);
2030   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2031   unsigned Counter = 1;
2032   for (auto *E: PrivateVars) {
2033     Args.push_back(ImplicitParamDecl::Create(
2034         C, /*DC=*/nullptr, Loc,
2035         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2036                             .withConst()
2037                             .withRestrict()));
2038     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2039     PrivateVarsPos[VD] = Counter;
2040     ++Counter;
2041   }
2042   for (auto *E : FirstprivateVars) {
2043     Args.push_back(ImplicitParamDecl::Create(
2044         C, /*DC=*/nullptr, Loc,
2045         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2046                             .withConst()
2047                             .withRestrict()));
2048     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2049     PrivateVarsPos[VD] = Counter;
2050     ++Counter;
2051   }
2052   FunctionType::ExtInfo Info;
2053   auto &TaskPrivatesMapFnInfo =
2054       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2055                                                     /*isVariadic=*/false);
2056   auto *TaskPrivatesMapTy =
2057       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2058   auto *TaskPrivatesMap = llvm::Function::Create(
2059       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2060       ".omp_task_privates_map.", &CGM.getModule());
2061   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, TaskPrivatesMapFnInfo,
2062                                 TaskPrivatesMap);
2063   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2064   CodeGenFunction CGF(CGM);
2065   CGF.disableDebugInfo();
2066   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2067                     TaskPrivatesMapFnInfo, Args);
2068
2069   // *privi = &.privates.privi;
2070   auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad(
2071       CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes);
2072   LValue Base =
2073       CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy);
2074   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2075   Counter = 0;
2076   for (auto *Field : PrivatesQTyRD->fields()) {
2077     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2078     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2079     auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD),
2080                                                   VD->getType());
2081     auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc);
2082     CGF.EmitStoreOfScalar(
2083         FieldLVal.getAddress(),
2084         CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(),
2085                                        RefLVal.getType()->getPointeeType()));
2086     ++Counter;
2087   }
2088   CGF.FinishFunction();
2089   return TaskPrivatesMap;
2090 }
2091
2092 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2093                                      const PrivateDataTy *P2) {
2094   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2095 }
2096
/// \brief Emit the code that creates and launches an OpenMP task.
///
/// The emitted sequence is, in order:
///  1. collect the private and firstprivate variables and sort them with
///     array_pod_sort_comparator, keyed on the alignment of their types;
///  2. build the per-task record wrapping kmp_task_t together with the
///     privates and, if there are privates, a privates-mapping function;
///  3. build the proxy task entry and allocate the task through
///     __kmpc_omp_task_alloc;
///  4. copy the shareds into the task, initialize the private copies and
///     store a destructors function (or a null pointer) in the task record;
///  5. fill a kmp_depend_info array when \a Dependences is not empty;
///  6. enqueue the task with __kmpc_omp_task or __kmpc_omp_task_with_deps; on
///     the 'else' side of \a IfCond the proxy entry is instead called directly
///     after __kmpc_omp_task_begin_if0, with __kmpc_omp_task_complete_if0
///     registered as a cleanup.
///
/// \param CGF Function the code is emitted into.
/// \param Loc Location used for the runtime calls and emitted loads.
/// \param D Directive whose associated captured statement is used to look up
/// the captured fields of firstprivate variables.
/// \param Tied When true, the tied flag (0x1) is set in the task flags.
/// \param Final When the pointer part is non-null it is used as the condition
/// of a select between the final flag (0x2) and 0; otherwise the integer part
/// decides at compile time.
/// \param TaskFunction Outlined task function. It is cast to llvm::Function
/// and the type of its fourth argument is used as the type of the
/// privates-mapping function pointer.
/// \param SharedsTy Type of the shareds; getAsStructureType() is dereferenced
/// without a null check, so it is expected to be a structure type.
/// \param Shareds Source of the aggregate copy into the task's shareds area.
/// \param IfCond Optional 'if' condition; may be null.
/// \param PrivateVars References to private variables; walked in lockstep with
/// \a PrivateCopies.
/// \param PrivateCopies References to the private copies.
/// \param FirstprivateVars References to firstprivate variables; walked in
/// lockstep with \a FirstprivateCopies and \a FirstprivateInits.
/// \param FirstprivateCopies References to the firstprivate copies.
/// \param FirstprivateInits References to the helper variables used in the
/// initializers of the firstprivate copies.
/// \param Dependences (kind, expression) pairs describing task dependences.
2097 void CGOpenMPRuntime::emitTaskCall(
2098     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2099     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2100     llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
2101     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
2102     ArrayRef<const Expr *> PrivateCopies,
2103     ArrayRef<const Expr *> FirstprivateVars,
2104     ArrayRef<const Expr *> FirstprivateCopies,
2105     ArrayRef<const Expr *> FirstprivateInits,
2106     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
2107   auto &C = CGM.getContext();
2108   llvm::SmallVector<PrivateDataTy, 8> Privates;
2109   // Aggregate privates and sort them by the alignment.
2110   auto I = PrivateCopies.begin();
2111   for (auto *E : PrivateVars) {
2112     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2113     Privates.push_back(std::make_pair(
2114         C.getTypeAlignInChars(VD->getType()),
2115         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2116                          /*PrivateElemInit=*/nullptr)));
2117     ++I;
2118   }
2119   I = FirstprivateCopies.begin();
2120   auto IElemInitRef = FirstprivateInits.begin();
2121   for (auto *E : FirstprivateVars) {
2122     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2123     Privates.push_back(std::make_pair(
2124         C.getTypeAlignInChars(VD->getType()),
2125         PrivateHelpersTy(
2126             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2127             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2128     ++I, ++IElemInitRef;
2129   }
2130   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2131                        array_pod_sort_comparator);
2132   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2133   // Build type kmp_routine_entry_t (if not built yet).
2134   emitKmpRoutineEntryT(KmpInt32Ty);
2135   // Build type kmp_task_t (if not built yet).
2136   if (KmpTaskTQTy.isNull()) {
2137     KmpTaskTQTy = C.getRecordType(
2138         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2139   }
2140   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2141   // Build particular struct kmp_task_t for the given task.
2142   auto *KmpTaskTWithPrivatesQTyRD =
2143       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2144   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2145   QualType KmpTaskTWithPrivatesPtrQTy =
2146       C.getPointerType(KmpTaskTWithPrivatesQTy);
2147   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2148   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2149   auto KmpTaskTWithPrivatesTySize =
2150       CGM.getSize(C.getTypeSizeInChars(KmpTaskTWithPrivatesQTy));
2151   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2152
2153   // Emit initial values for private copies (if any).
2154   llvm::Value *TaskPrivatesMap = nullptr;
       // The privates-mapping function is passed around with the type of the
       // fourth argument of the outlined task function.
2155   auto *TaskPrivatesMapTy =
2156       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2157                 3)
2158           ->getType();
2159   if (!Privates.empty()) {
         // The privates record is the second field of the per-task record.
2160     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2161     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2162         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2163     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2164         TaskPrivatesMap, TaskPrivatesMapTy);
2165   } else {
         // No privates: pass a null mapping function of the expected type.
2166     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2167         cast<llvm::PointerType>(TaskPrivatesMapTy));
2168   }
2169   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2170   // kmp_task_t *tt);
2171   auto *TaskEntry = emitProxyTaskFunction(
2172       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2173       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2174
2175   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2176   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2177   // kmp_routine_entry_t *task_entry);
2178   // Task flags. Format is taken from
2179   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2180   // description of kmp_tasking_flags struct.
2181   const unsigned TiedFlag = 0x1;
2182   const unsigned FinalFlag = 0x2;
2183   unsigned Flags = Tied ? TiedFlag : 0;
       // The final flag is selected at run time when Final carries a value,
       // and folded to a constant otherwise; the tied flag is OR-ed in after.
2184   auto *TaskFlags =
2185       Final.getPointer()
2186           ? CGF.Builder.CreateSelect(Final.getPointer(),
2187                                      CGF.Builder.getInt32(FinalFlag),
2188                                      CGF.Builder.getInt32(/*C=*/0))
2189           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2190   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2191   auto SharedsSize = C.getTypeSizeInChars(SharedsTy);
2192   llvm::Value *AllocArgs[] = {
2193       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
2194       KmpTaskTWithPrivatesTySize, CGM.getSize(SharedsSize),
2195       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
2196                                                       KmpRoutineEntryPtrTy)};
2197   auto *NewTask = CGF.EmitRuntimeCall(
2198       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
       // View the allocated task as the per-task record; TDBase addresses its
       // first field.
2199   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2200       NewTask, KmpTaskTWithPrivatesPtrTy);
2201   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2202                                                KmpTaskTWithPrivatesQTy);
2203   LValue TDBase =
2204       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2205   // Fill the data in the resulting kmp_task_t record.
2206   // Copy shareds if there are any.
       // KmpTaskSharedsPtr stays null when the shareds record has no fields.
2207   llvm::Value *KmpTaskSharedsPtr = nullptr;
2208   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2209     KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
2210         CGF.EmitLValueForField(
2211             TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
2212         Loc);
2213     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2214   }
2215   // Emit initial values for private copies (if any).
2216   bool NeedsCleanup = false;
2217   if (!Privates.empty()) {
2218     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2219     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
         // From here on FI iterates over the fields of the privates record,
         // in step with the entries of Privates.
2220     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
         // SharedsBase is only set up when there are firstprivate variables;
         // it is used below only for entries that have a PrivateElemInit.
2221     LValue SharedsBase;
2222     if (!FirstprivateVars.empty()) {
2223       SharedsBase = CGF.MakeNaturalAlignAddrLValue(
2224           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2225               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2226           SharedsTy);
2227     }
2228     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2229         cast<CapturedStmt>(*D.getAssociatedStmt()));
2230     for (auto &&Pair : Privates) {
2231       auto *VD = Pair.second.PrivateCopy;
2232       auto *Init = VD->getAnyInitializer();
2233       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
2234       if (Init) {
             // A non-null PrivateElemInit marks a firstprivate entry, which
             // is initialized from the matching captured field of the shareds.
2235         if (auto *Elem = Pair.second.PrivateElemInit) {
2236           auto *OriginalVD = Pair.second.Original;
2237           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2238           auto SharedRefLValue =
2239               CGF.EmitLValueForField(SharedsBase, SharedField);
2240           QualType Type = OriginalVD->getType();
2241           if (Type->isArrayType()) {
2242             // Initialize firstprivate array.
2243             if (!isa<CXXConstructExpr>(Init) ||
2244                 CGF.isTrivialInitializer(Init)) {
2245               // Perform simple memcpy.
2246               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2247                                       SharedRefLValue.getAddress(), Type);
2248             } else {
2249               // Initialize firstprivate array using element-by-element
2250               // initialization.
2251               CGF.EmitOMPAggregateAssign(
2252                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2253                   Type, [&CGF, Elem, Init, &CapturesInfo](
2254                             llvm::Value *DestElement, llvm::Value *SrcElement) {
2255                     // Clean up any temporaries needed by the initialization.
2256                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2257                     InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
2258                       return SrcElement;
2259                     });
2260                     (void)InitScope.Privatize();
2261                     // Emit initialization for single element.
2262                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
2263                         CGF, &CapturesInfo);
2264                     CGF.EmitAnyExprToMem(Init, DestElement,
2265                                          Init->getType().getQualifiers(),
2266                                          /*IsInitializer=*/false);
2267                   });
2268             }
2269           } else {
                 // Non-array firstprivate: map the helper variable Elem to the
                 // shared field's address while emitting the initializer.
2270             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2271             InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
2272               return SharedRefLValue.getAddress();
2273             });
2274             (void)InitScope.Privatize();
2275             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
2276             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2277                                /*capturedByInit=*/false);
2278           }
2279         } else {
               // Plain private copy: emit its own initializer.
2280           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2281         }
2282       }
           // A destructors function is needed as soon as one field of the
           // privates record has a destructed type.
2283       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2284       ++FI;
2285     }
2286   }
2287   // Provide pointer to function with destructors for privates.
2288   llvm::Value *DestructorFn =
2289       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2290                                              KmpTaskTWithPrivatesPtrQTy,
2291                                              KmpTaskTWithPrivatesQTy)
2292                    : llvm::ConstantPointerNull::get(
2293                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2294   LValue Destructor = CGF.EmitLValueForField(
2295       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2296   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2297                             DestructorFn, KmpRoutineEntryPtrTy),
2298                         Destructor);
2299
2300   // Process list of dependences.
       // DependInfo remains null when there are no dependences; the code
       // below uses that to choose between the plain and the *_deps calls.
2301   llvm::Value *DependInfo = nullptr;
2302   unsigned DependencesNumber = Dependences.size();
2303   if (!Dependences.empty()) {
2304     // Dependence kind for RTL.
2305     enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
         // Field indices within the kmp_depend_info record built below.
2306     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
2307     RecordDecl *KmpDependInfoRD;
         // The flags field is an unsigned integer as wide as 'bool'.
2308     QualType FlagsTy = C.getIntTypeForBitwidth(
2309         C.toBits(C.getTypeSizeInChars(C.BoolTy)), /*Signed=*/false);
2310     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
         // Build kmp_depend_info { intptr, size_t, flags } once and cache it.
2311     if (KmpDependInfoTy.isNull()) {
2312       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
2313       KmpDependInfoRD->startDefinition();
2314       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
2315       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
2316       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
2317       KmpDependInfoRD->completeDefinition();
2318       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
2319     } else {
2320       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
2321     }
2322     // Define type kmp_depend_info[<Dependences.size()>];
2323     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
2324         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()),
2325         ArrayType::Normal, /*IndexTypeQuals=*/0);
2326     // kmp_depend_info[<Dependences.size()>] deps;
2327     DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy);
2328     for (unsigned i = 0; i < DependencesNumber; ++i) {
2329       auto Addr = CGF.EmitLValue(Dependences[i].second);
2330       auto *Size = llvm::ConstantInt::get(
2331           CGF.SizeTy,
2332           C.getTypeSizeInChars(Dependences[i].second->getType()).getQuantity());
2333       auto Base = CGF.MakeNaturalAlignAddrLValue(
2334           CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i),
2335           KmpDependInfoTy);
2336       // deps[i].base_addr = &<Dependences[i].second>;
2337       auto BaseAddrLVal = CGF.EmitLValueForField(
2338           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
2339       CGF.EmitStoreOfScalar(
2340           CGF.Builder.CreatePtrToInt(Addr.getAddress(), CGF.IntPtrTy),
2341           BaseAddrLVal);
2342       // deps[i].len = sizeof(<Dependences[i].second>);
2343       auto LenLVal = CGF.EmitLValueForField(
2344           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
2345       CGF.EmitStoreOfScalar(Size, LenLVal);
2346       // deps[i].flags = <Dependences[i].first>;
2347       RTLDependenceKindTy DepKind;
2348       switch (Dependences[i].first) {
2349       case OMPC_DEPEND_in:
2350         DepKind = DepIn;
2351         break;
2352       case OMPC_DEPEND_out:
2353         DepKind = DepOut;
2354         break;
2355       case OMPC_DEPEND_inout:
2356         DepKind = DepInOut;
2357         break;
2358       case OMPC_DEPEND_unknown:
2359         llvm_unreachable("Unknown task dependence type");
2360       }
2361       auto FlagsLVal = CGF.EmitLValueForField(
2362           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
2363       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
2364                             FlagsLVal);
2365     }
         // Pass the array to the runtime as a void pointer to its first
         // element.
2366     DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2367         CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0),
2368         CGF.VoidPtrTy);
2369   }
2370
2371   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
2372   // libcall.
2373   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2374   // *new_task);
2375   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2376   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2377   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
2378   // list is not empty
2379   auto *ThreadID = getThreadID(CGF, Loc);
2380   auto *UpLoc = emitUpdateLocation(CGF, Loc);
2381   llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
       // The dependence-related slots are null when DependInfo is null; in
       // that case this array is not passed to any runtime call.
2382   llvm::Value *DepTaskArgs[] = {
2383       UpLoc,
2384       ThreadID,
2385       NewTask,
2386       DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
2387       DependInfo,
2388       DependInfo ? CGF.Builder.getInt32(0) : nullptr,
2389       DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
       // 'Then' branch: hand the task over to the runtime. The argument
       // arrays are captured by reference and must outlive the lambda's use
       // at the end of this function.
2390   auto &&ThenCodeGen = [this, DependInfo, &TaskArgs,
2391                         &DepTaskArgs](CodeGenFunction &CGF) {
2392     // TODO: add check for untied tasks.
2393     CGF.EmitRuntimeCall(
2394         createRuntimeFunction(DependInfo ? OMPRTL__kmpc_omp_task_with_deps
2395                                          : OMPRTL__kmpc_omp_task),
2396         DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs));
2397   };
       // Cleanup type parameterized by the number of elements in TaskArgs.
2398   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
2399       IfCallEndCleanup;
2400   llvm::Value *DepWaitTaskArgs[] = {
2401       UpLoc,
2402       ThreadID,
2403       DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
2404       DependInfo,
2405       DependInfo ? CGF.Builder.getInt32(0) : nullptr,
2406       DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
       // 'Else' branch: wait for the dependences (if any) and then run the
       // task entry directly, bracketed by the begin_if0/complete_if0 calls.
2407   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
2408                         DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) {
2409     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
2410     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2411     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
2412     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
2413     // is specified.
2414     if (DependInfo)
2415       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
2416                           DepWaitTaskArgs);
2417     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
2418     // kmp_task_t *new_task);
2419     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
2420                         TaskArgs);
2421     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2422     // kmp_task_t *new_task);
         // Registered as a normal-and-EH cleanup inside LocalScope rather
         // than emitted as a direct call here.
2423     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
2424         NormalAndEHCleanup,
2425         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
2426         llvm::makeArrayRef(TaskArgs));
2427
2428     // Call proxy_task_entry(gtid, new_task);
2429     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
2430     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
2431   };
       // Without an 'if' clause only the 'then' path is emitted.
2432   if (IfCond) {
2433     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
2434   } else {
2435     CodeGenFunction::RunCleanupsScope Scope(CGF);
2436     ThenCodeGen(CGF);
2437   }
2438 }
2439
2440 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2441                                           llvm::Type *ArgsType,
2442                                           ArrayRef<const Expr *> LHSExprs,
2443                                           ArrayRef<const Expr *> RHSExprs,
2444                                           ArrayRef<const Expr *> ReductionOps) {
2445   auto &C = CGM.getContext();
2446
2447   // void reduction_func(void *LHSArg, void *RHSArg);
2448   FunctionArgList Args;
2449   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2450                            C.VoidPtrTy);
2451   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2452                            C.VoidPtrTy);
2453   Args.push_back(&LHSArg);
2454   Args.push_back(&RHSArg);
2455   FunctionType::ExtInfo EI;
2456   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2457       C.VoidTy, Args, EI, /*isVariadic=*/false);
2458   auto *Fn = llvm::Function::Create(
2459       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2460       ".omp.reduction.reduction_func", &CGM.getModule());
2461   CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
2462   CodeGenFunction CGF(CGM);
2463   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2464
2465   // Dst = (void*[n])(LHSArg);
2466   // Src = (void*[n])(RHSArg);
2467   auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2468       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
2469                                     CGF.PointerAlignInBytes),
2470       ArgsType);
2471   auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2472       CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
2473                                     CGF.PointerAlignInBytes),
2474       ArgsType);
2475
2476   //  ...
2477   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2478   //  ...
2479   CodeGenFunction::OMPPrivateScope Scope(CGF);
2480   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
2481     Scope.addPrivate(
2482         cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
2483         [&]() -> llvm::Value *{
2484           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2485               CGF.Builder.CreateAlignedLoad(
2486                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
2487                   CGM.PointerAlignInBytes),
2488               CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
2489         });
2490     Scope.addPrivate(
2491         cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
2492         [&]() -> llvm::Value *{
2493           return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2494               CGF.Builder.CreateAlignedLoad(
2495                   CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
2496                   CGM.PointerAlignInBytes),
2497               CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
2498         });
2499   }
2500   Scope.Privatize();
2501   for (auto *E : ReductionOps) {
2502     CGF.EmitIgnoredExpr(E);
2503   }
2504   Scope.ForceCleanup();
2505   CGF.FinishFunction();
2506   return Fn;
2507 }
2508
2509 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2510                                     ArrayRef<const Expr *> LHSExprs,
2511                                     ArrayRef<const Expr *> RHSExprs,
2512                                     ArrayRef<const Expr *> ReductionOps,
2513                                     bool WithNowait, bool SimpleReduction) {
2514   // Next code should be emitted for reduction:
2515   //
2516   // static kmp_critical_name lock = { 0 };
2517   //
2518   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2519   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2520   //  ...
2521   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2522   //  *(Type<n>-1*)rhs[<n>-1]);
2523   // }
2524   //
2525   // ...
2526   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2527   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2528   // RedList, reduce_func, &<lock>)) {
2529   // case 1:
2530   //  ...
2531   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2532   //  ...
2533   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2534   // break;
2535   // case 2:
2536   //  ...
2537   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2538   //  ...
2539   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2540   // break;
2541   // default:;
2542   // }
2543   //
2544   // if SimpleReduction is true, only the next code is generated:
2545   //  ...
2546   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2547   //  ...
2548
2549   auto &C = CGM.getContext();
2550
2551   if (SimpleReduction) {
2552     CodeGenFunction::RunCleanupsScope Scope(CGF);
2553     for (auto *E : ReductionOps) {
2554       CGF.EmitIgnoredExpr(E);
2555     }
2556     return;
2557   }
2558
2559   // 1. Build a list of reduction variables.
2560   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2561   llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
2562   QualType ReductionArrayTy =
2563       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2564                              /*IndexTypeQuals=*/0);
2565   auto *ReductionList =
2566       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2567   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
2568     auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
2569     CGF.Builder.CreateAlignedStore(
2570         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2571             CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
2572         Elem, CGM.PointerAlignInBytes);
2573   }
2574
2575   // 2. Emit reduce_func().
2576   auto *ReductionFn = emitReductionFunction(
2577       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
2578       RHSExprs, ReductionOps);
2579
2580   // 3. Create static kmp_critical_name lock = { 0 };
2581   auto *Lock = getCriticalRegionLock(".reduction");
2582
2583   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2584   // RedList, reduce_func, &<lock>);
2585   auto *IdentTLoc = emitUpdateLocation(
2586       CGF, Loc,
2587       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2588   auto *ThreadId = getThreadID(CGF, Loc);
2589   auto *ReductionArrayTySize = llvm::ConstantInt::get(
2590       CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
2591   auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
2592                                                              CGF.VoidPtrTy);
2593   llvm::Value *Args[] = {
2594       IdentTLoc,                             // ident_t *<loc>
2595       ThreadId,                              // i32 <gtid>
2596       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2597       ReductionArrayTySize,                  // size_type sizeof(RedList)
2598       RL,                                    // void *RedList
2599       ReductionFn, // void (*) (void *, void *) <reduce_func>
2600       Lock         // kmp_critical_name *&<lock>
2601   };
2602   auto Res = CGF.EmitRuntimeCall(
2603       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2604                                        : OMPRTL__kmpc_reduce),
2605       Args);
2606
2607   // 5. Build switch(res)
2608   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2609   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2610
2611   // 6. Build case 1:
2612   //  ...
2613   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2614   //  ...
2615   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2616   // break;
2617   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2618   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2619   CGF.EmitBlock(Case1BB);
2620
2621   {
2622     CodeGenFunction::RunCleanupsScope Scope(CGF);
2623     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2624     llvm::Value *EndArgs[] = {
2625         IdentTLoc, // ident_t *<loc>
2626         ThreadId,  // i32 <gtid>
2627         Lock       // kmp_critical_name *&<lock>
2628     };
2629     CGF.EHStack
2630         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2631             NormalAndEHCleanup,
2632             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2633                                              : OMPRTL__kmpc_end_reduce),
2634             llvm::makeArrayRef(EndArgs));
2635     for (auto *E : ReductionOps) {
2636       CGF.EmitIgnoredExpr(E);
2637     }
2638   }
2639
2640   CGF.EmitBranch(DefaultBB);
2641
2642   // 7. Build case 2:
2643   //  ...
2644   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2645   //  ...
2646   // break;
2647   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
2648   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
2649   CGF.EmitBlock(Case2BB);
2650
2651   {
2652     CodeGenFunction::RunCleanupsScope Scope(CGF);
2653     if (!WithNowait) {
2654       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
2655       llvm::Value *EndArgs[] = {
2656           IdentTLoc, // ident_t *<loc>
2657           ThreadId,  // i32 <gtid>
2658           Lock       // kmp_critical_name *&<lock>
2659       };
2660       CGF.EHStack
2661           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2662               NormalAndEHCleanup,
2663               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
2664               llvm::makeArrayRef(EndArgs));
2665     }
2666     auto I = LHSExprs.begin();
2667     for (auto *E : ReductionOps) {
2668       const Expr *XExpr = nullptr;
2669       const Expr *EExpr = nullptr;
2670       const Expr *UpExpr = nullptr;
2671       BinaryOperatorKind BO = BO_Comma;
2672       if (auto *BO = dyn_cast<BinaryOperator>(E)) {
2673         if (BO->getOpcode() == BO_Assign) {
2674           XExpr = BO->getLHS();
2675           UpExpr = BO->getRHS();
2676         }
2677       }
2678       // Try to emit update expression as a simple atomic.
2679       auto *RHSExpr = UpExpr;
2680       if (RHSExpr) {
2681         // Analyze RHS part of the whole expression.
2682         if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
2683                 RHSExpr->IgnoreParenImpCasts())) {
2684           // If this is a conditional operator, analyze its condition for
2685           // min/max reduction operator.
2686           RHSExpr = ACO->getCond();
2687         }
2688         if (auto *BORHS =
2689                 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
2690           EExpr = BORHS->getRHS();
2691           BO = BORHS->getOpcode();
2692         }
2693       }
2694       if (XExpr) {
2695         auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
2696         LValue X = CGF.EmitLValue(XExpr);
2697         RValue E;
2698         if (EExpr)
2699           E = CGF.EmitAnyExpr(EExpr);
2700         CGF.EmitOMPAtomicSimpleUpdateExpr(
2701             X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
2702             [&CGF, UpExpr, VD](RValue XRValue) {
2703               CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
2704               PrivateScope.addPrivate(
2705                   VD, [&CGF, VD, XRValue]() -> llvm::Value *{
2706                     auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
2707                     CGF.EmitStoreThroughLValue(
2708                         XRValue,
2709                         CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
2710                     return LHSTemp;
2711                   });
2712               (void)PrivateScope.Privatize();
2713               return CGF.EmitAnyExpr(UpExpr);
2714             });
2715       } else {
2716         // Emit as a critical region.
2717         emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
2718           CGF.EmitIgnoredExpr(E);
2719         }, Loc);
2720       }
2721       ++I;
2722     }
2723   }
2724
2725   CGF.EmitBranch(DefaultBB);
2726   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
2727 }
2728
2729 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
2730                                        SourceLocation Loc) {
2731   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
2732   // global_tid);
2733   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2734   // Ignore return result until untied tasks are supported.
2735   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
2736 }
2737
2738 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
2739                                            OpenMPDirectiveKind InnerKind,
2740                                            const RegionCodeGenTy &CodeGen) {
2741   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind);
2742   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
2743 }
2744
namespace {
/// \brief Cancellation kinds passed as the 'cncl_kind' argument of the
/// __kmpc_cancel and __kmpc_cancellationpoint runtime calls.
///
/// The numeric values are emitted verbatim as i32 constants, so they are
/// spelled out explicitly.
/// NOTE(review): presumably these mirror the runtime library's own
/// cancellation-kind enumeration - confirm before changing any value.
enum RTCancelKind {
  /// No cancellation kind requested.
  CancelNoreq = 0,
  /// Cancellation of a 'parallel' region.
  CancelParallel = 1,
  /// Cancellation of a worksharing loop ('for').
  CancelLoop = 2,
  /// Cancellation of a 'sections' region.
  CancelSections = 3,
  /// Cancellation of a 'taskgroup' region.
  CancelTaskgroup = 4
};
} // end anonymous namespace
2754
2755 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
2756   RTCancelKind CancelKind = CancelNoreq;
2757   if (CancelRegion == OMPD_parallel)
2758     CancelKind = CancelParallel;
2759   else if (CancelRegion == OMPD_for)
2760     CancelKind = CancelLoop;
2761   else if (CancelRegion == OMPD_sections)
2762     CancelKind = CancelSections;
2763   else {
2764     assert(CancelRegion == OMPD_taskgroup);
2765     CancelKind = CancelTaskgroup;
2766   }
2767   return CancelKind;
2768 }
2769
2770 void CGOpenMPRuntime::emitCancellationPointCall(
2771     CodeGenFunction &CGF, SourceLocation Loc,
2772     OpenMPDirectiveKind CancelRegion) {
2773   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2774   // global_tid, kmp_int32 cncl_kind);
2775   if (auto *OMPRegionInfo =
2776           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2777     auto CancelDest =
2778         CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2779     if (CancelDest.isValid()) {
2780       llvm::Value *Args[] = {
2781           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2782           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2783       // Ignore return result until untied tasks are supported.
2784       auto *Result = CGF.EmitRuntimeCall(
2785           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
2786       // if (__kmpc_cancellationpoint()) {
2787       //  __kmpc_cancel_barrier();
2788       //   exit from construct;
2789       // }
2790       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2791       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2792       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2793       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2794       CGF.EmitBlock(ExitBB);
2795       // __kmpc_cancel_barrier();
2796       emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false);
2797       // exit from construct;
2798       CGF.EmitBranchThroughCleanup(CancelDest);
2799       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2800     }
2801   }
2802 }
2803
2804 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
2805                                      OpenMPDirectiveKind CancelRegion) {
2806   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2807   // kmp_int32 cncl_kind);
2808   if (auto *OMPRegionInfo =
2809           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2810     auto CancelDest =
2811         CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2812     if (CancelDest.isValid()) {
2813       llvm::Value *Args[] = {
2814           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2815           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
2816       // Ignore return result until untied tasks are supported.
2817       auto *Result =
2818           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
2819       // if (__kmpc_cancel()) {
2820       //  __kmpc_cancel_barrier();
2821       //   exit from construct;
2822       // }
2823       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2824       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2825       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2826       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2827       CGF.EmitBlock(ExitBB);
2828       // __kmpc_cancel_barrier();
2829       emitBarrierCall(CGF, Loc, OMPD_unknown, /*CheckForCancel=*/false);
2830       // exit from construct;
2831       CGF.EmitBranchThroughCleanup(CancelDest);
2832       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2833     }
2834   }
2835 }
2836