]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Merge LLDB 3.8
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / clang / lib / CodeGen / CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGOpenMPRuntime.h"
15 #include "CodeGenFunction.h"
16 #include "CGCleanup.h"
17 #include "clang/AST/Decl.h"
18 #include "clang/AST/StmtOpenMP.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/IR/CallSite.h"
21 #include "llvm/IR/DerivedTypes.h"
22 #include "llvm/IR/GlobalValue.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <cassert>
26
27 using namespace clang;
28 using namespace CodeGen;
29
30 namespace {
31 /// \brief Base class for handling code generation inside OpenMP regions.
32 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
33 public:
34   /// \brief Kinds of OpenMP regions used in codegen.
35   enum CGOpenMPRegionKind {
36     /// \brief Region with outlined function for standalone 'parallel'
37     /// directive.
38     ParallelOutlinedRegion,
39     /// \brief Region with outlined function for standalone 'task' directive.
40     TaskOutlinedRegion,
41     /// \brief Region for constructs that do not require function outlining,
42     /// like 'for', 'sections', 'atomic' etc. directives.
43     InlinedRegion,
44     /// \brief Region with outlined function for standalone 'target' directive.
45     TargetRegion,
46   };
47
48   CGOpenMPRegionInfo(const CapturedStmt &CS,
49                      const CGOpenMPRegionKind RegionKind,
50                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
51                      bool HasCancel)
52       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
53         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
54
55   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
56                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
57                      bool HasCancel)
58       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
59         Kind(Kind), HasCancel(HasCancel) {}
60
61   /// \brief Get a variable or parameter for storing global thread id
62   /// inside OpenMP construct.
63   virtual const VarDecl *getThreadIDVariable() const = 0;
64
65   /// \brief Emit the captured statement body.
66   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
67
68   /// \brief Get an LValue for the current ThreadID variable.
69   /// \return LValue for thread id variable. This LValue always has type int32*.
70   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
71
72   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
73
74   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
75
76   bool hasCancel() const { return HasCancel; }
77
78   static bool classof(const CGCapturedStmtInfo *Info) {
79     return Info->getKind() == CR_OpenMP;
80   }
81
82 protected:
83   CGOpenMPRegionKind RegionKind;
84   const RegionCodeGenTy &CodeGen;
85   OpenMPDirectiveKind Kind;
86   bool HasCancel;
87 };
88
89 /// \brief API for captured statement code generation in OpenMP constructs.
90 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
91 public:
92   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
93                              const RegionCodeGenTy &CodeGen,
94                              OpenMPDirectiveKind Kind, bool HasCancel)
95       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
96                            HasCancel),
97         ThreadIDVar(ThreadIDVar) {
98     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
99   }
100   /// \brief Get a variable or parameter for storing global thread id
101   /// inside OpenMP construct.
102   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
103
104   /// \brief Get the name of the capture helper.
105   StringRef getHelperName() const override { return ".omp_outlined."; }
106
107   static bool classof(const CGCapturedStmtInfo *Info) {
108     return CGOpenMPRegionInfo::classof(Info) &&
109            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
110                ParallelOutlinedRegion;
111   }
112
113 private:
114   /// \brief A variable or parameter storing global thread id for OpenMP
115   /// constructs.
116   const VarDecl *ThreadIDVar;
117 };
118
119 /// \brief API for captured statement code generation in OpenMP constructs.
120 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
121 public:
122   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
123                                  const VarDecl *ThreadIDVar,
124                                  const RegionCodeGenTy &CodeGen,
125                                  OpenMPDirectiveKind Kind, bool HasCancel)
126       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
127         ThreadIDVar(ThreadIDVar) {
128     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
129   }
130   /// \brief Get a variable or parameter for storing global thread id
131   /// inside OpenMP construct.
132   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
133
134   /// \brief Get an LValue for the current ThreadID variable.
135   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
136
137   /// \brief Get the name of the capture helper.
138   StringRef getHelperName() const override { return ".omp_outlined."; }
139
140   static bool classof(const CGCapturedStmtInfo *Info) {
141     return CGOpenMPRegionInfo::classof(Info) &&
142            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
143                TaskOutlinedRegion;
144   }
145
146 private:
147   /// \brief A variable or parameter storing global thread id for OpenMP
148   /// constructs.
149   const VarDecl *ThreadIDVar;
150 };
151
152 /// \brief API for inlined captured statement code generation in OpenMP
153 /// constructs.
154 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
155 public:
156   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
157                             const RegionCodeGenTy &CodeGen,
158                             OpenMPDirectiveKind Kind, bool HasCancel)
159       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
160         OldCSI(OldCSI),
161         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
162   // \brief Retrieve the value of the context parameter.
163   llvm::Value *getContextValue() const override {
164     if (OuterRegionInfo)
165       return OuterRegionInfo->getContextValue();
166     llvm_unreachable("No context value for inlined OpenMP region");
167   }
168   void setContextValue(llvm::Value *V) override {
169     if (OuterRegionInfo) {
170       OuterRegionInfo->setContextValue(V);
171       return;
172     }
173     llvm_unreachable("No context value for inlined OpenMP region");
174   }
175   /// \brief Lookup the captured field decl for a variable.
176   const FieldDecl *lookup(const VarDecl *VD) const override {
177     if (OuterRegionInfo)
178       return OuterRegionInfo->lookup(VD);
179     // If there is no outer outlined region,no need to lookup in a list of
180     // captured variables, we can use the original one.
181     return nullptr;
182   }
183   FieldDecl *getThisFieldDecl() const override {
184     if (OuterRegionInfo)
185       return OuterRegionInfo->getThisFieldDecl();
186     return nullptr;
187   }
188   /// \brief Get a variable or parameter for storing global thread id
189   /// inside OpenMP construct.
190   const VarDecl *getThreadIDVariable() const override {
191     if (OuterRegionInfo)
192       return OuterRegionInfo->getThreadIDVariable();
193     return nullptr;
194   }
195
196   /// \brief Get the name of the capture helper.
197   StringRef getHelperName() const override {
198     if (auto *OuterRegionInfo = getOldCSI())
199       return OuterRegionInfo->getHelperName();
200     llvm_unreachable("No helper name for inlined OpenMP construct");
201   }
202
203   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
204
205   static bool classof(const CGCapturedStmtInfo *Info) {
206     return CGOpenMPRegionInfo::classof(Info) &&
207            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
208   }
209
210 private:
211   /// \brief CodeGen info about outer OpenMP region.
212   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
213   CGOpenMPRegionInfo *OuterRegionInfo;
214 };
215
216 /// \brief API for captured statement code generation in OpenMP target
217 /// constructs. For this captures, implicit parameters are used instead of the
218 /// captured fields.
219 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
220 public:
221   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
222                            const RegionCodeGenTy &CodeGen)
223       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
224                            /*HasCancel = */ false) {}
225
226   /// \brief This is unused for target regions because each starts executing
227   /// with a single thread.
228   const VarDecl *getThreadIDVariable() const override { return nullptr; }
229
230   /// \brief Get the name of the capture helper.
231   StringRef getHelperName() const override { return ".omp_offloading."; }
232
233   static bool classof(const CGCapturedStmtInfo *Info) {
234     return CGOpenMPRegionInfo::classof(Info) &&
235            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
236   }
237 };
238
239 /// \brief RAII for emitting code of OpenMP constructs.
240 class InlinedOpenMPRegionRAII {
241   CodeGenFunction &CGF;
242
243 public:
244   /// \brief Constructs region for combined constructs.
245   /// \param CodeGen Code generation sequence for combined directives. Includes
246   /// a list of functions used for code generation of implicitly inlined
247   /// regions.
248   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
249                           OpenMPDirectiveKind Kind, bool HasCancel)
250       : CGF(CGF) {
251     // Start emission for the construct.
252     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
253         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
254   }
255   ~InlinedOpenMPRegionRAII() {
256     // Restore original CapturedStmtInfo only if we're done with code emission.
257     auto *OldCSI =
258         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
259     delete CGF.CapturedStmtInfo;
260     CGF.CapturedStmtInfo = OldCSI;
261   }
262 };
263
264 } // anonymous namespace
265
266 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
267                                       QualType Ty) {
268   AlignmentSource Source;
269   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
270   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
271                             Ty->getPointeeType(), Source);
272 }
273
274 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
275   return emitLoadOfPointerLValue(CGF,
276                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
277                                  getThreadIDVariable()->getType());
278 }
279
280 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
281   if (!CGF.HaveInsertPoint())
282     return;
283   // 1.2.2 OpenMP Language Terminology
284   // Structured block - An executable statement with a single entry at the
285   // top and a single exit at the bottom.
286   // The point of exit cannot be a branch out of the structured block.
287   // longjmp() and throw() must not violate the entry/exit criteria.
288   CGF.EHStack.pushTerminate();
289   {
290     CodeGenFunction::RunCleanupsScope Scope(CGF);
291     CodeGen(CGF);
292   }
293   CGF.EHStack.popTerminate();
294 }
295
296 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
297     CodeGenFunction &CGF) {
298   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
299                             getThreadIDVariable()->getType(),
300                             AlignmentSource::Decl);
301 }
302
303 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
304     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr) {
305   IdentTy = llvm::StructType::create(
306       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
307       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
308       CGM.Int8PtrTy /* psource */, nullptr);
309   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
310   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
311                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
312   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
313   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
314 }
315
316 void CGOpenMPRuntime::clear() {
317   InternalVars.clear();
318 }
319
320 // Layout information for ident_t.
321 static CharUnits getIdentAlign(CodeGenModule &CGM) {
322   return CGM.getPointerAlign();
323 }
324 static CharUnits getIdentSize(CodeGenModule &CGM) {
325   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
326   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
327 }
328 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
329   // All the fields except the last are i32, so this works beautifully.
330   return unsigned(Field) * CharUnits::fromQuantity(4);
331 }
332 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
333                                    CGOpenMPRuntime::IdentFieldIndex Field,
334                                    const llvm::Twine &Name = "") {
335   auto Offset = getOffsetOfIdentField(Field);
336   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
337 }
338
339 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
340     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
341     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
342   assert(ThreadIDVar->getType()->isPointerType() &&
343          "thread id variable must be of type kmp_int32 *");
344   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
345   CodeGenFunction CGF(CGM, true);
346   bool HasCancel = false;
347   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
348     HasCancel = OPD->hasCancel();
349   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
350     HasCancel = OPSD->hasCancel();
351   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
352     HasCancel = OPFD->hasCancel();
353   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
354                                     HasCancel);
355   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
356   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
357 }
358
359 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
360     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
361     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
362   assert(!ThreadIDVar->getType()->isPointerType() &&
363          "thread id variable must be of type kmp_int32 for tasks");
364   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
365   CodeGenFunction CGF(CGM, true);
366   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
367                                         InnermostKind,
368                                         cast<OMPTaskDirective>(D).hasCancel());
369   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
370   return CGF.GenerateCapturedStmtFunction(*CS);
371 }
372
373 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
374   CharUnits Align = getIdentAlign(CGM);
375   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
376   if (!Entry) {
377     if (!DefaultOpenMPPSource) {
378       // Initialize default location for psource field of ident_t structure of
379       // all ident_t objects. Format is ";file;function;line;column;;".
380       // Taken from
381       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
382       DefaultOpenMPPSource =
383           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
384       DefaultOpenMPPSource =
385           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
386     }
387     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
388         CGM.getModule(), IdentTy, /*isConstant*/ true,
389         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
390     DefaultOpenMPLocation->setUnnamedAddr(true);
391     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
392
393     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
394     llvm::Constant *Values[] = {Zero,
395                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
396                                 Zero, Zero, DefaultOpenMPPSource};
397     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
398     DefaultOpenMPLocation->setInitializer(Init);
399     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
400   }
401   return Address(Entry, Align);
402 }
403
404 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
405                                                  SourceLocation Loc,
406                                                  OpenMPLocationFlags Flags) {
407   // If no debug info is generated - return global default location.
408   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
409       Loc.isInvalid())
410     return getOrCreateDefaultLocation(Flags).getPointer();
411
412   assert(CGF.CurFn && "No function in current CodeGenFunction.");
413
414   Address LocValue = Address::invalid();
415   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
416   if (I != OpenMPLocThreadIDMap.end())
417     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
418
419   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
420   // GetOpenMPThreadID was called before this routine.
421   if (!LocValue.isValid()) {
422     // Generate "ident_t .kmpc_loc.addr;"
423     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
424                                       ".kmpc_loc.addr");
425     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
426     Elem.second.DebugLoc = AI.getPointer();
427     LocValue = AI;
428
429     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
430     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
431     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
432                              CGM.getSize(getIdentSize(CGF.CGM)));
433   }
434
435   // char **psource = &.kmpc_loc_<flags>.addr.psource;
436   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
437
438   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
439   if (OMPDebugLoc == nullptr) {
440     SmallString<128> Buffer2;
441     llvm::raw_svector_ostream OS2(Buffer2);
442     // Build debug location
443     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
444     OS2 << ";" << PLoc.getFilename() << ";";
445     if (const FunctionDecl *FD =
446             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
447       OS2 << FD->getQualifiedNameAsString();
448     }
449     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
450     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
451     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
452   }
453   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
454   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
455
456   // Our callers always pass this to a runtime function, so for
457   // convenience, go ahead and return a naked pointer.
458   return LocValue.getPointer();
459 }
460
461 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
462                                           SourceLocation Loc) {
463   assert(CGF.CurFn && "No function in current CodeGenFunction.");
464
465   llvm::Value *ThreadID = nullptr;
466   // Check whether we've already cached a load of the thread id in this
467   // function.
468   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
469   if (I != OpenMPLocThreadIDMap.end()) {
470     ThreadID = I->second.ThreadID;
471     if (ThreadID != nullptr)
472       return ThreadID;
473   }
474   if (auto OMPRegionInfo =
475           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
476     if (OMPRegionInfo->getThreadIDVariable()) {
477       // Check if this an outlined function with thread id passed as argument.
478       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
479       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
480       // If value loaded in entry block, cache it and use it everywhere in
481       // function.
482       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
483         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
484         Elem.second.ThreadID = ThreadID;
485       }
486       return ThreadID;
487     }
488   }
489
490   // This is not an outlined function region - need to call __kmpc_int32
491   // kmpc_global_thread_num(ident_t *loc).
492   // Generate thread id value and cache this value for use across the
493   // function.
494   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
495   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
496   ThreadID =
497       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
498                           emitUpdateLocation(CGF, Loc));
499   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
500   Elem.second.ThreadID = ThreadID;
501   return ThreadID;
502 }
503
504 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
505   assert(CGF.CurFn && "No function in current CodeGenFunction.");
506   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
507     OpenMPLocThreadIDMap.erase(CGF.CurFn);
508 }
509
510 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
511   return llvm::PointerType::getUnqual(IdentTy);
512 }
513
514 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
515   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
516 }
517
518 llvm::Constant *
519 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
520   llvm::Constant *RTLFn = nullptr;
521   switch (Function) {
522   case OMPRTL__kmpc_fork_call: {
523     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
524     // microtask, ...);
525     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
526                                 getKmpc_MicroPointerTy()};
527     llvm::FunctionType *FnTy =
528         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
529     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
530     break;
531   }
532   case OMPRTL__kmpc_global_thread_num: {
533     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
534     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
535     llvm::FunctionType *FnTy =
536         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
537     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
538     break;
539   }
540   case OMPRTL__kmpc_threadprivate_cached: {
541     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
542     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
543     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
544                                 CGM.VoidPtrTy, CGM.SizeTy,
545                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
546     llvm::FunctionType *FnTy =
547         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
548     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
549     break;
550   }
551   case OMPRTL__kmpc_critical: {
552     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
553     // kmp_critical_name *crit);
554     llvm::Type *TypeParams[] = {
555         getIdentTyPointerTy(), CGM.Int32Ty,
556         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
557     llvm::FunctionType *FnTy =
558         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
559     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
560     break;
561   }
562   case OMPRTL__kmpc_critical_with_hint: {
563     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
564     // kmp_critical_name *crit, uintptr_t hint);
565     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
566                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
567                                 CGM.IntPtrTy};
568     llvm::FunctionType *FnTy =
569         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
570     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
571     break;
572   }
573   case OMPRTL__kmpc_threadprivate_register: {
574     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
575     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
576     // typedef void *(*kmpc_ctor)(void *);
577     auto KmpcCtorTy =
578         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
579                                 /*isVarArg*/ false)->getPointerTo();
580     // typedef void *(*kmpc_cctor)(void *, void *);
581     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
582     auto KmpcCopyCtorTy =
583         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
584                                 /*isVarArg*/ false)->getPointerTo();
585     // typedef void (*kmpc_dtor)(void *);
586     auto KmpcDtorTy =
587         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
588             ->getPointerTo();
589     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
590                               KmpcCopyCtorTy, KmpcDtorTy};
591     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
592                                         /*isVarArg*/ false);
593     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
594     break;
595   }
596   case OMPRTL__kmpc_end_critical: {
597     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
598     // kmp_critical_name *crit);
599     llvm::Type *TypeParams[] = {
600         getIdentTyPointerTy(), CGM.Int32Ty,
601         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
602     llvm::FunctionType *FnTy =
603         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
604     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
605     break;
606   }
607   case OMPRTL__kmpc_cancel_barrier: {
608     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
609     // global_tid);
610     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
611     llvm::FunctionType *FnTy =
612         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
613     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
614     break;
615   }
616   case OMPRTL__kmpc_barrier: {
617     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
618     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
619     llvm::FunctionType *FnTy =
620         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
621     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
622     break;
623   }
624   case OMPRTL__kmpc_for_static_fini: {
625     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
626     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
627     llvm::FunctionType *FnTy =
628         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
629     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
630     break;
631   }
632   case OMPRTL__kmpc_push_num_threads: {
633     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
634     // kmp_int32 num_threads)
635     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
636                                 CGM.Int32Ty};
637     llvm::FunctionType *FnTy =
638         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
639     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
640     break;
641   }
642   case OMPRTL__kmpc_serialized_parallel: {
643     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
644     // global_tid);
645     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
646     llvm::FunctionType *FnTy =
647         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
648     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
649     break;
650   }
651   case OMPRTL__kmpc_end_serialized_parallel: {
652     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
653     // global_tid);
654     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
655     llvm::FunctionType *FnTy =
656         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
657     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
658     break;
659   }
660   case OMPRTL__kmpc_flush: {
661     // Build void __kmpc_flush(ident_t *loc);
662     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
663     llvm::FunctionType *FnTy =
664         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
665     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
666     break;
667   }
668   case OMPRTL__kmpc_master: {
669     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
670     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
671     llvm::FunctionType *FnTy =
672         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
673     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
674     break;
675   }
676   case OMPRTL__kmpc_end_master: {
677     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
678     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
679     llvm::FunctionType *FnTy =
680         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
681     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
682     break;
683   }
684   case OMPRTL__kmpc_omp_taskyield: {
685     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
686     // int end_part);
687     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
688     llvm::FunctionType *FnTy =
689         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
690     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
691     break;
692   }
693   case OMPRTL__kmpc_single: {
694     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
695     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
696     llvm::FunctionType *FnTy =
697         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
698     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
699     break;
700   }
701   case OMPRTL__kmpc_end_single: {
702     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
703     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
704     llvm::FunctionType *FnTy =
705         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
706     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
707     break;
708   }
709   case OMPRTL__kmpc_omp_task_alloc: {
710     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
711     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
712     // kmp_routine_entry_t *task_entry);
713     assert(KmpRoutineEntryPtrTy != nullptr &&
714            "Type kmp_routine_entry_t must be created.");
715     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
716                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
717     // Return void * and then cast to particular kmp_task_t type.
718     llvm::FunctionType *FnTy =
719         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
720     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
721     break;
722   }
723   case OMPRTL__kmpc_omp_task: {
724     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
725     // *new_task);
726     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
727                                 CGM.VoidPtrTy};
728     llvm::FunctionType *FnTy =
729         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
730     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
731     break;
732   }
733   case OMPRTL__kmpc_copyprivate: {
734     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
735     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
736     // kmp_int32 didit);
737     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
738     auto *CpyFnTy =
739         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
740     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
741                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
742                                 CGM.Int32Ty};
743     llvm::FunctionType *FnTy =
744         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
745     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
746     break;
747   }
748   case OMPRTL__kmpc_reduce: {
749     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
750     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
751     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
752     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
753     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
754                                                /*isVarArg=*/false);
755     llvm::Type *TypeParams[] = {
756         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
757         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
758         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
759     llvm::FunctionType *FnTy =
760         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
761     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
762     break;
763   }
764   case OMPRTL__kmpc_reduce_nowait: {
765     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
766     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
767     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
768     // *lck);
769     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
770     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
771                                                /*isVarArg=*/false);
772     llvm::Type *TypeParams[] = {
773         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
774         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
775         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
776     llvm::FunctionType *FnTy =
777         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
778     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
779     break;
780   }
781   case OMPRTL__kmpc_end_reduce: {
782     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
783     // kmp_critical_name *lck);
784     llvm::Type *TypeParams[] = {
785         getIdentTyPointerTy(), CGM.Int32Ty,
786         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
787     llvm::FunctionType *FnTy =
788         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
789     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
790     break;
791   }
792   case OMPRTL__kmpc_end_reduce_nowait: {
793     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
794     // kmp_critical_name *lck);
795     llvm::Type *TypeParams[] = {
796         getIdentTyPointerTy(), CGM.Int32Ty,
797         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
798     llvm::FunctionType *FnTy =
799         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
800     RTLFn =
801         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
802     break;
803   }
804   case OMPRTL__kmpc_omp_task_begin_if0: {
805     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
806     // kmp_task_t *new_task);
807     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
808                                 CGM.VoidPtrTy};
809     llvm::FunctionType *FnTy =
810         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
811     RTLFn =
812         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
813     break;
814   }
815   case OMPRTL__kmpc_omp_task_complete_if0: {
816     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
817     // kmp_task_t *new_task);
818     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
819                                 CGM.VoidPtrTy};
820     llvm::FunctionType *FnTy =
821         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
822     RTLFn = CGM.CreateRuntimeFunction(FnTy,
823                                       /*Name=*/"__kmpc_omp_task_complete_if0");
824     break;
825   }
826   case OMPRTL__kmpc_ordered: {
827     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
828     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
829     llvm::FunctionType *FnTy =
830         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
831     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
832     break;
833   }
834   case OMPRTL__kmpc_end_ordered: {
835     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
836     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
837     llvm::FunctionType *FnTy =
838         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
839     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
840     break;
841   }
842   case OMPRTL__kmpc_omp_taskwait: {
843     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
844     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
845     llvm::FunctionType *FnTy =
846         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
847     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
848     break;
849   }
850   case OMPRTL__kmpc_taskgroup: {
851     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
852     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
853     llvm::FunctionType *FnTy =
854         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
855     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
856     break;
857   }
858   case OMPRTL__kmpc_end_taskgroup: {
859     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
860     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
861     llvm::FunctionType *FnTy =
862         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
863     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
864     break;
865   }
866   case OMPRTL__kmpc_push_proc_bind: {
867     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
868     // int proc_bind)
869     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
870     llvm::FunctionType *FnTy =
871         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
872     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
873     break;
874   }
875   case OMPRTL__kmpc_omp_task_with_deps: {
876     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
877     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
878     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
879     llvm::Type *TypeParams[] = {
880         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
881         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
882     llvm::FunctionType *FnTy =
883         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
884     RTLFn =
885         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
886     break;
887   }
888   case OMPRTL__kmpc_omp_wait_deps: {
889     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
890     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
891     // kmp_depend_info_t *noalias_dep_list);
892     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
893                                 CGM.Int32Ty,           CGM.VoidPtrTy,
894                                 CGM.Int32Ty,           CGM.VoidPtrTy};
895     llvm::FunctionType *FnTy =
896         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
897     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
898     break;
899   }
900   case OMPRTL__kmpc_cancellationpoint: {
901     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
902     // global_tid, kmp_int32 cncl_kind)
903     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
904     llvm::FunctionType *FnTy =
905         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
906     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
907     break;
908   }
909   case OMPRTL__kmpc_cancel: {
910     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
911     // kmp_int32 cncl_kind)
912     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
913     llvm::FunctionType *FnTy =
914         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
915     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
916     break;
917   }
918   case OMPRTL__tgt_target: {
919     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
920     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
921     // *arg_types);
922     llvm::Type *TypeParams[] = {CGM.Int32Ty,
923                                 CGM.VoidPtrTy,
924                                 CGM.Int32Ty,
925                                 CGM.VoidPtrPtrTy,
926                                 CGM.VoidPtrPtrTy,
927                                 CGM.SizeTy->getPointerTo(),
928                                 CGM.Int32Ty->getPointerTo()};
929     llvm::FunctionType *FnTy =
930         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
931     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
932     break;
933   }
934   }
935   return RTLFn;
936 }
937
938 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
939   auto &C = CGF.getContext();
940   llvm::Value *Size = nullptr;
941   auto SizeInChars = C.getTypeSizeInChars(Ty);
942   if (SizeInChars.isZero()) {
943     // getTypeSizeInChars() returns 0 for a VLA.
944     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
945       llvm::Value *ArraySize;
946       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
947       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
948     }
949     SizeInChars = C.getTypeSizeInChars(Ty);
950     assert(!SizeInChars.isZero());
951     Size = CGF.Builder.CreateNUWMul(
952         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
953   } else
954     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
955   return Size;
956 }
957
958 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
959                                                              bool IVSigned) {
960   assert((IVSize == 32 || IVSize == 64) &&
961          "IV size is not compatible with the omp runtime");
962   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
963                                        : "__kmpc_for_static_init_4u")
964                            : (IVSigned ? "__kmpc_for_static_init_8"
965                                        : "__kmpc_for_static_init_8u");
966   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
967   auto PtrTy = llvm::PointerType::getUnqual(ITy);
968   llvm::Type *TypeParams[] = {
969     getIdentTyPointerTy(),                     // loc
970     CGM.Int32Ty,                               // tid
971     CGM.Int32Ty,                               // schedtype
972     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
973     PtrTy,                                     // p_lower
974     PtrTy,                                     // p_upper
975     PtrTy,                                     // p_stride
976     ITy,                                       // incr
977     ITy                                        // chunk
978   };
979   llvm::FunctionType *FnTy =
980       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
981   return CGM.CreateRuntimeFunction(FnTy, Name);
982 }
983
984 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
985                                                             bool IVSigned) {
986   assert((IVSize == 32 || IVSize == 64) &&
987          "IV size is not compatible with the omp runtime");
988   auto Name =
989       IVSize == 32
990           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
991           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
992   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
993   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
994                                CGM.Int32Ty,           // tid
995                                CGM.Int32Ty,           // schedtype
996                                ITy,                   // lower
997                                ITy,                   // upper
998                                ITy,                   // stride
999                                ITy                    // chunk
1000   };
1001   llvm::FunctionType *FnTy =
1002       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1003   return CGM.CreateRuntimeFunction(FnTy, Name);
1004 }
1005
1006 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1007                                                             bool IVSigned) {
1008   assert((IVSize == 32 || IVSize == 64) &&
1009          "IV size is not compatible with the omp runtime");
1010   auto Name =
1011       IVSize == 32
1012           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1013           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1014   llvm::Type *TypeParams[] = {
1015       getIdentTyPointerTy(), // loc
1016       CGM.Int32Ty,           // tid
1017   };
1018   llvm::FunctionType *FnTy =
1019       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1020   return CGM.CreateRuntimeFunction(FnTy, Name);
1021 }
1022
1023 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1024                                                             bool IVSigned) {
1025   assert((IVSize == 32 || IVSize == 64) &&
1026          "IV size is not compatible with the omp runtime");
1027   auto Name =
1028       IVSize == 32
1029           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1030           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1031   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1032   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1033   llvm::Type *TypeParams[] = {
1034     getIdentTyPointerTy(),                     // loc
1035     CGM.Int32Ty,                               // tid
1036     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1037     PtrTy,                                     // p_lower
1038     PtrTy,                                     // p_upper
1039     PtrTy                                      // p_stride
1040   };
1041   llvm::FunctionType *FnTy =
1042       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1043   return CGM.CreateRuntimeFunction(FnTy, Name);
1044 }
1045
1046 llvm::Constant *
1047 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1048   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1049          !CGM.getContext().getTargetInfo().isTLSSupported());
1050   // Lookup the entry, lazily creating it if necessary.
1051   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1052                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1053 }
1054
1055 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1056                                                 const VarDecl *VD,
1057                                                 Address VDAddr,
1058                                                 SourceLocation Loc) {
1059   if (CGM.getLangOpts().OpenMPUseTLS &&
1060       CGM.getContext().getTargetInfo().isTLSSupported())
1061     return VDAddr;
1062
1063   auto VarTy = VDAddr.getElementType();
1064   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1065                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1066                                                        CGM.Int8PtrTy),
1067                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1068                          getOrCreateThreadPrivateCache(VD)};
1069   return Address(CGF.EmitRuntimeCall(
1070       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1071                  VDAddr.getAlignment());
1072 }
1073
1074 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1075     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1076     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1077   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1078   // library.
1079   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1080   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1081                       OMPLoc);
1082   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1083   // to register constructor/destructor for variable.
1084   llvm::Value *Args[] = {OMPLoc,
1085                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1086                                                        CGM.VoidPtrTy),
1087                          Ctor, CopyCtor, Dtor};
1088   CGF.EmitRuntimeCall(
1089       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1090 }
1091
1092 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1093     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1094     bool PerformInit, CodeGenFunction *CGF) {
1095   if (CGM.getLangOpts().OpenMPUseTLS &&
1096       CGM.getContext().getTargetInfo().isTLSSupported())
1097     return nullptr;
1098
1099   VD = VD->getDefinition(CGM.getContext());
1100   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1101     ThreadPrivateWithDefinition.insert(VD);
1102     QualType ASTTy = VD->getType();
1103
1104     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1105     auto Init = VD->getAnyInitializer();
1106     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1107       // Generate function that re-emits the declaration's initializer into the
1108       // threadprivate copy of the variable VD
1109       CodeGenFunction CtorCGF(CGM);
1110       FunctionArgList Args;
1111       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1112                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1113       Args.push_back(&Dst);
1114
1115       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1116           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1117           /*isVariadic=*/false);
1118       auto FTy = CGM.getTypes().GetFunctionType(FI);
1119       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1120           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1121       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1122                             Args, SourceLocation());
1123       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1124           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1125           CGM.getContext().VoidPtrTy, Dst.getLocation());
1126       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1127       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1128                                              CtorCGF.ConvertTypeForMem(ASTTy));
1129       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1130                                /*IsInitializer=*/true);
1131       ArgVal = CtorCGF.EmitLoadOfScalar(
1132           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1133           CGM.getContext().VoidPtrTy, Dst.getLocation());
1134       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1135       CtorCGF.FinishFunction();
1136       Ctor = Fn;
1137     }
1138     if (VD->getType().isDestructedType() != QualType::DK_none) {
1139       // Generate function that emits destructor call for the threadprivate copy
1140       // of the variable VD
1141       CodeGenFunction DtorCGF(CGM);
1142       FunctionArgList Args;
1143       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1144                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1145       Args.push_back(&Dst);
1146
1147       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1148           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1149           /*isVariadic=*/false);
1150       auto FTy = CGM.getTypes().GetFunctionType(FI);
1151       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1152           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1153       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1154                             SourceLocation());
1155       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1156           DtorCGF.GetAddrOfLocalVar(&Dst),
1157           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1158       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1159                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1160                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1161       DtorCGF.FinishFunction();
1162       Dtor = Fn;
1163     }
1164     // Do not emit init function if it is not required.
1165     if (!Ctor && !Dtor)
1166       return nullptr;
1167
1168     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1169     auto CopyCtorTy =
1170         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1171                                 /*isVarArg=*/false)->getPointerTo();
1172     // Copying constructor for the threadprivate variable.
1173     // Must be NULL - reserved by runtime, but currently it requires that this
1174     // parameter is always NULL. Otherwise it fires assertion.
1175     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1176     if (Ctor == nullptr) {
1177       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1178                                             /*isVarArg=*/false)->getPointerTo();
1179       Ctor = llvm::Constant::getNullValue(CtorTy);
1180     }
1181     if (Dtor == nullptr) {
1182       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1183                                             /*isVarArg=*/false)->getPointerTo();
1184       Dtor = llvm::Constant::getNullValue(DtorTy);
1185     }
1186     if (!CGF) {
1187       auto InitFunctionTy =
1188           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1189       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1190           InitFunctionTy, ".__omp_threadprivate_init_.",
1191           CGM.getTypes().arrangeNullaryFunction());
1192       CodeGenFunction InitCGF(CGM);
1193       FunctionArgList ArgList;
1194       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1195                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1196                             Loc);
1197       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1198       InitCGF.FinishFunction();
1199       return InitFunction;
1200     }
1201     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1202   }
1203   return nullptr;
1204 }
1205
1206 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1207 /// function. Here is the logic:
1208 /// if (Cond) {
1209 ///   ThenGen();
1210 /// } else {
1211 ///   ElseGen();
1212 /// }
1213 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1214                             const RegionCodeGenTy &ThenGen,
1215                             const RegionCodeGenTy &ElseGen) {
1216   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1217
1218   // If the condition constant folds and can be elided, try to avoid emitting
1219   // the condition and the dead arm of the if/else.
1220   bool CondConstant;
1221   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1222     CodeGenFunction::RunCleanupsScope Scope(CGF);
1223     if (CondConstant) {
1224       ThenGen(CGF);
1225     } else {
1226       ElseGen(CGF);
1227     }
1228     return;
1229   }
1230
1231   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1232   // emit the conditional branch.
1233   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1234   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1235   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1236   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1237
1238   // Emit the 'then' code.
1239   CGF.EmitBlock(ThenBlock);
1240   {
1241     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1242     ThenGen(CGF);
1243   }
1244   CGF.EmitBranch(ContBlock);
1245   // Emit the 'else' code if present.
1246   {
1247     // There is no need to emit line number for unconditional branch.
1248     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1249     CGF.EmitBlock(ElseBlock);
1250   }
1251   {
1252     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1253     ElseGen(CGF);
1254   }
1255   {
1256     // There is no need to emit line number for unconditional branch.
1257     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1258     CGF.EmitBranch(ContBlock);
1259   }
1260   // Emit the continuation block for code after the if.
1261   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1262 }
1263
1264 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1265                                        llvm::Value *OutlinedFn,
1266                                        ArrayRef<llvm::Value *> CapturedVars,
1267                                        const Expr *IfCond) {
1268   if (!CGF.HaveInsertPoint())
1269     return;
1270   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1271   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1272                     RTLoc](CodeGenFunction &CGF) {
1273     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1274     llvm::Value *Args[] = {
1275         RTLoc,
1276         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1277         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
1278     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1279     RealArgs.append(std::begin(Args), std::end(Args));
1280     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1281
1282     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1283     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1284   };
1285   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1286                     Loc](CodeGenFunction &CGF) {
1287     auto ThreadID = getThreadID(CGF, Loc);
1288     // Build calls:
1289     // __kmpc_serialized_parallel(&Loc, GTid);
1290     llvm::Value *Args[] = {RTLoc, ThreadID};
1291     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1292                         Args);
1293
1294     // OutlinedFn(&GTid, &zero, CapturedStruct);
1295     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1296     Address ZeroAddr =
1297       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1298                            /*Name*/ ".zero.addr");
1299     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1300     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1301     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1302     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1303     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1304     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1305
1306     // __kmpc_end_serialized_parallel(&Loc, GTid);
1307     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1308     CGF.EmitRuntimeCall(
1309         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1310   };
1311   if (IfCond) {
1312     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1313   } else {
1314     CodeGenFunction::RunCleanupsScope Scope(CGF);
1315     ThenGen(CGF);
1316   }
1317 }
1318
1319 // If we're inside an (outlined) parallel region, use the region info's
1320 // thread-ID variable (it is passed in a first argument of the outlined function
1321 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1322 // regular serial code region, get thread ID by calling kmp_int32
1323 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1324 // return the address of that temp.
1325 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1326                                              SourceLocation Loc) {
1327   if (auto OMPRegionInfo =
1328           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1329     if (OMPRegionInfo->getThreadIDVariable())
1330       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1331
1332   auto ThreadID = getThreadID(CGF, Loc);
1333   auto Int32Ty =
1334       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1335   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1336   CGF.EmitStoreOfScalar(ThreadID,
1337                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1338
1339   return ThreadIDTemp;
1340 }
1341
1342 llvm::Constant *
1343 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1344                                              const llvm::Twine &Name) {
1345   SmallString<256> Buffer;
1346   llvm::raw_svector_ostream Out(Buffer);
1347   Out << Name;
1348   auto RuntimeName = Out.str();
1349   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1350   if (Elem.second) {
1351     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1352            "OMP internal variable has different type than requested");
1353     return &*Elem.second;
1354   }
1355
1356   return Elem.second = new llvm::GlobalVariable(
1357              CGM.getModule(), Ty, /*IsConstant*/ false,
1358              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1359              Elem.first());
1360 }
1361
1362 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1363   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1364   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1365 }
1366
1367 namespace {
1368 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1369   llvm::Value *Callee;
1370   llvm::Value *Args[N];
1371
1372 public:
1373   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1374       : Callee(Callee) {
1375     assert(CleanupArgs.size() == N);
1376     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1377   }
1378   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1379     if (!CGF.HaveInsertPoint())
1380       return;
1381     CGF.EmitRuntimeCall(Callee, Args);
1382   }
1383 };
1384 } // anonymous namespace
1385
1386 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1387                                          StringRef CriticalName,
1388                                          const RegionCodeGenTy &CriticalOpGen,
1389                                          SourceLocation Loc, const Expr *Hint) {
1390   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1391   // CriticalOpGen();
1392   // __kmpc_end_critical(ident_t *, gtid, Lock);
1393   // Prepare arguments and build a call to __kmpc_critical
1394   if (!CGF.HaveInsertPoint())
1395     return;
1396   CodeGenFunction::RunCleanupsScope Scope(CGF);
1397   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1398                          getCriticalRegionLock(CriticalName)};
1399   if (Hint) {
1400     llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
1401                                                      std::end(Args));
1402     auto *HintVal = CGF.EmitScalarExpr(Hint);
1403     ArgsWithHint.push_back(
1404         CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
1405     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
1406                         ArgsWithHint);
1407   } else
1408     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1409   // Build a call to __kmpc_end_critical
1410   CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1411       NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1412       llvm::makeArrayRef(Args));
1413   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1414 }
1415
1416 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1417                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1418                        const RegionCodeGenTy &BodyOpGen) {
1419   llvm::Value *CallBool = CGF.EmitScalarConversion(
1420       IfCond,
1421       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1422       CGF.getContext().BoolTy, Loc);
1423
1424   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1425   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1426   // Generate the branch (If-stmt)
1427   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1428   CGF.EmitBlock(ThenBlock);
1429   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1430   // Emit the rest of bblocks/branches
1431   CGF.EmitBranch(ContBlock);
1432   CGF.EmitBlock(ContBlock, true);
1433 }
1434
1435 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1436                                        const RegionCodeGenTy &MasterOpGen,
1437                                        SourceLocation Loc) {
1438   if (!CGF.HaveInsertPoint())
1439     return;
1440   // if(__kmpc_master(ident_t *, gtid)) {
1441   //   MasterOpGen();
1442   //   __kmpc_end_master(ident_t *, gtid);
1443   // }
1444   // Prepare arguments and build a call to __kmpc_master
1445   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1446   auto *IsMaster =
1447       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1448   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1449       MasterCallEndCleanup;
1450   emitIfStmt(
1451       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1452         CodeGenFunction::RunCleanupsScope Scope(CGF);
1453         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1454             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1455             llvm::makeArrayRef(Args));
1456         MasterOpGen(CGF);
1457       });
1458 }
1459
1460 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1461                                         SourceLocation Loc) {
1462   if (!CGF.HaveInsertPoint())
1463     return;
1464   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1465   llvm::Value *Args[] = {
1466       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1467       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1468   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1469 }
1470
1471 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1472                                           const RegionCodeGenTy &TaskgroupOpGen,
1473                                           SourceLocation Loc) {
1474   if (!CGF.HaveInsertPoint())
1475     return;
1476   // __kmpc_taskgroup(ident_t *, gtid);
1477   // TaskgroupOpGen();
1478   // __kmpc_end_taskgroup(ident_t *, gtid);
1479   // Prepare arguments and build a call to __kmpc_taskgroup
1480   {
1481     CodeGenFunction::RunCleanupsScope Scope(CGF);
1482     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1483     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1484     // Build a call to __kmpc_end_taskgroup
1485     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1486         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1487         llvm::makeArrayRef(Args));
1488     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1489   }
1490 }
1491
1492 /// Given an array of pointers to variables, project the address of a
1493 /// given variable.
1494 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
1495                                       unsigned Index, const VarDecl *Var) {
1496   // Pull out the pointer to the variable.
1497   Address PtrAddr =
1498       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1499   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1500
1501   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1502   Addr = CGF.Builder.CreateElementBitCast(
1503       Addr, CGF.ConvertTypeForMem(Var->getType()));
1504   return Addr;
1505 }
1506
1507 static llvm::Value *emitCopyprivateCopyFunction(
1508     CodeGenModule &CGM, llvm::Type *ArgsType,
1509     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1510     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1511   auto &C = CGM.getContext();
1512   // void copy_func(void *LHSArg, void *RHSArg);
1513   FunctionArgList Args;
1514   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1515                            C.VoidPtrTy);
1516   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1517                            C.VoidPtrTy);
1518   Args.push_back(&LHSArg);
1519   Args.push_back(&RHSArg);
1520   FunctionType::ExtInfo EI;
1521   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1522       C.VoidTy, Args, EI, /*isVariadic=*/false);
1523   auto *Fn = llvm::Function::Create(
1524       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1525       ".omp.copyprivate.copy_func", &CGM.getModule());
1526   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
1527   CodeGenFunction CGF(CGM);
1528   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1529   // Dest = (void*[n])(LHSArg);
1530   // Src = (void*[n])(RHSArg);
1531   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1532       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1533       ArgsType), CGF.getPointerAlign());
1534   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1535       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1536       ArgsType), CGF.getPointerAlign());
1537   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1538   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1539   // ...
1540   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1541   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1542     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1543     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1544
1545     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1546     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1547
1548     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1549     QualType Type = VD->getType();
1550     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1551   }
1552   CGF.FinishFunction();
1553   return Fn;
1554 }
1555
1556 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1557                                        const RegionCodeGenTy &SingleOpGen,
1558                                        SourceLocation Loc,
1559                                        ArrayRef<const Expr *> CopyprivateVars,
1560                                        ArrayRef<const Expr *> SrcExprs,
1561                                        ArrayRef<const Expr *> DstExprs,
1562                                        ArrayRef<const Expr *> AssignmentOps) {
1563   if (!CGF.HaveInsertPoint())
1564     return;
1565   assert(CopyprivateVars.size() == SrcExprs.size() &&
1566          CopyprivateVars.size() == DstExprs.size() &&
1567          CopyprivateVars.size() == AssignmentOps.size());
1568   auto &C = CGM.getContext();
1569   // int32 did_it = 0;
1570   // if(__kmpc_single(ident_t *, gtid)) {
1571   //   SingleOpGen();
1572   //   __kmpc_end_single(ident_t *, gtid);
1573   //   did_it = 1;
1574   // }
1575   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1576   // <copy_func>, did_it);
1577
1578   Address DidIt = Address::invalid();
1579   if (!CopyprivateVars.empty()) {
1580     // int32 did_it = 0;
1581     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1582     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1583     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
1584   }
1585   // Prepare arguments and build a call to __kmpc_single
1586   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1587   auto *IsSingle =
1588       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1589   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1590       SingleCallEndCleanup;
1591   emitIfStmt(
1592       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1593         CodeGenFunction::RunCleanupsScope Scope(CGF);
1594         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1595             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1596             llvm::makeArrayRef(Args));
1597         SingleOpGen(CGF);
1598         if (DidIt.isValid()) {
1599           // did_it = 1;
1600           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
1601         }
1602       });
1603   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1604   // <copy_func>, did_it);
1605   if (DidIt.isValid()) {
1606     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1607     auto CopyprivateArrayTy =
1608         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1609                                /*IndexTypeQuals=*/0);
1610     // Create a list of all private variables for copyprivate.
1611     Address CopyprivateList =
1612         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1613     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1614       Address Elem = CGF.Builder.CreateConstArrayGEP(
1615           CopyprivateList, I, CGF.getPointerSize());
1616       CGF.Builder.CreateStore(
1617           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1618               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
1619           Elem);
1620     }
1621     // Build function that copies private values from single region to all other
1622     // threads in the corresponding parallel region.
1623     auto *CpyFn = emitCopyprivateCopyFunction(
1624         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1625         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1626     auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
1627     Address CL =
1628       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1629                                                       CGF.VoidPtrTy);
1630     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
1631     llvm::Value *Args[] = {
1632         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1633         getThreadID(CGF, Loc),        // i32 <gtid>
1634         BufSize,                      // size_t <buf_size>
1635         CL.getPointer(),              // void *<copyprivate list>
1636         CpyFn,                        // void (*) (void *, void *) <copy_func>
1637         DidItVal                      // i32 did_it
1638     };
1639     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1640   }
1641 }
1642
1643 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1644                                         const RegionCodeGenTy &OrderedOpGen,
1645                                         SourceLocation Loc, bool IsThreads) {
1646   if (!CGF.HaveInsertPoint())
1647     return;
1648   // __kmpc_ordered(ident_t *, gtid);
1649   // OrderedOpGen();
1650   // __kmpc_end_ordered(ident_t *, gtid);
1651   // Prepare arguments and build a call to __kmpc_ordered
1652   CodeGenFunction::RunCleanupsScope Scope(CGF);
1653   if (IsThreads) {
1654     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1655     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1656     // Build a call to __kmpc_end_ordered
1657     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1658         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1659         llvm::makeArrayRef(Args));
1660   }
1661   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1662 }
1663
1664 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1665                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1666                                       bool ForceSimpleCall) {
1667   if (!CGF.HaveInsertPoint())
1668     return;
1669   // Build call __kmpc_cancel_barrier(loc, thread_id);
1670   // Build call __kmpc_barrier(loc, thread_id);
1671   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1672   if (Kind == OMPD_for) {
1673     Flags =
1674         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1675   } else if (Kind == OMPD_sections) {
1676     Flags = static_cast<OpenMPLocationFlags>(Flags |
1677                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1678   } else if (Kind == OMPD_single) {
1679     Flags =
1680         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1681   } else if (Kind == OMPD_barrier) {
1682     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1683   } else {
1684     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1685   }
1686   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1687   // thread_id);
1688   auto *OMPRegionInfo =
1689       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
1690   // Do not emit barrier call in the single directive emitted in some rare cases
1691   // for sections directives.
1692   if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
1693     return;
1694   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1695                          getThreadID(CGF, Loc)};
1696   if (OMPRegionInfo) {
1697     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1698       auto *Result = CGF.EmitRuntimeCall(
1699           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1700       if (EmitChecks) {
1701         // if (__kmpc_cancel_barrier()) {
1702         //   exit from construct;
1703         // }
1704         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1705         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1706         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1707         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1708         CGF.EmitBlock(ExitBB);
1709         //   exit from construct;
1710         auto CancelDestination =
1711             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1712         CGF.EmitBranchThroughCleanup(CancelDestination);
1713         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1714       }
1715       return;
1716     }
1717   }
1718   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1719 }
1720
/// \brief Numeric schedule kinds passed to the runtime for 'omp for' loops.
/// The values mirror enum sched_type in kmp.h; each 'ordered' enumerator is
/// its unordered counterpart plus 32.
enum OpenMPSchedType {
  // Default (unordered) schedules.
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,

  // 'ordered' schedules.
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,

  /// \brief Alias for the schedule used by default.
  OMP_sch_default = OMP_sch_static,
};
1742
1743 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1744 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1745                                           bool Chunked, bool Ordered) {
1746   switch (ScheduleKind) {
1747   case OMPC_SCHEDULE_static:
1748     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1749                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1750   case OMPC_SCHEDULE_dynamic:
1751     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1752   case OMPC_SCHEDULE_guided:
1753     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1754   case OMPC_SCHEDULE_runtime:
1755     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1756   case OMPC_SCHEDULE_auto:
1757     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1758   case OMPC_SCHEDULE_unknown:
1759     assert(!Chunked && "chunk was specified but schedule kind not known");
1760     return Ordered ? OMP_ord_static : OMP_sch_static;
1761   }
1762   llvm_unreachable("Unexpected runtime schedule");
1763 }
1764
1765 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1766                                          bool Chunked) const {
1767   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1768   return Schedule == OMP_sch_static;
1769 }
1770
1771 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1772   auto Schedule =
1773       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1774   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1775   return Schedule != OMP_sch_static;
1776 }
1777
1778 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1779                                           SourceLocation Loc,
1780                                           OpenMPScheduleClauseKind ScheduleKind,
1781                                           unsigned IVSize, bool IVSigned,
1782                                           bool Ordered, llvm::Value *UB,
1783                                           llvm::Value *Chunk) {
1784   if (!CGF.HaveInsertPoint())
1785     return;
1786   OpenMPSchedType Schedule =
1787       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1788   assert(Ordered ||
1789          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1790           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1791   // Call __kmpc_dispatch_init(
1792   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1793   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1794   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1795
1796   // If the Chunk was not specified in the clause - use default value 1.
1797   if (Chunk == nullptr)
1798     Chunk = CGF.Builder.getIntN(IVSize, 1);
1799   llvm::Value *Args[] = {
1800     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1801     getThreadID(CGF, Loc),
1802     CGF.Builder.getInt32(Schedule), // Schedule type
1803     CGF.Builder.getIntN(IVSize, 0), // Lower
1804     UB,                             // Upper
1805     CGF.Builder.getIntN(IVSize, 1), // Stride
1806     Chunk                           // Chunk
1807   };
1808   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1809 }
1810
1811 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1812                                         SourceLocation Loc,
1813                                         OpenMPScheduleClauseKind ScheduleKind,
1814                                         unsigned IVSize, bool IVSigned,
1815                                         bool Ordered, Address IL, Address LB,
1816                                         Address UB, Address ST,
1817                                         llvm::Value *Chunk) {
1818   if (!CGF.HaveInsertPoint())
1819     return;
1820   OpenMPSchedType Schedule =
1821     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1822   assert(!Ordered);
1823   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1824          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1825
1826   // Call __kmpc_for_static_init(
1827   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1828   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1829   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1830   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1831   if (Chunk == nullptr) {
1832     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1833            "expected static non-chunked schedule");
1834     // If the Chunk was not specified in the clause - use default value 1.
1835       Chunk = CGF.Builder.getIntN(IVSize, 1);
1836   } else {
1837     assert((Schedule == OMP_sch_static_chunked ||
1838             Schedule == OMP_ord_static_chunked) &&
1839            "expected static chunked schedule");
1840   }
1841   llvm::Value *Args[] = {
1842     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1843     getThreadID(CGF, Loc),
1844     CGF.Builder.getInt32(Schedule), // Schedule type
1845     IL.getPointer(),                // &isLastIter
1846     LB.getPointer(),                // &LB
1847     UB.getPointer(),                // &UB
1848     ST.getPointer(),                // &Stride
1849     CGF.Builder.getIntN(IVSize, 1), // Incr
1850     Chunk                           // Chunk
1851   };
1852   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1853 }
1854
1855 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1856                                           SourceLocation Loc) {
1857   if (!CGF.HaveInsertPoint())
1858     return;
1859   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1860   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1861                          getThreadID(CGF, Loc)};
1862   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1863                       Args);
1864 }
1865
1866 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1867                                                  SourceLocation Loc,
1868                                                  unsigned IVSize,
1869                                                  bool IVSigned) {
1870   if (!CGF.HaveInsertPoint())
1871     return;
1872   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1873   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1874                          getThreadID(CGF, Loc)};
1875   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1876 }
1877
1878 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1879                                           SourceLocation Loc, unsigned IVSize,
1880                                           bool IVSigned, Address IL,
1881                                           Address LB, Address UB,
1882                                           Address ST) {
1883   // Call __kmpc_dispatch_next(
1884   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1885   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1886   //          kmp_int[32|64] *p_stride);
1887   llvm::Value *Args[] = {
1888       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1889       IL.getPointer(), // &isLastIter
1890       LB.getPointer(), // &Lower
1891       UB.getPointer(), // &Upper
1892       ST.getPointer()  // &Stride
1893   };
1894   llvm::Value *Call =
1895       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1896   return CGF.EmitScalarConversion(
1897       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1898       CGF.getContext().BoolTy, Loc);
1899 }
1900
1901 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1902                                            llvm::Value *NumThreads,
1903                                            SourceLocation Loc) {
1904   if (!CGF.HaveInsertPoint())
1905     return;
1906   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1907   llvm::Value *Args[] = {
1908       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1909       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1910   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1911                       Args);
1912 }
1913
1914 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1915                                          OpenMPProcBindClauseKind ProcBind,
1916                                          SourceLocation Loc) {
1917   if (!CGF.HaveInsertPoint())
1918     return;
1919   // Constants for proc bind value accepted by the runtime.
1920   enum ProcBindTy {
1921     ProcBindFalse = 0,
1922     ProcBindTrue,
1923     ProcBindMaster,
1924     ProcBindClose,
1925     ProcBindSpread,
1926     ProcBindIntel,
1927     ProcBindDefault
1928   } RuntimeProcBind;
1929   switch (ProcBind) {
1930   case OMPC_PROC_BIND_master:
1931     RuntimeProcBind = ProcBindMaster;
1932     break;
1933   case OMPC_PROC_BIND_close:
1934     RuntimeProcBind = ProcBindClose;
1935     break;
1936   case OMPC_PROC_BIND_spread:
1937     RuntimeProcBind = ProcBindSpread;
1938     break;
1939   case OMPC_PROC_BIND_unknown:
1940     llvm_unreachable("Unsupported proc_bind value.");
1941   }
1942   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1943   llvm::Value *Args[] = {
1944       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1945       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1946   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1947 }
1948
1949 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1950                                 SourceLocation Loc) {
1951   if (!CGF.HaveInsertPoint())
1952     return;
1953   // Build call void __kmpc_flush(ident_t *loc)
1954   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1955                       emitUpdateLocation(CGF, Loc));
1956 }
1957
namespace {
/// \brief Field indexes of kmp_task_t, in declaration order.
enum KmpTaskTFields {
  /// \brief Index 0: list of shared variables.
  KmpTaskTShareds,
  /// \brief Index 1: task routine.
  KmpTaskTRoutine,
  /// \brief Index 2: partition id for the untied tasks.
  KmpTaskTPartId,
  /// \brief Index 3: function with call of destructors for private variables.
  KmpTaskTDestructors,
};
} // anonymous namespace
1971
1972 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
1973   if (!KmpRoutineEntryPtrTy) {
1974     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
1975     auto &C = CGM.getContext();
1976     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
1977     FunctionProtoType::ExtProtoInfo EPI;
1978     KmpRoutineEntryPtrQTy = C.getPointerType(
1979         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
1980     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
1981   }
1982 }
1983
1984 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1985                                        QualType FieldTy) {
1986   auto *Field = FieldDecl::Create(
1987       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1988       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1989       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1990   Field->setAccess(AS_public);
1991   DC->addDecl(Field);
1992   return Field;
1993 }
1994
1995 namespace {
1996 struct PrivateHelpersTy {
1997   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
1998                    const VarDecl *PrivateElemInit)
1999       : Original(Original), PrivateCopy(PrivateCopy),
2000         PrivateElemInit(PrivateElemInit) {}
2001   const VarDecl *Original;
2002   const VarDecl *PrivateCopy;
2003   const VarDecl *PrivateElemInit;
2004 };
2005 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2006 } // anonymous namespace
2007
2008 static RecordDecl *
2009 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2010   if (!Privates.empty()) {
2011     auto &C = CGM.getContext();
2012     // Build struct .kmp_privates_t. {
2013     //         /*  private vars  */
2014     //       };
2015     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
2016     RD->startDefinition();
2017     for (auto &&Pair : Privates) {
2018       auto *VD = Pair.second.Original;
2019       auto Type = VD->getType();
2020       Type = Type.getNonReferenceType();
2021       auto *FD = addFieldToRecordDecl(C, RD, Type);
2022       if (VD->hasAttrs()) {
2023         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2024              E(VD->getAttrs().end());
2025              I != E; ++I)
2026           FD->addAttr(*I);
2027       }
2028     }
2029     RD->completeDefinition();
2030     return RD;
2031   }
2032   return nullptr;
2033 }
2034
2035 static RecordDecl *
2036 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
2037                          QualType KmpRoutineEntryPointerQTy) {
2038   auto &C = CGM.getContext();
2039   // Build struct kmp_task_t {
2040   //         void *              shareds;
2041   //         kmp_routine_entry_t routine;
2042   //         kmp_int32           part_id;
2043   //         kmp_routine_entry_t destructors;
2044   //       };
2045   auto *RD = C.buildImplicitRecord("kmp_task_t");
2046   RD->startDefinition();
2047   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2048   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2049   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2050   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2051   RD->completeDefinition();
2052   return RD;
2053 }
2054
2055 static RecordDecl *
2056 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2057                                      ArrayRef<PrivateDataTy> Privates) {
2058   auto &C = CGM.getContext();
2059   // Build struct kmp_task_t_with_privates {
2060   //         kmp_task_t task_data;
2061   //         .kmp_privates_t. privates;
2062   //       };
2063   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2064   RD->startDefinition();
2065   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2066   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
2067     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2068   }
2069   RD->completeDefinition();
2070   return RD;
2071 }
2072
2073 /// \brief Emit a proxy function which accepts kmp_task_t as the second
2074 /// argument.
2075 /// \code
2076 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2077 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2078 ///   tt->shareds);
2079 ///   return 0;
2080 /// }
2081 /// \endcode
2082 static llvm::Value *
2083 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2084                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2085                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2086                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2087                       llvm::Value *TaskPrivatesMap) {
2088   auto &C = CGM.getContext();
2089   FunctionArgList Args;
2090   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2091   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2092                                 /*Id=*/nullptr,
2093                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2094   Args.push_back(&GtidArg);
2095   Args.push_back(&TaskTypeArg);
2096   FunctionType::ExtInfo Info;
2097   auto &TaskEntryFnInfo =
2098       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2099                                                     /*isVariadic=*/false);
2100   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2101   auto *TaskEntry =
2102       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2103                              ".omp_task_entry.", &CGM.getModule());
2104   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
2105   CodeGenFunction CGF(CGM);
2106   CGF.disableDebugInfo();
2107   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2108
2109   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2110   // tt->task_data.shareds);
2111   auto *GtidParam = CGF.EmitLoadOfScalar(
2112       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2113   LValue TDBase = emitLoadOfPointerLValue(
2114       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2115   auto *KmpTaskTWithPrivatesQTyRD =
2116       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2117   LValue Base =
2118       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2119   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2120   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2121   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2122   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2123
2124   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2125   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2126   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2127       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2128       CGF.ConvertTypeForMem(SharedsPtrTy));
2129
2130   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2131   llvm::Value *PrivatesParam;
2132   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2133     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2134     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2135         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2136   } else {
2137     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2138   }
2139
2140   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2141                              TaskPrivatesMap, SharedsParam};
2142   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
2143   CGF.EmitStoreThroughLValue(
2144       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2145       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2146   CGF.FinishFunction();
2147   return TaskEntry;
2148 }
2149
2150 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2151                                             SourceLocation Loc,
2152                                             QualType KmpInt32Ty,
2153                                             QualType KmpTaskTWithPrivatesPtrQTy,
2154                                             QualType KmpTaskTWithPrivatesQTy) {
2155   auto &C = CGM.getContext();
2156   FunctionArgList Args;
2157   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2158   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2159                                 /*Id=*/nullptr,
2160                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2161   Args.push_back(&GtidArg);
2162   Args.push_back(&TaskTypeArg);
2163   FunctionType::ExtInfo Info;
2164   auto &DestructorFnInfo =
2165       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2166                                                     /*isVariadic=*/false);
2167   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2168   auto *DestructorFn =
2169       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2170                              ".omp_task_destructor.", &CGM.getModule());
2171   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
2172                                     DestructorFnInfo);
2173   CodeGenFunction CGF(CGM);
2174   CGF.disableDebugInfo();
2175   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2176                     Args);
2177
2178   LValue Base = emitLoadOfPointerLValue(
2179       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2180   auto *KmpTaskTWithPrivatesQTyRD =
2181       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2182   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2183   Base = CGF.EmitLValueForField(Base, *FI);
2184   for (auto *Field :
2185        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2186     if (auto DtorKind = Field->getType().isDestructedType()) {
2187       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2188       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2189     }
2190   }
2191   CGF.FinishFunction();
2192   return DestructorFn;
2193 }
2194
2195 /// \brief Emit a privates mapping function for correct handling of private and
2196 /// firstprivate variables.
2197 /// \code
2198 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2199 /// **noalias priv1,...,  <tyn> **noalias privn) {
2200 ///   *priv1 = &.privates.priv1;
2201 ///   ...;
2202 ///   *privn = &.privates.privn;
2203 /// }
2204 /// \endcode
2205 static llvm::Value *
2206 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2207                                ArrayRef<const Expr *> PrivateVars,
2208                                ArrayRef<const Expr *> FirstprivateVars,
2209                                QualType PrivatesQTy,
2210                                ArrayRef<PrivateDataTy> Privates) {
2211   auto &C = CGM.getContext();
2212   FunctionArgList Args;
2213   ImplicitParamDecl TaskPrivatesArg(
2214       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2215       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2216   Args.push_back(&TaskPrivatesArg);
2217   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2218   unsigned Counter = 1;
2219   for (auto *E: PrivateVars) {
2220     Args.push_back(ImplicitParamDecl::Create(
2221         C, /*DC=*/nullptr, Loc,
2222         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2223                             .withConst()
2224                             .withRestrict()));
2225     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2226     PrivateVarsPos[VD] = Counter;
2227     ++Counter;
2228   }
2229   for (auto *E : FirstprivateVars) {
2230     Args.push_back(ImplicitParamDecl::Create(
2231         C, /*DC=*/nullptr, Loc,
2232         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2233                             .withConst()
2234                             .withRestrict()));
2235     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2236     PrivateVarsPos[VD] = Counter;
2237     ++Counter;
2238   }
2239   FunctionType::ExtInfo Info;
2240   auto &TaskPrivatesMapFnInfo =
2241       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2242                                                     /*isVariadic=*/false);
2243   auto *TaskPrivatesMapTy =
2244       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2245   auto *TaskPrivatesMap = llvm::Function::Create(
2246       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2247       ".omp_task_privates_map.", &CGM.getModule());
2248   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
2249                                     TaskPrivatesMapFnInfo);
2250   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2251   CodeGenFunction CGF(CGM);
2252   CGF.disableDebugInfo();
2253   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2254                     TaskPrivatesMapFnInfo, Args);
2255
2256   // *privi = &.privates.privi;
2257   LValue Base = emitLoadOfPointerLValue(
2258       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2259   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2260   Counter = 0;
2261   for (auto *Field : PrivatesQTyRD->fields()) {
2262     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2263     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2264     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2265     auto RefLoadLVal =
2266         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2267     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2268     ++Counter;
2269   }
2270   CGF.FinishFunction();
2271   return TaskPrivatesMap;
2272 }
2273
2274 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2275                                      const PrivateDataTy *P2) {
2276   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2277 }
2278
2279 void CGOpenMPRuntime::emitTaskCall(
2280     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2281     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2282     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
2283     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
2284     ArrayRef<const Expr *> PrivateCopies,
2285     ArrayRef<const Expr *> FirstprivateVars,
2286     ArrayRef<const Expr *> FirstprivateCopies,
2287     ArrayRef<const Expr *> FirstprivateInits,
2288     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
2289   if (!CGF.HaveInsertPoint())
2290     return;
2291   auto &C = CGM.getContext();
2292   llvm::SmallVector<PrivateDataTy, 8> Privates;
2293   // Aggregate privates and sort them by the alignment.
2294   auto I = PrivateCopies.begin();
2295   for (auto *E : PrivateVars) {
2296     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2297     Privates.push_back(std::make_pair(
2298         C.getDeclAlign(VD),
2299         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2300                          /*PrivateElemInit=*/nullptr)));
2301     ++I;
2302   }
2303   I = FirstprivateCopies.begin();
2304   auto IElemInitRef = FirstprivateInits.begin();
2305   for (auto *E : FirstprivateVars) {
2306     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2307     Privates.push_back(std::make_pair(
2308         C.getDeclAlign(VD),
2309         PrivateHelpersTy(
2310             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2311             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2312     ++I, ++IElemInitRef;
2313   }
2314   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2315                        array_pod_sort_comparator);
2316   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2317   // Build type kmp_routine_entry_t (if not built yet).
2318   emitKmpRoutineEntryT(KmpInt32Ty);
2319   // Build type kmp_task_t (if not built yet).
2320   if (KmpTaskTQTy.isNull()) {
2321     KmpTaskTQTy = C.getRecordType(
2322         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2323   }
2324   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2325   // Build particular struct kmp_task_t for the given task.
2326   auto *KmpTaskTWithPrivatesQTyRD =
2327       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2328   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2329   QualType KmpTaskTWithPrivatesPtrQTy =
2330       C.getPointerType(KmpTaskTWithPrivatesQTy);
2331   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2332   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2333   auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
2334   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2335
2336   // Emit initial values for private copies (if any).
2337   llvm::Value *TaskPrivatesMap = nullptr;
2338   auto *TaskPrivatesMapTy =
2339       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2340                 3)
2341           ->getType();
2342   if (!Privates.empty()) {
2343     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2344     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2345         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2346     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2347         TaskPrivatesMap, TaskPrivatesMapTy);
2348   } else {
2349     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2350         cast<llvm::PointerType>(TaskPrivatesMapTy));
2351   }
2352   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2353   // kmp_task_t *tt);
2354   auto *TaskEntry = emitProxyTaskFunction(
2355       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2356       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2357
2358   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2359   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2360   // kmp_routine_entry_t *task_entry);
2361   // Task flags. Format is taken from
2362   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2363   // description of kmp_tasking_flags struct.
2364   const unsigned TiedFlag = 0x1;
2365   const unsigned FinalFlag = 0x2;
2366   unsigned Flags = Tied ? TiedFlag : 0;
2367   auto *TaskFlags =
2368       Final.getPointer()
2369           ? CGF.Builder.CreateSelect(Final.getPointer(),
2370                                      CGF.Builder.getInt32(FinalFlag),
2371                                      CGF.Builder.getInt32(/*C=*/0))
2372           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2373   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2374   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
2375   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
2376                               getThreadID(CGF, Loc), TaskFlags,
2377                               KmpTaskTWithPrivatesTySize, SharedsSize,
2378                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2379                                   TaskEntry, KmpRoutineEntryPtrTy)};
2380   auto *NewTask = CGF.EmitRuntimeCall(
2381       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
2382   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2383       NewTask, KmpTaskTWithPrivatesPtrTy);
2384   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2385                                                KmpTaskTWithPrivatesQTy);
2386   LValue TDBase =
2387       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2388   // Fill the data in the resulting kmp_task_t record.
2389   // Copy shareds if there are any.
2390   Address KmpTaskSharedsPtr = Address::invalid();
2391   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2392     KmpTaskSharedsPtr =
2393         Address(CGF.EmitLoadOfScalar(
2394                     CGF.EmitLValueForField(
2395                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
2396                                            KmpTaskTShareds)),
2397                     Loc),
2398                 CGF.getNaturalTypeAlignment(SharedsTy));
2399     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2400   }
2401   // Emit initial values for private copies (if any).
2402   bool NeedsCleanup = false;
2403   if (!Privates.empty()) {
2404     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2405     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
2406     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
2407     LValue SharedsBase;
2408     if (!FirstprivateVars.empty()) {
2409       SharedsBase = CGF.MakeAddrLValue(
2410           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2411               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2412           SharedsTy);
2413     }
2414     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2415         cast<CapturedStmt>(*D.getAssociatedStmt()));
2416     for (auto &&Pair : Privates) {
2417       auto *VD = Pair.second.PrivateCopy;
2418       auto *Init = VD->getAnyInitializer();
2419       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
2420       if (Init) {
2421         if (auto *Elem = Pair.second.PrivateElemInit) {
2422           auto *OriginalVD = Pair.second.Original;
2423           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2424           auto SharedRefLValue =
2425               CGF.EmitLValueForField(SharedsBase, SharedField);
2426           SharedRefLValue = CGF.MakeAddrLValue(
2427               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
2428               SharedRefLValue.getType(), AlignmentSource::Decl);
2429           QualType Type = OriginalVD->getType();
2430           if (Type->isArrayType()) {
2431             // Initialize firstprivate array.
2432             if (!isa<CXXConstructExpr>(Init) ||
2433                 CGF.isTrivialInitializer(Init)) {
2434               // Perform simple memcpy.
2435               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2436                                       SharedRefLValue.getAddress(), Type);
2437             } else {
2438               // Initialize firstprivate array using element-by-element
2439               // intialization.
2440               CGF.EmitOMPAggregateAssign(
2441                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2442                   Type, [&CGF, Elem, Init, &CapturesInfo](
2443                             Address DestElement, Address SrcElement) {
2444                     // Clean up any temporaries needed by the initialization.
2445                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2446                     InitScope.addPrivate(Elem, [SrcElement]() -> Address {
2447                       return SrcElement;
2448                     });
2449                     (void)InitScope.Privatize();
2450                     // Emit initialization for single element.
2451                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
2452                         CGF, &CapturesInfo);
2453                     CGF.EmitAnyExprToMem(Init, DestElement,
2454                                          Init->getType().getQualifiers(),
2455                                          /*IsInitializer=*/false);
2456                   });
2457             }
2458           } else {
2459             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2460             InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
2461               return SharedRefLValue.getAddress();
2462             });
2463             (void)InitScope.Privatize();
2464             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
2465             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2466                                /*capturedByInit=*/false);
2467           }
2468         } else {
2469           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2470         }
2471       }
2472       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2473       ++FI;
2474     }
2475   }
2476   // Provide pointer to function with destructors for privates.
2477   llvm::Value *DestructorFn =
2478       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2479                                              KmpTaskTWithPrivatesPtrQTy,
2480                                              KmpTaskTWithPrivatesQTy)
2481                    : llvm::ConstantPointerNull::get(
2482                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2483   LValue Destructor = CGF.EmitLValueForField(
2484       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2485   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2486                             DestructorFn, KmpRoutineEntryPtrTy),
2487                         Destructor);
2488
2489   // Process list of dependences.
2490   Address DependenciesArray = Address::invalid();
2491   unsigned NumDependencies = Dependences.size();
2492   if (NumDependencies) {
2493     // Dependence kind for RTL.
2494     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
2495     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
2496     RecordDecl *KmpDependInfoRD;
2497     QualType FlagsTy =
2498         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
2499     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
2500     if (KmpDependInfoTy.isNull()) {
2501       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
2502       KmpDependInfoRD->startDefinition();
2503       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
2504       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
2505       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
2506       KmpDependInfoRD->completeDefinition();
2507       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
2508     } else {
2509       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
2510     }
2511     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
2512     // Define type kmp_depend_info[<Dependences.size()>];
2513     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
2514         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
2515         ArrayType::Normal, /*IndexTypeQuals=*/0);
2516     // kmp_depend_info[<Dependences.size()>] deps;
2517     DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
2518     for (unsigned i = 0; i < NumDependencies; ++i) {
2519       const Expr *E = Dependences[i].second;
2520       auto Addr = CGF.EmitLValue(E);
2521       llvm::Value *Size;
2522       QualType Ty = E->getType();
2523       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
2524         LValue UpAddrLVal =
2525             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
2526         llvm::Value *UpAddr =
2527             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
2528         llvm::Value *LowIntPtr =
2529             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
2530         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
2531         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
2532       } else
2533         Size = getTypeSize(CGF, Ty);
2534       auto Base = CGF.MakeAddrLValue(
2535           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
2536           KmpDependInfoTy);
2537       // deps[i].base_addr = &<Dependences[i].second>;
2538       auto BaseAddrLVal = CGF.EmitLValueForField(
2539           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
2540       CGF.EmitStoreOfScalar(
2541           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
2542           BaseAddrLVal);
2543       // deps[i].len = sizeof(<Dependences[i].second>);
2544       auto LenLVal = CGF.EmitLValueForField(
2545           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
2546       CGF.EmitStoreOfScalar(Size, LenLVal);
2547       // deps[i].flags = <Dependences[i].first>;
2548       RTLDependenceKindTy DepKind;
2549       switch (Dependences[i].first) {
2550       case OMPC_DEPEND_in:
2551         DepKind = DepIn;
2552         break;
2553       // Out and InOut dependencies must use the same code.
2554       case OMPC_DEPEND_out:
2555       case OMPC_DEPEND_inout:
2556         DepKind = DepInOut;
2557         break;
2558       case OMPC_DEPEND_source:
2559       case OMPC_DEPEND_sink:
2560       case OMPC_DEPEND_unknown:
2561         llvm_unreachable("Unknown task dependence type");
2562       }
2563       auto FlagsLVal = CGF.EmitLValueForField(
2564           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
2565       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
2566                             FlagsLVal);
2567     }
2568     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2569         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
2570         CGF.VoidPtrTy);
2571   }
2572
2573   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
2574   // libcall.
2575   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2576   // *new_task);
2577   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2578   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2579   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
2580   // list is not empty
2581   auto *ThreadID = getThreadID(CGF, Loc);
2582   auto *UpLoc = emitUpdateLocation(CGF, Loc);
2583   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
2584   llvm::Value *DepTaskArgs[7];
2585   if (NumDependencies) {
2586     DepTaskArgs[0] = UpLoc;
2587     DepTaskArgs[1] = ThreadID;
2588     DepTaskArgs[2] = NewTask;
2589     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
2590     DepTaskArgs[4] = DependenciesArray.getPointer();
2591     DepTaskArgs[5] = CGF.Builder.getInt32(0);
2592     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2593   }
2594   auto &&ThenCodeGen = [this, NumDependencies,
2595                         &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
2596     // TODO: add check for untied tasks.    
2597     if (NumDependencies) {
2598       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
2599                           DepTaskArgs);
2600     } else {
2601       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
2602                           TaskArgs);
2603     }
2604   };
2605   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
2606       IfCallEndCleanup;
2607
2608   llvm::Value *DepWaitTaskArgs[6];
2609   if (NumDependencies) {
2610     DepWaitTaskArgs[0] = UpLoc;
2611     DepWaitTaskArgs[1] = ThreadID;
2612     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
2613     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
2614     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
2615     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2616   }
2617   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
2618                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
2619     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
2620     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2621     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
2622     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
2623     // is specified.
2624     if (NumDependencies)
2625       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
2626                           DepWaitTaskArgs);
2627     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
2628     // kmp_task_t *new_task);
2629     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
2630                         TaskArgs);
2631     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
2632     // kmp_task_t *new_task);
2633     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
2634         NormalAndEHCleanup,
2635         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
2636         llvm::makeArrayRef(TaskArgs));
2637
2638     // Call proxy_task_entry(gtid, new_task);
2639     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
2640     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
2641   };
2642
2643   if (IfCond) {
2644     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
2645   } else {
2646     CodeGenFunction::RunCleanupsScope Scope(CGF);
2647     ThenCodeGen(CGF);
2648   }
2649 }
2650
2651 /// \brief Emit reduction operation for each element of array (required for
2652 /// array sections) LHS op = RHS.
2653 /// \param Type Type of array.
2654 /// \param LHSVar Variable on the left side of the reduction operation
2655 /// (references element of array in original variable).
2656 /// \param RHSVar Variable on the right side of the reduction operation
2657 /// (references element of array in original variable).
2658 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
2659 /// RHSVar.
2660 static void EmitOMPAggregateReduction(
2661     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
2662     const VarDecl *RHSVar,
2663     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
2664                                   const Expr *, const Expr *)> &RedOpGen,
2665     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
2666     const Expr *UpExpr = nullptr) {
2667   // Perform element-by-element initialization.
2668   QualType ElementTy;
2669   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
2670   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
2671
2672   // Drill down to the base element type on both arrays.
2673   auto ArrayTy = Type->getAsArrayTypeUnsafe();
2674   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
2675
2676   auto RHSBegin = RHSAddr.getPointer();
2677   auto LHSBegin = LHSAddr.getPointer();
2678   // Cast from pointer to array type to pointer to single element.
2679   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
2680   // The basic structure here is a while-do loop.
2681   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
2682   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
2683   auto IsEmpty =
2684       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
2685   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
2686
2687   // Enter the loop body, making that address the current address.
2688   auto EntryBB = CGF.Builder.GetInsertBlock();
2689   CGF.EmitBlock(BodyBB);
2690
2691   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
2692
2693   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
2694       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
2695   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
2696   Address RHSElementCurrent =
2697       Address(RHSElementPHI,
2698               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
2699
2700   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
2701       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
2702   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
2703   Address LHSElementCurrent =
2704       Address(LHSElementPHI,
2705               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
2706
2707   // Emit copy.
2708   CodeGenFunction::OMPPrivateScope Scope(CGF);
2709   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
2710   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
2711   Scope.Privatize();
2712   RedOpGen(CGF, XExpr, EExpr, UpExpr);
2713   Scope.ForceCleanup();
2714
2715   // Shift the address forward by one element.
2716   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
2717       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
2718   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
2719       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
2720   // Check whether we've reached the end.
2721   auto Done =
2722       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
2723   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
2724   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
2725   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
2726
2727   // Done.
2728   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
2729 }
2730
2731 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
2732                                           llvm::Type *ArgsType,
2733                                           ArrayRef<const Expr *> Privates,
2734                                           ArrayRef<const Expr *> LHSExprs,
2735                                           ArrayRef<const Expr *> RHSExprs,
2736                                           ArrayRef<const Expr *> ReductionOps) {
2737   auto &C = CGM.getContext();
2738
2739   // void reduction_func(void *LHSArg, void *RHSArg);
2740   FunctionArgList Args;
2741   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2742                            C.VoidPtrTy);
2743   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
2744                            C.VoidPtrTy);
2745   Args.push_back(&LHSArg);
2746   Args.push_back(&RHSArg);
2747   FunctionType::ExtInfo EI;
2748   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2749       C.VoidTy, Args, EI, /*isVariadic=*/false);
2750   auto *Fn = llvm::Function::Create(
2751       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
2752       ".omp.reduction.reduction_func", &CGM.getModule());
2753   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2754   CodeGenFunction CGF(CGM);
2755   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2756
2757   // Dst = (void*[n])(LHSArg);
2758   // Src = (void*[n])(RHSArg);
2759   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2760       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2761       ArgsType), CGF.getPointerAlign());
2762   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2763       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2764       ArgsType), CGF.getPointerAlign());
2765
2766   //  ...
2767   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
2768   //  ...
2769   CodeGenFunction::OMPPrivateScope Scope(CGF);
2770   auto IPriv = Privates.begin();
2771   unsigned Idx = 0;
2772   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
2773     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
2774     Scope.addPrivate(RHSVar, [&]() -> Address {
2775       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
2776     });
2777     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
2778     Scope.addPrivate(LHSVar, [&]() -> Address {
2779       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
2780     });
2781     QualType PrivTy = (*IPriv)->getType();
2782     if (PrivTy->isArrayType()) {
2783       // Get array size and emit VLA type.
2784       ++Idx;
2785       Address Elem =
2786           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
2787       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
2788       CodeGenFunction::OpaqueValueMapping OpaqueMap(
2789           CGF,
2790           cast<OpaqueValueExpr>(
2791               CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
2792           RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
2793       CGF.EmitVariablyModifiedType(PrivTy);
2794     }
2795   }
2796   Scope.Privatize();
2797   IPriv = Privates.begin();
2798   auto ILHS = LHSExprs.begin();
2799   auto IRHS = RHSExprs.begin();
2800   for (auto *E : ReductionOps) {
2801     if ((*IPriv)->getType()->isArrayType()) {
2802       // Emit reduction for array section.
2803       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2804       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2805       EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2806                                 [=](CodeGenFunction &CGF, const Expr *,
2807                                     const Expr *,
2808                                     const Expr *) { CGF.EmitIgnoredExpr(E); });
2809     } else
2810       // Emit reduction for array subscript or single variable.
2811       CGF.EmitIgnoredExpr(E);
2812     ++IPriv, ++ILHS, ++IRHS;
2813   }
2814   Scope.ForceCleanup();
2815   CGF.FinishFunction();
2816   return Fn;
2817 }
2818
2819 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
2820                                     ArrayRef<const Expr *> Privates,
2821                                     ArrayRef<const Expr *> LHSExprs,
2822                                     ArrayRef<const Expr *> RHSExprs,
2823                                     ArrayRef<const Expr *> ReductionOps,
2824                                     bool WithNowait, bool SimpleReduction) {
2825   if (!CGF.HaveInsertPoint())
2826     return;
2827   // Next code should be emitted for reduction:
2828   //
2829   // static kmp_critical_name lock = { 0 };
2830   //
2831   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
2832   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
2833   //  ...
2834   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
2835   //  *(Type<n>-1*)rhs[<n>-1]);
2836   // }
2837   //
2838   // ...
2839   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
2840   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2841   // RedList, reduce_func, &<lock>)) {
2842   // case 1:
2843   //  ...
2844   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2845   //  ...
2846   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2847   // break;
2848   // case 2:
2849   //  ...
2850   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
2851   //  ...
2852   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
2853   // break;
2854   // default:;
2855   // }
2856   //
2857   // if SimpleReduction is true, only the next code is generated:
2858   //  ...
2859   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2860   //  ...
2861
2862   auto &C = CGM.getContext();
2863
2864   if (SimpleReduction) {
2865     CodeGenFunction::RunCleanupsScope Scope(CGF);
2866     auto IPriv = Privates.begin();
2867     auto ILHS = LHSExprs.begin();
2868     auto IRHS = RHSExprs.begin();
2869     for (auto *E : ReductionOps) {
2870       if ((*IPriv)->getType()->isArrayType()) {
2871         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2872         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2873         EmitOMPAggregateReduction(
2874             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2875             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
2876                 const Expr *) { CGF.EmitIgnoredExpr(E); });
2877       } else
2878         CGF.EmitIgnoredExpr(E);
2879       ++IPriv, ++ILHS, ++IRHS;
2880     }
2881     return;
2882   }
2883
2884   // 1. Build a list of reduction variables.
2885   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
2886   auto Size = RHSExprs.size();
2887   for (auto *E : Privates) {
2888     if (E->getType()->isArrayType())
2889       // Reserve place for array size.
2890       ++Size;
2891   }
2892   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
2893   QualType ReductionArrayTy =
2894       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2895                              /*IndexTypeQuals=*/0);
2896   Address ReductionList =
2897       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
2898   auto IPriv = Privates.begin();
2899   unsigned Idx = 0;
2900   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
2901     Address Elem =
2902       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
2903     CGF.Builder.CreateStore(
2904         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2905             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
2906         Elem);
2907     if ((*IPriv)->getType()->isArrayType()) {
2908       // Store array size.
2909       ++Idx;
2910       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
2911                                              CGF.getPointerSize());
2912       CGF.Builder.CreateStore(
2913           CGF.Builder.CreateIntToPtr(
2914               CGF.Builder.CreateIntCast(
2915                   CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
2916                                      (*IPriv)->getType()))
2917                       .first,
2918                   CGF.SizeTy, /*isSigned=*/false),
2919               CGF.VoidPtrTy),
2920           Elem);
2921     }
2922   }
2923
2924   // 2. Emit reduce_func().
2925   auto *ReductionFn = emitReductionFunction(
2926       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
2927       LHSExprs, RHSExprs, ReductionOps);
2928
2929   // 3. Create static kmp_critical_name lock = { 0 };
2930   auto *Lock = getCriticalRegionLock(".reduction");
2931
2932   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
2933   // RedList, reduce_func, &<lock>);
2934   auto *IdentTLoc = emitUpdateLocation(
2935       CGF, Loc,
2936       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
2937   auto *ThreadId = getThreadID(CGF, Loc);
2938   auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
2939   auto *RL =
2940     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
2941                                                     CGF.VoidPtrTy);
2942   llvm::Value *Args[] = {
2943       IdentTLoc,                             // ident_t *<loc>
2944       ThreadId,                              // i32 <gtid>
2945       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
2946       ReductionArrayTySize,                  // size_type sizeof(RedList)
2947       RL,                                    // void *RedList
2948       ReductionFn, // void (*) (void *, void *) <reduce_func>
2949       Lock         // kmp_critical_name *&<lock>
2950   };
2951   auto Res = CGF.EmitRuntimeCall(
2952       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
2953                                        : OMPRTL__kmpc_reduce),
2954       Args);
2955
2956   // 5. Build switch(res)
2957   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
2958   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
2959
2960   // 6. Build case 1:
2961   //  ...
2962   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
2963   //  ...
2964   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2965   // break;
2966   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
2967   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
2968   CGF.EmitBlock(Case1BB);
2969
2970   {
2971     CodeGenFunction::RunCleanupsScope Scope(CGF);
2972     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
2973     llvm::Value *EndArgs[] = {
2974         IdentTLoc, // ident_t *<loc>
2975         ThreadId,  // i32 <gtid>
2976         Lock       // kmp_critical_name *&<lock>
2977     };
2978     CGF.EHStack
2979         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
2980             NormalAndEHCleanup,
2981             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
2982                                              : OMPRTL__kmpc_end_reduce),
2983             llvm::makeArrayRef(EndArgs));
2984     auto IPriv = Privates.begin();
2985     auto ILHS = LHSExprs.begin();
2986     auto IRHS = RHSExprs.begin();
2987     for (auto *E : ReductionOps) {
2988       if ((*IPriv)->getType()->isArrayType()) {
2989         // Emit reduction for array section.
2990         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
2991         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
2992         EmitOMPAggregateReduction(
2993             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
2994             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
2995                 const Expr *) { CGF.EmitIgnoredExpr(E); });
2996       } else
2997         // Emit reduction for array subscript or single variable.
2998         CGF.EmitIgnoredExpr(E);
2999       ++IPriv, ++ILHS, ++IRHS;
3000     }
3001   }
3002
3003   CGF.EmitBranch(DefaultBB);
3004
3005   // 7. Build case 2:
3006   //  ...
3007   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3008   //  ...
3009   // break;
3010   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
3011   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
3012   CGF.EmitBlock(Case2BB);
3013
3014   {
3015     CodeGenFunction::RunCleanupsScope Scope(CGF);
3016     if (!WithNowait) {
3017       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
3018       llvm::Value *EndArgs[] = {
3019           IdentTLoc, // ident_t *<loc>
3020           ThreadId,  // i32 <gtid>
3021           Lock       // kmp_critical_name *&<lock>
3022       };
3023       CGF.EHStack
3024           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
3025               NormalAndEHCleanup,
3026               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
3027               llvm::makeArrayRef(EndArgs));
3028     }
3029     auto ILHS = LHSExprs.begin();
3030     auto IRHS = RHSExprs.begin();
3031     auto IPriv = Privates.begin();
3032     for (auto *E : ReductionOps) {
3033         const Expr *XExpr = nullptr;
3034         const Expr *EExpr = nullptr;
3035         const Expr *UpExpr = nullptr;
3036         BinaryOperatorKind BO = BO_Comma;
3037         if (auto *BO = dyn_cast<BinaryOperator>(E)) {
3038           if (BO->getOpcode() == BO_Assign) {
3039             XExpr = BO->getLHS();
3040             UpExpr = BO->getRHS();
3041           }
3042         }
3043         // Try to emit update expression as a simple atomic.
3044         auto *RHSExpr = UpExpr;
3045         if (RHSExpr) {
3046           // Analyze RHS part of the whole expression.
3047           if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
3048                   RHSExpr->IgnoreParenImpCasts())) {
3049             // If this is a conditional operator, analyze its condition for
3050             // min/max reduction operator.
3051             RHSExpr = ACO->getCond();
3052           }
3053           if (auto *BORHS =
3054                   dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
3055             EExpr = BORHS->getRHS();
3056             BO = BORHS->getOpcode();
3057           }
3058         }
3059         if (XExpr) {
3060           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3061           auto &&AtomicRedGen = [this, BO, VD, IPriv,
3062                                  Loc](CodeGenFunction &CGF, const Expr *XExpr,
3063                                       const Expr *EExpr, const Expr *UpExpr) {
3064             LValue X = CGF.EmitLValue(XExpr);
3065             RValue E;
3066             if (EExpr)
3067               E = CGF.EmitAnyExpr(EExpr);
3068             CGF.EmitOMPAtomicSimpleUpdateExpr(
3069                 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
3070                 [&CGF, UpExpr, VD, IPriv](RValue XRValue) {
3071                   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3072                   PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address {
3073                     Address LHSTemp = CGF.CreateMemTemp(VD->getType());
3074                     CGF.EmitStoreThroughLValue(
3075                         XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType()));
3076                     return LHSTemp;
3077                   });
3078                   (void)PrivateScope.Privatize();
3079                   return CGF.EmitAnyExpr(UpExpr);
3080                 });
3081           };
3082           if ((*IPriv)->getType()->isArrayType()) {
3083             // Emit atomic reduction for array section.
3084             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3085             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
3086                                       AtomicRedGen, XExpr, EExpr, UpExpr);
3087           } else
3088             // Emit atomic reduction for array subscript or single variable.
3089             AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
3090         } else {
3091           // Emit as a critical region.
3092           auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
3093                                              const Expr *, const Expr *) {
3094             emitCriticalRegion(
3095                 CGF, ".atomic_reduction",
3096                 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
3097           };
3098           if ((*IPriv)->getType()->isArrayType()) {
3099             auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3100             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3101             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3102                                       CritRedGen);
3103           } else
3104             CritRedGen(CGF, nullptr, nullptr, nullptr);
3105         }
3106       ++ILHS, ++IRHS, ++IPriv;
3107     }
3108   }
3109
3110   CGF.EmitBranch(DefaultBB);
3111   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
3112 }
3113
3114 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
3115                                        SourceLocation Loc) {
3116   if (!CGF.HaveInsertPoint())
3117     return;
3118   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
3119   // global_tid);
3120   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3121   // Ignore return result until untied tasks are supported.
3122   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
3123 }
3124
3125 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
3126                                            OpenMPDirectiveKind InnerKind,
3127                                            const RegionCodeGenTy &CodeGen,
3128                                            bool HasCancel) {
3129   if (!CGF.HaveInsertPoint())
3130     return;
3131   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
3132   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
3133 }
3134
3135 namespace {
3136 enum RTCancelKind {
3137   CancelNoreq = 0,
3138   CancelParallel = 1,
3139   CancelLoop = 2,
3140   CancelSections = 3,
3141   CancelTaskgroup = 4
3142 };
3143 }
3144
3145 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
3146   RTCancelKind CancelKind = CancelNoreq;
3147   if (CancelRegion == OMPD_parallel)
3148     CancelKind = CancelParallel;
3149   else if (CancelRegion == OMPD_for)
3150     CancelKind = CancelLoop;
3151   else if (CancelRegion == OMPD_sections)
3152     CancelKind = CancelSections;
3153   else {
3154     assert(CancelRegion == OMPD_taskgroup);
3155     CancelKind = CancelTaskgroup;
3156   }
3157   return CancelKind;
3158 }
3159
3160 void CGOpenMPRuntime::emitCancellationPointCall(
3161     CodeGenFunction &CGF, SourceLocation Loc,
3162     OpenMPDirectiveKind CancelRegion) {
3163   if (!CGF.HaveInsertPoint())
3164     return;
3165   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
3166   // global_tid, kmp_int32 cncl_kind);
3167   if (auto *OMPRegionInfo =
3168           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3169     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3170       return;
3171     if (OMPRegionInfo->hasCancel()) {
3172       llvm::Value *Args[] = {
3173           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3174           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3175       // Ignore return result until untied tasks are supported.
3176       auto *Result = CGF.EmitRuntimeCall(
3177           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
3178       // if (__kmpc_cancellationpoint()) {
3179       //  __kmpc_cancel_barrier();
3180       //   exit from construct;
3181       // }
3182       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3183       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3184       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3185       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3186       CGF.EmitBlock(ExitBB);
3187       // __kmpc_cancel_barrier();
3188       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3189       // exit from construct;
3190       auto CancelDest =
3191           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3192       CGF.EmitBranchThroughCleanup(CancelDest);
3193       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3194     }
3195   }
3196 }
3197
3198 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
3199                                      const Expr *IfCond,
3200                                      OpenMPDirectiveKind CancelRegion) {
3201   if (!CGF.HaveInsertPoint())
3202     return;
3203   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
3204   // kmp_int32 cncl_kind);
3205   if (auto *OMPRegionInfo =
3206           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3207     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3208       return;
3209     auto &&ThenGen = [this, Loc, CancelRegion,
3210                       OMPRegionInfo](CodeGenFunction &CGF) {
3211       llvm::Value *Args[] = {
3212           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3213           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3214       // Ignore return result until untied tasks are supported.
3215       auto *Result =
3216           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
3217       // if (__kmpc_cancel()) {
3218       //  __kmpc_cancel_barrier();
3219       //   exit from construct;
3220       // }
3221       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3222       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3223       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3224       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3225       CGF.EmitBlock(ExitBB);
3226       // __kmpc_cancel_barrier();
3227       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3228       // exit from construct;
3229       auto CancelDest =
3230           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3231       CGF.EmitBranchThroughCleanup(CancelDest);
3232       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3233     };
3234     if (IfCond)
3235       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
3236     else
3237       ThenGen(CGF);
3238   }
3239 }
3240
3241 llvm::Value *
3242 CGOpenMPRuntime::emitTargetOutlinedFunction(const OMPExecutableDirective &D,
3243                                             const RegionCodeGenTy &CodeGen) {
3244   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3245
3246   CodeGenFunction CGF(CGM, true);
3247   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen);
3248   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
3249   return CGF.GenerateOpenMPCapturedStmtFunction(CS);
3250 }
3251
3252 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
3253                                      const OMPExecutableDirective &D,
3254                                      llvm::Value *OutlinedFn,
3255                                      const Expr *IfCond, const Expr *Device,
3256                                      ArrayRef<llvm::Value *> CapturedVars) {
3257   if (!CGF.HaveInsertPoint())
3258     return;
3259   /// \brief Values for bit flags used to specify the mapping type for
3260   /// offloading.
3261   enum OpenMPOffloadMappingFlags {
3262     /// \brief Allocate memory on the device and move data from host to device.
3263     OMP_MAP_TO = 0x01,
3264     /// \brief Allocate memory on the device and move data from device to host.
3265     OMP_MAP_FROM = 0x02,
3266     /// \brief The element passed to the device is a pointer.
3267     OMP_MAP_PTR = 0x20,
3268     /// \brief Pass the element to the device by value.
3269     OMP_MAP_BYCOPY = 0x80,
3270   };
3271
3272   enum OpenMPOffloadingReservedDeviceIDs {
3273     /// \brief Device ID if the device was not defined, runtime should get it
3274     /// from environment variables in the spec.
3275     OMP_DEVICEID_UNDEF = -1,
3276   };
3277
3278   auto &Ctx = CGF.getContext();
3279
3280   // Fill up the arrays with the all the captured variables.
3281   SmallVector<llvm::Value *, 16> BasePointers;
3282   SmallVector<llvm::Value *, 16> Pointers;
3283   SmallVector<llvm::Value *, 16> Sizes;
3284   SmallVector<unsigned, 16> MapTypes;
3285
3286   bool hasVLACaptures = false;
3287
3288   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3289   auto RI = CS.getCapturedRecordDecl()->field_begin();
3290   // auto II = CS.capture_init_begin();
3291   auto CV = CapturedVars.begin();
3292   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
3293                                             CE = CS.capture_end();
3294        CI != CE; ++CI, ++RI, ++CV) {
3295     StringRef Name;
3296     QualType Ty;
3297     llvm::Value *BasePointer;
3298     llvm::Value *Pointer;
3299     llvm::Value *Size;
3300     unsigned MapType;
3301
3302     // VLA sizes are passed to the outlined region by copy.
3303     if (CI->capturesVariableArrayType()) {
3304       BasePointer = Pointer = *CV;
3305       Size = getTypeSize(CGF, RI->getType());
3306       // Copy to the device as an argument. No need to retrieve it.
3307       MapType = OMP_MAP_BYCOPY;
3308       hasVLACaptures = true;
3309     } else if (CI->capturesThis()) {
3310       BasePointer = Pointer = *CV;
3311       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
3312       Size = getTypeSize(CGF, PtrTy->getPointeeType());
3313       // Default map type.
3314       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3315     } else if (CI->capturesVariableByCopy()) {
3316       MapType = OMP_MAP_BYCOPY;
3317       if (!RI->getType()->isAnyPointerType()) {
3318         // If the field is not a pointer, we need to save the actual value and
3319         // load it as a void pointer.
3320         auto DstAddr = CGF.CreateMemTemp(
3321             Ctx.getUIntPtrType(),
3322             Twine(CI->getCapturedVar()->getName()) + ".casted");
3323         LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
3324
3325         auto *SrcAddrVal = CGF.EmitScalarConversion(
3326             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
3327             Ctx.getPointerType(RI->getType()), SourceLocation());
3328         LValue SrcLV =
3329             CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
3330
3331         // Store the value using the source type pointer.
3332         CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
3333
3334         // Load the value using the destination type pointer.
3335         BasePointer = Pointer =
3336             CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
3337       } else {
3338         MapType |= OMP_MAP_PTR;
3339         BasePointer = Pointer = *CV;
3340       }
3341       Size = getTypeSize(CGF, RI->getType());
3342     } else {
3343       assert(CI->capturesVariable() && "Expected captured reference.");
3344       BasePointer = Pointer = *CV;
3345
3346       const ReferenceType *PtrTy =
3347           cast<ReferenceType>(RI->getType().getTypePtr());
3348       QualType ElementType = PtrTy->getPointeeType();
3349       Size = getTypeSize(CGF, ElementType);
3350       // The default map type for a scalar/complex type is 'to' because by
3351       // default the value doesn't have to be retrieved. For an aggregate type,
3352       // the default is 'tofrom'.
3353       MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
3354                                                : OMP_MAP_TO;
3355       if (ElementType->isAnyPointerType())
3356         MapType |= OMP_MAP_PTR;
3357     }
3358
3359     BasePointers.push_back(BasePointer);
3360     Pointers.push_back(Pointer);
3361     Sizes.push_back(Size);
3362     MapTypes.push_back(MapType);
3363   }
3364
3365   // Keep track of whether the host function has to be executed.
3366   auto OffloadErrorQType =
3367       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
3368   auto OffloadError = CGF.MakeAddrLValue(
3369       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
3370       OffloadErrorQType);
3371   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
3372                         OffloadError);
3373
3374   // Fill up the pointer arrays and transfer execution to the device.
3375   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
3376                     hasVLACaptures, Device, OffloadError,
3377                     OffloadErrorQType](CodeGenFunction &CGF) {
3378     unsigned PointerNumVal = BasePointers.size();
3379     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
3380     llvm::Value *BasePointersArray;
3381     llvm::Value *PointersArray;
3382     llvm::Value *SizesArray;
3383     llvm::Value *MapTypesArray;
3384
3385     if (PointerNumVal) {
3386       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
3387       QualType PointerArrayType = Ctx.getConstantArrayType(
3388           Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
3389           /*IndexTypeQuals=*/0);
3390
3391       BasePointersArray =
3392           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
3393       PointersArray =
3394           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
3395
3396       // If we don't have any VLA types, we can use a constant array for the map
3397       // sizes, otherwise we need to fill up the arrays as we do for the
3398       // pointers.
3399       if (hasVLACaptures) {
3400         QualType SizeArrayType = Ctx.getConstantArrayType(
3401             Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
3402             /*IndexTypeQuals=*/0);
3403         SizesArray =
3404             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
3405       } else {
3406         // We expect all the sizes to be constant, so we collect them to create
3407         // a constant array.
3408         SmallVector<llvm::Constant *, 16> ConstSizes;
3409         for (auto S : Sizes)
3410           ConstSizes.push_back(cast<llvm::Constant>(S));
3411
3412         auto *SizesArrayInit = llvm::ConstantArray::get(
3413             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
3414         auto *SizesArrayGbl = new llvm::GlobalVariable(
3415             CGM.getModule(), SizesArrayInit->getType(),
3416             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3417             SizesArrayInit, ".offload_sizes");
3418         SizesArrayGbl->setUnnamedAddr(true);
3419         SizesArray = SizesArrayGbl;
3420       }
3421
3422       // The map types are always constant so we don't need to generate code to
3423       // fill arrays. Instead, we create an array constant.
3424       llvm::Constant *MapTypesArrayInit =
3425           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
3426       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
3427           CGM.getModule(), MapTypesArrayInit->getType(),
3428           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3429           MapTypesArrayInit, ".offload_maptypes");
3430       MapTypesArrayGbl->setUnnamedAddr(true);
3431       MapTypesArray = MapTypesArrayGbl;
3432
3433       for (unsigned i = 0; i < PointerNumVal; ++i) {
3434
3435         llvm::Value *BPVal = BasePointers[i];
3436         if (BPVal->getType()->isPointerTy())
3437           BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
3438         else {
3439           assert(BPVal->getType()->isIntegerTy() &&
3440                  "If not a pointer, the value type must be an integer.");
3441           BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
3442         }
3443         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
3444             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
3445             BasePointersArray, 0, i);
3446         Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
3447         CGF.Builder.CreateStore(BPVal, BPAddr);
3448
3449         llvm::Value *PVal = Pointers[i];
3450         if (PVal->getType()->isPointerTy())
3451           PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
3452         else {
3453           assert(PVal->getType()->isIntegerTy() &&
3454                  "If not a pointer, the value type must be an integer.");
3455           PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
3456         }
3457         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
3458             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3459             0, i);
3460         Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
3461         CGF.Builder.CreateStore(PVal, PAddr);
3462
3463         if (hasVLACaptures) {
3464           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
3465               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3466               /*Idx0=*/0,
3467               /*Idx1=*/i);
3468           Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
3469           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
3470                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
3471                                   SAddr);
3472         }
3473       }
3474
3475       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3476           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
3477           /*Idx0=*/0, /*Idx1=*/0);
3478       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3479           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
3480           /*Idx0=*/0,
3481           /*Idx1=*/0);
3482       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3483           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
3484           /*Idx0=*/0, /*Idx1=*/0);
3485       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
3486           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
3487           /*Idx0=*/0,
3488           /*Idx1=*/0);
3489
3490     } else {
3491       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3492       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
3493       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
3494       MapTypesArray =
3495           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
3496     }
3497
3498     // On top of the arrays that were filled up, the target offloading call
3499     // takes as arguments the device id as well as the host pointer. The host
3500     // pointer is used by the runtime library to identify the current target
3501     // region, so it only has to be unique and not necessarily point to
3502     // anything. It could be the pointer to the outlined function that
3503     // implements the target region, but we aren't using that so that the
3504     // compiler doesn't need to keep that, and could therefore inline the host
3505     // function if proven worthwhile during optimization.
3506
3507     llvm::Value *HostPtr = new llvm::GlobalVariable(
3508         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3509         llvm::GlobalValue::PrivateLinkage,
3510         llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
3511
3512     // Emit device ID if any.
3513     llvm::Value *DeviceID;
3514     if (Device)
3515       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3516                                            CGM.Int32Ty, /*isSigned=*/true);
3517     else
3518       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
3519
3520     llvm::Value *OffloadingArgs[] = {
3521         DeviceID,      HostPtr,    PointerNum,   BasePointersArray,
3522         PointersArray, SizesArray, MapTypesArray};
3523     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
3524                                       OffloadingArgs);
3525
3526     CGF.EmitStoreOfScalar(Return, OffloadError);
3527   };
3528
3529   if (IfCond) {
3530     // Notify that the host version must be executed.
3531     auto &&ElseGen = [this, OffloadError,
3532                       OffloadErrorQType](CodeGenFunction &CGF) {
3533       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
3534                             OffloadError);
3535     };
3536     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3537   } else {
3538     CodeGenFunction::RunCleanupsScope Scope(CGF);
3539     ThenGen(CGF);
3540   }
3541
3542   // Check the error code and execute the host version if required.
3543   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
3544   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
3545   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
3546   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
3547   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
3548
3549   CGF.EmitBlock(OffloadFailedBlock);
3550   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
3551   CGF.EmitBranch(OffloadContBlock);
3552
3553   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
3554   return;
3555 }