]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Merge ^/head r294599 through r294776.
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / clang / lib / CodeGen / CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Format.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29
30 using namespace clang;
31 using namespace CodeGen;
32
33 namespace {
34 /// \brief Base class for handling code generation inside OpenMP regions.
35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
36 public:
37   /// \brief Kinds of OpenMP regions used in codegen.
38   enum CGOpenMPRegionKind {
39     /// \brief Region with outlined function for standalone 'parallel'
40     /// directive.
41     ParallelOutlinedRegion,
42     /// \brief Region with outlined function for standalone 'task' directive.
43     TaskOutlinedRegion,
44     /// \brief Region for constructs that do not require function outlining,
45     /// like 'for', 'sections', 'atomic' etc. directives.
46     InlinedRegion,
47     /// \brief Region with outlined function for standalone 'target' directive.
48     TargetRegion,
49   };
50
51   CGOpenMPRegionInfo(const CapturedStmt &CS,
52                      const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
54                      bool HasCancel)
55       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
56         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
57
58   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
59                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
60                      bool HasCancel)
61       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
62         Kind(Kind), HasCancel(HasCancel) {}
63
64   /// \brief Get a variable or parameter for storing global thread id
65   /// inside OpenMP construct.
66   virtual const VarDecl *getThreadIDVariable() const = 0;
67
68   /// \brief Emit the captured statement body.
69   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
70
71   /// \brief Get an LValue for the current ThreadID variable.
72   /// \return LValue for thread id variable. This LValue always has type int32*.
73   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
74
75   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
76
77   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
78
79   bool hasCancel() const { return HasCancel; }
80
81   static bool classof(const CGCapturedStmtInfo *Info) {
82     return Info->getKind() == CR_OpenMP;
83   }
84
85 protected:
86   CGOpenMPRegionKind RegionKind;
87   RegionCodeGenTy CodeGen;
88   OpenMPDirectiveKind Kind;
89   bool HasCancel;
90 };
91
92 /// \brief API for captured statement code generation in OpenMP constructs.
93 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
94 public:
95   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
96                              const RegionCodeGenTy &CodeGen,
97                              OpenMPDirectiveKind Kind, bool HasCancel)
98       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
99                            HasCancel),
100         ThreadIDVar(ThreadIDVar) {
101     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
102   }
103   /// \brief Get a variable or parameter for storing global thread id
104   /// inside OpenMP construct.
105   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
106
107   /// \brief Get the name of the capture helper.
108   StringRef getHelperName() const override { return ".omp_outlined."; }
109
110   static bool classof(const CGCapturedStmtInfo *Info) {
111     return CGOpenMPRegionInfo::classof(Info) &&
112            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
113                ParallelOutlinedRegion;
114   }
115
116 private:
117   /// \brief A variable or parameter storing global thread id for OpenMP
118   /// constructs.
119   const VarDecl *ThreadIDVar;
120 };
121
122 /// \brief API for captured statement code generation in OpenMP constructs.
123 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
124 public:
125   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
126                                  const VarDecl *ThreadIDVar,
127                                  const RegionCodeGenTy &CodeGen,
128                                  OpenMPDirectiveKind Kind, bool HasCancel)
129       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
130         ThreadIDVar(ThreadIDVar) {
131     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
132   }
133   /// \brief Get a variable or parameter for storing global thread id
134   /// inside OpenMP construct.
135   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
136
137   /// \brief Get an LValue for the current ThreadID variable.
138   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
139
140   /// \brief Get the name of the capture helper.
141   StringRef getHelperName() const override { return ".omp_outlined."; }
142
143   static bool classof(const CGCapturedStmtInfo *Info) {
144     return CGOpenMPRegionInfo::classof(Info) &&
145            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
146                TaskOutlinedRegion;
147   }
148
149 private:
150   /// \brief A variable or parameter storing global thread id for OpenMP
151   /// constructs.
152   const VarDecl *ThreadIDVar;
153 };
154
155 /// \brief API for inlined captured statement code generation in OpenMP
156 /// constructs.
157 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
158 public:
159   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
160                             const RegionCodeGenTy &CodeGen,
161                             OpenMPDirectiveKind Kind, bool HasCancel)
162       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
163         OldCSI(OldCSI),
164         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
165   // \brief Retrieve the value of the context parameter.
166   llvm::Value *getContextValue() const override {
167     if (OuterRegionInfo)
168       return OuterRegionInfo->getContextValue();
169     llvm_unreachable("No context value for inlined OpenMP region");
170   }
171   void setContextValue(llvm::Value *V) override {
172     if (OuterRegionInfo) {
173       OuterRegionInfo->setContextValue(V);
174       return;
175     }
176     llvm_unreachable("No context value for inlined OpenMP region");
177   }
178   /// \brief Lookup the captured field decl for a variable.
179   const FieldDecl *lookup(const VarDecl *VD) const override {
180     if (OuterRegionInfo)
181       return OuterRegionInfo->lookup(VD);
182     // If there is no outer outlined region,no need to lookup in a list of
183     // captured variables, we can use the original one.
184     return nullptr;
185   }
186   FieldDecl *getThisFieldDecl() const override {
187     if (OuterRegionInfo)
188       return OuterRegionInfo->getThisFieldDecl();
189     return nullptr;
190   }
191   /// \brief Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override {
194     if (OuterRegionInfo)
195       return OuterRegionInfo->getThreadIDVariable();
196     return nullptr;
197   }
198
199   /// \brief Get the name of the capture helper.
200   StringRef getHelperName() const override {
201     if (auto *OuterRegionInfo = getOldCSI())
202       return OuterRegionInfo->getHelperName();
203     llvm_unreachable("No helper name for inlined OpenMP construct");
204   }
205
206   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
207
208   static bool classof(const CGCapturedStmtInfo *Info) {
209     return CGOpenMPRegionInfo::classof(Info) &&
210            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
211   }
212
213 private:
214   /// \brief CodeGen info about outer OpenMP region.
215   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
216   CGOpenMPRegionInfo *OuterRegionInfo;
217 };
218
219 /// \brief API for captured statement code generation in OpenMP target
220 /// constructs. For this captures, implicit parameters are used instead of the
221 /// captured fields. The name of the target region has to be unique in a given
222 /// application so it is provided by the client, because only the client has
223 /// the information to generate that.
224 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
225 public:
226   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
227                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
228       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
229                            /*HasCancel=*/false),
230         HelperName(HelperName) {}
231
232   /// \brief This is unused for target regions because each starts executing
233   /// with a single thread.
234   const VarDecl *getThreadIDVariable() const override { return nullptr; }
235
236   /// \brief Get the name of the capture helper.
237   StringRef getHelperName() const override { return HelperName; }
238
239   static bool classof(const CGCapturedStmtInfo *Info) {
240     return CGOpenMPRegionInfo::classof(Info) &&
241            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
242   }
243
244 private:
245   StringRef HelperName;
246 };
247
248 /// \brief RAII for emitting code of OpenMP constructs.
249 class InlinedOpenMPRegionRAII {
250   CodeGenFunction &CGF;
251
252 public:
253   /// \brief Constructs region for combined constructs.
254   /// \param CodeGen Code generation sequence for combined directives. Includes
255   /// a list of functions used for code generation of implicitly inlined
256   /// regions.
257   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
258                           OpenMPDirectiveKind Kind, bool HasCancel)
259       : CGF(CGF) {
260     // Start emission for the construct.
261     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
262         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
263   }
264   ~InlinedOpenMPRegionRAII() {
265     // Restore original CapturedStmtInfo only if we're done with code emission.
266     auto *OldCSI =
267         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
268     delete CGF.CapturedStmtInfo;
269     CGF.CapturedStmtInfo = OldCSI;
270   }
271 };
272
273 } // anonymous namespace
274
275 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
276                                       QualType Ty) {
277   AlignmentSource Source;
278   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
279   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
280                             Ty->getPointeeType(), Source);
281 }
282
283 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
284   return emitLoadOfPointerLValue(CGF,
285                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
286                                  getThreadIDVariable()->getType());
287 }
288
289 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
290   if (!CGF.HaveInsertPoint())
291     return;
292   // 1.2.2 OpenMP Language Terminology
293   // Structured block - An executable statement with a single entry at the
294   // top and a single exit at the bottom.
295   // The point of exit cannot be a branch out of the structured block.
296   // longjmp() and throw() must not violate the entry/exit criteria.
297   CGF.EHStack.pushTerminate();
298   {
299     CodeGenFunction::RunCleanupsScope Scope(CGF);
300     CodeGen(CGF);
301   }
302   CGF.EHStack.popTerminate();
303 }
304
305 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
306     CodeGenFunction &CGF) {
307   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
308                             getThreadIDVariable()->getType(),
309                             AlignmentSource::Decl);
310 }
311
312 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
313     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr),
314       OffloadEntriesInfoManager(CGM) {
315   IdentTy = llvm::StructType::create(
316       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
317       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
318       CGM.Int8PtrTy /* psource */, nullptr);
319   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
320   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
321                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
322   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
323   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
324
325   loadOffloadInfoMetadata();
326 }
327
328 void CGOpenMPRuntime::clear() {
329   InternalVars.clear();
330 }
331
332 // Layout information for ident_t.
333 static CharUnits getIdentAlign(CodeGenModule &CGM) {
334   return CGM.getPointerAlign();
335 }
336 static CharUnits getIdentSize(CodeGenModule &CGM) {
337   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
338   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
339 }
340 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
341   // All the fields except the last are i32, so this works beautifully.
342   return unsigned(Field) * CharUnits::fromQuantity(4);
343 }
344 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
345                                    CGOpenMPRuntime::IdentFieldIndex Field,
346                                    const llvm::Twine &Name = "") {
347   auto Offset = getOffsetOfIdentField(Field);
348   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
349 }
350
351 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
352     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
353     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
354   assert(ThreadIDVar->getType()->isPointerType() &&
355          "thread id variable must be of type kmp_int32 *");
356   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
357   CodeGenFunction CGF(CGM, true);
358   bool HasCancel = false;
359   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
360     HasCancel = OPD->hasCancel();
361   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
362     HasCancel = OPSD->hasCancel();
363   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
364     HasCancel = OPFD->hasCancel();
365   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
366                                     HasCancel);
367   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
368   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
369 }
370
371 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
372     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
373     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
374   assert(!ThreadIDVar->getType()->isPointerType() &&
375          "thread id variable must be of type kmp_int32 for tasks");
376   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
377   CodeGenFunction CGF(CGM, true);
378   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
379                                         InnermostKind,
380                                         cast<OMPTaskDirective>(D).hasCancel());
381   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
382   return CGF.GenerateCapturedStmtFunction(*CS);
383 }
384
385 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
386   CharUnits Align = getIdentAlign(CGM);
387   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
388   if (!Entry) {
389     if (!DefaultOpenMPPSource) {
390       // Initialize default location for psource field of ident_t structure of
391       // all ident_t objects. Format is ";file;function;line;column;;".
392       // Taken from
393       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
394       DefaultOpenMPPSource =
395           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
396       DefaultOpenMPPSource =
397           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
398     }
399     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
400         CGM.getModule(), IdentTy, /*isConstant*/ true,
401         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
402     DefaultOpenMPLocation->setUnnamedAddr(true);
403     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
404
405     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
406     llvm::Constant *Values[] = {Zero,
407                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
408                                 Zero, Zero, DefaultOpenMPPSource};
409     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
410     DefaultOpenMPLocation->setInitializer(Init);
411     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
412   }
413   return Address(Entry, Align);
414 }
415
416 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
417                                                  SourceLocation Loc,
418                                                  OpenMPLocationFlags Flags) {
419   // If no debug info is generated - return global default location.
420   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
421       Loc.isInvalid())
422     return getOrCreateDefaultLocation(Flags).getPointer();
423
424   assert(CGF.CurFn && "No function in current CodeGenFunction.");
425
426   Address LocValue = Address::invalid();
427   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
428   if (I != OpenMPLocThreadIDMap.end())
429     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
430
431   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
432   // GetOpenMPThreadID was called before this routine.
433   if (!LocValue.isValid()) {
434     // Generate "ident_t .kmpc_loc.addr;"
435     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
436                                       ".kmpc_loc.addr");
437     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
438     Elem.second.DebugLoc = AI.getPointer();
439     LocValue = AI;
440
441     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
442     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
443     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
444                              CGM.getSize(getIdentSize(CGF.CGM)));
445   }
446
447   // char **psource = &.kmpc_loc_<flags>.addr.psource;
448   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
449
450   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
451   if (OMPDebugLoc == nullptr) {
452     SmallString<128> Buffer2;
453     llvm::raw_svector_ostream OS2(Buffer2);
454     // Build debug location
455     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
456     OS2 << ";" << PLoc.getFilename() << ";";
457     if (const FunctionDecl *FD =
458             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
459       OS2 << FD->getQualifiedNameAsString();
460     }
461     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
462     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
463     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
464   }
465   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
466   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
467
468   // Our callers always pass this to a runtime function, so for
469   // convenience, go ahead and return a naked pointer.
470   return LocValue.getPointer();
471 }
472
473 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
474                                           SourceLocation Loc) {
475   assert(CGF.CurFn && "No function in current CodeGenFunction.");
476
477   llvm::Value *ThreadID = nullptr;
478   // Check whether we've already cached a load of the thread id in this
479   // function.
480   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
481   if (I != OpenMPLocThreadIDMap.end()) {
482     ThreadID = I->second.ThreadID;
483     if (ThreadID != nullptr)
484       return ThreadID;
485   }
486   if (auto OMPRegionInfo =
487           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
488     if (OMPRegionInfo->getThreadIDVariable()) {
489       // Check if this an outlined function with thread id passed as argument.
490       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
491       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
492       // If value loaded in entry block, cache it and use it everywhere in
493       // function.
494       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
495         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
496         Elem.second.ThreadID = ThreadID;
497       }
498       return ThreadID;
499     }
500   }
501
502   // This is not an outlined function region - need to call __kmpc_int32
503   // kmpc_global_thread_num(ident_t *loc).
504   // Generate thread id value and cache this value for use across the
505   // function.
506   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
507   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
508   ThreadID =
509       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
510                           emitUpdateLocation(CGF, Loc));
511   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
512   Elem.second.ThreadID = ThreadID;
513   return ThreadID;
514 }
515
516 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
517   assert(CGF.CurFn && "No function in current CodeGenFunction.");
518   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
519     OpenMPLocThreadIDMap.erase(CGF.CurFn);
520 }
521
522 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
523   return llvm::PointerType::getUnqual(IdentTy);
524 }
525
526 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
527   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
528 }
529
530 llvm::Constant *
531 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
532   llvm::Constant *RTLFn = nullptr;
533   switch (Function) {
534   case OMPRTL__kmpc_fork_call: {
535     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
536     // microtask, ...);
537     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
538                                 getKmpc_MicroPointerTy()};
539     llvm::FunctionType *FnTy =
540         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
541     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
542     break;
543   }
544   case OMPRTL__kmpc_global_thread_num: {
545     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
546     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
547     llvm::FunctionType *FnTy =
548         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
549     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
550     break;
551   }
552   case OMPRTL__kmpc_threadprivate_cached: {
553     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
554     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
555     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
556                                 CGM.VoidPtrTy, CGM.SizeTy,
557                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
558     llvm::FunctionType *FnTy =
559         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
560     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
561     break;
562   }
563   case OMPRTL__kmpc_critical: {
564     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
565     // kmp_critical_name *crit);
566     llvm::Type *TypeParams[] = {
567         getIdentTyPointerTy(), CGM.Int32Ty,
568         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
569     llvm::FunctionType *FnTy =
570         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
571     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
572     break;
573   }
574   case OMPRTL__kmpc_critical_with_hint: {
575     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
576     // kmp_critical_name *crit, uintptr_t hint);
577     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
578                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
579                                 CGM.IntPtrTy};
580     llvm::FunctionType *FnTy =
581         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
582     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
583     break;
584   }
585   case OMPRTL__kmpc_threadprivate_register: {
586     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
587     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
588     // typedef void *(*kmpc_ctor)(void *);
589     auto KmpcCtorTy =
590         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
591                                 /*isVarArg*/ false)->getPointerTo();
592     // typedef void *(*kmpc_cctor)(void *, void *);
593     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
594     auto KmpcCopyCtorTy =
595         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
596                                 /*isVarArg*/ false)->getPointerTo();
597     // typedef void (*kmpc_dtor)(void *);
598     auto KmpcDtorTy =
599         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
600             ->getPointerTo();
601     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
602                               KmpcCopyCtorTy, KmpcDtorTy};
603     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
604                                         /*isVarArg*/ false);
605     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
606     break;
607   }
608   case OMPRTL__kmpc_end_critical: {
609     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
610     // kmp_critical_name *crit);
611     llvm::Type *TypeParams[] = {
612         getIdentTyPointerTy(), CGM.Int32Ty,
613         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
614     llvm::FunctionType *FnTy =
615         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
616     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
617     break;
618   }
619   case OMPRTL__kmpc_cancel_barrier: {
620     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
621     // global_tid);
622     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
623     llvm::FunctionType *FnTy =
624         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
625     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
626     break;
627   }
628   case OMPRTL__kmpc_barrier: {
629     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
630     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
631     llvm::FunctionType *FnTy =
632         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
633     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
634     break;
635   }
636   case OMPRTL__kmpc_for_static_fini: {
637     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
638     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
639     llvm::FunctionType *FnTy =
640         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
641     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
642     break;
643   }
644   case OMPRTL__kmpc_push_num_threads: {
645     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
646     // kmp_int32 num_threads)
647     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
648                                 CGM.Int32Ty};
649     llvm::FunctionType *FnTy =
650         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
651     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
652     break;
653   }
654   case OMPRTL__kmpc_serialized_parallel: {
655     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
656     // global_tid);
657     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
658     llvm::FunctionType *FnTy =
659         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
660     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
661     break;
662   }
663   case OMPRTL__kmpc_end_serialized_parallel: {
664     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
665     // global_tid);
666     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
667     llvm::FunctionType *FnTy =
668         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
669     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
670     break;
671   }
672   case OMPRTL__kmpc_flush: {
673     // Build void __kmpc_flush(ident_t *loc);
674     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
675     llvm::FunctionType *FnTy =
676         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
677     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
678     break;
679   }
680   case OMPRTL__kmpc_master: {
681     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
682     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
683     llvm::FunctionType *FnTy =
684         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
685     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
686     break;
687   }
688   case OMPRTL__kmpc_end_master: {
689     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
690     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
691     llvm::FunctionType *FnTy =
692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
693     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
694     break;
695   }
696   case OMPRTL__kmpc_omp_taskyield: {
697     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
698     // int end_part);
699     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
700     llvm::FunctionType *FnTy =
701         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
702     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
703     break;
704   }
705   case OMPRTL__kmpc_single: {
706     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
707     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
708     llvm::FunctionType *FnTy =
709         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
710     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
711     break;
712   }
713   case OMPRTL__kmpc_end_single: {
714     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
715     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
716     llvm::FunctionType *FnTy =
717         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
718     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
719     break;
720   }
721   case OMPRTL__kmpc_omp_task_alloc: {
722     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
723     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
724     // kmp_routine_entry_t *task_entry);
725     assert(KmpRoutineEntryPtrTy != nullptr &&
726            "Type kmp_routine_entry_t must be created.");
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
728                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
729     // Return void * and then cast to particular kmp_task_t type.
730     llvm::FunctionType *FnTy =
731         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
732     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
733     break;
734   }
735   case OMPRTL__kmpc_omp_task: {
736     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
737     // *new_task);
738     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
739                                 CGM.VoidPtrTy};
740     llvm::FunctionType *FnTy =
741         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
742     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
743     break;
744   }
745   case OMPRTL__kmpc_copyprivate: {
746     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
747     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
748     // kmp_int32 didit);
749     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
750     auto *CpyFnTy =
751         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
752     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
753                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
754                                 CGM.Int32Ty};
755     llvm::FunctionType *FnTy =
756         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
757     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
758     break;
759   }
760   case OMPRTL__kmpc_reduce: {
761     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
762     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
763     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
764     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
765     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
766                                                /*isVarArg=*/false);
767     llvm::Type *TypeParams[] = {
768         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
769         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
770         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
771     llvm::FunctionType *FnTy =
772         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
773     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
774     break;
775   }
776   case OMPRTL__kmpc_reduce_nowait: {
777     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
778     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
779     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
780     // *lck);
781     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
782     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
783                                                /*isVarArg=*/false);
784     llvm::Type *TypeParams[] = {
785         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
786         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
787         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
788     llvm::FunctionType *FnTy =
789         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
790     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
791     break;
792   }
793   case OMPRTL__kmpc_end_reduce: {
794     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
795     // kmp_critical_name *lck);
796     llvm::Type *TypeParams[] = {
797         getIdentTyPointerTy(), CGM.Int32Ty,
798         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
799     llvm::FunctionType *FnTy =
800         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
801     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
802     break;
803   }
804   case OMPRTL__kmpc_end_reduce_nowait: {
805     // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
806     // kmp_critical_name *lck);
807     llvm::Type *TypeParams[] = {
808         getIdentTyPointerTy(), CGM.Int32Ty,
809         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
810     llvm::FunctionType *FnTy =
811         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
812     RTLFn =
813         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
814     break;
815   }
816   case OMPRTL__kmpc_omp_task_begin_if0: {
817     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
818     // kmp_task_t *new_task);
819     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
820                                 CGM.VoidPtrTy};
821     llvm::FunctionType *FnTy =
822         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
823     RTLFn =
824         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
825     break;
826   }
827   case OMPRTL__kmpc_omp_task_complete_if0: {
828     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
829     // kmp_task_t *new_task);
830     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
831                                 CGM.VoidPtrTy};
832     llvm::FunctionType *FnTy =
833         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
834     RTLFn = CGM.CreateRuntimeFunction(FnTy,
835                                       /*Name=*/"__kmpc_omp_task_complete_if0");
836     break;
837   }
838   case OMPRTL__kmpc_ordered: {
839     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
840     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
841     llvm::FunctionType *FnTy =
842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
843     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
844     break;
845   }
846   case OMPRTL__kmpc_end_ordered: {
847     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
848     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
849     llvm::FunctionType *FnTy =
850         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
851     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
852     break;
853   }
854   case OMPRTL__kmpc_omp_taskwait: {
855     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
856     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
857     llvm::FunctionType *FnTy =
858         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
859     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
860     break;
861   }
862   case OMPRTL__kmpc_taskgroup: {
863     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
864     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
865     llvm::FunctionType *FnTy =
866         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
867     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
868     break;
869   }
870   case OMPRTL__kmpc_end_taskgroup: {
871     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
872     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
873     llvm::FunctionType *FnTy =
874         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
875     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
876     break;
877   }
878   case OMPRTL__kmpc_push_proc_bind: {
879     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
880     // int proc_bind)
881     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
882     llvm::FunctionType *FnTy =
883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
884     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
885     break;
886   }
887   case OMPRTL__kmpc_omp_task_with_deps: {
888     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
889     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
890     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
891     llvm::Type *TypeParams[] = {
892         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
893         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
894     llvm::FunctionType *FnTy =
895         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
896     RTLFn =
897         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
898     break;
899   }
900   case OMPRTL__kmpc_omp_wait_deps: {
901     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
902     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
903     // kmp_depend_info_t *noalias_dep_list);
904     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
905                                 CGM.Int32Ty,           CGM.VoidPtrTy,
906                                 CGM.Int32Ty,           CGM.VoidPtrTy};
907     llvm::FunctionType *FnTy =
908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
909     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
910     break;
911   }
912   case OMPRTL__kmpc_cancellationpoint: {
913     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
914     // global_tid, kmp_int32 cncl_kind)
915     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
916     llvm::FunctionType *FnTy =
917         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
918     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
919     break;
920   }
921   case OMPRTL__kmpc_cancel: {
922     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
923     // kmp_int32 cncl_kind)
924     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
925     llvm::FunctionType *FnTy =
926         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
927     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
928     break;
929   }
930   case OMPRTL__tgt_target: {
931     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
932     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
933     // *arg_types);
934     llvm::Type *TypeParams[] = {CGM.Int32Ty,
935                                 CGM.VoidPtrTy,
936                                 CGM.Int32Ty,
937                                 CGM.VoidPtrPtrTy,
938                                 CGM.VoidPtrPtrTy,
939                                 CGM.SizeTy->getPointerTo(),
940                                 CGM.Int32Ty->getPointerTo()};
941     llvm::FunctionType *FnTy =
942         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
943     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
944     break;
945   }
946   case OMPRTL__tgt_register_lib: {
947     // Build int32_t __tgt_register_lib(__tgt_bin_desc *desc);
948     QualType ParamTy =
949         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
950     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
951     llvm::FunctionType *FnTy =
952         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
953     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
954     break;
955   }
956   case OMPRTL__tgt_unregister_lib: {
957     // Build int32_t __tgt_unregister_lib(__tgt_bin_desc *desc);
958     QualType ParamTy =
959         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
960     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
961     llvm::FunctionType *FnTy =
962         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
963     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
964     break;
965   }
966   }
967   return RTLFn;
968 }
969
970 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
971   auto &C = CGF.getContext();
972   llvm::Value *Size = nullptr;
973   auto SizeInChars = C.getTypeSizeInChars(Ty);
974   if (SizeInChars.isZero()) {
975     // getTypeSizeInChars() returns 0 for a VLA.
976     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
977       llvm::Value *ArraySize;
978       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
979       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
980     }
981     SizeInChars = C.getTypeSizeInChars(Ty);
982     assert(!SizeInChars.isZero());
983     Size = CGF.Builder.CreateNUWMul(
984         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
985   } else
986     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
987   return Size;
988 }
989
990 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
991                                                              bool IVSigned) {
992   assert((IVSize == 32 || IVSize == 64) &&
993          "IV size is not compatible with the omp runtime");
994   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
995                                        : "__kmpc_for_static_init_4u")
996                            : (IVSigned ? "__kmpc_for_static_init_8"
997                                        : "__kmpc_for_static_init_8u");
998   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
999   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1000   llvm::Type *TypeParams[] = {
1001     getIdentTyPointerTy(),                     // loc
1002     CGM.Int32Ty,                               // tid
1003     CGM.Int32Ty,                               // schedtype
1004     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1005     PtrTy,                                     // p_lower
1006     PtrTy,                                     // p_upper
1007     PtrTy,                                     // p_stride
1008     ITy,                                       // incr
1009     ITy                                        // chunk
1010   };
1011   llvm::FunctionType *FnTy =
1012       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1013   return CGM.CreateRuntimeFunction(FnTy, Name);
1014 }
1015
1016 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1017                                                             bool IVSigned) {
1018   assert((IVSize == 32 || IVSize == 64) &&
1019          "IV size is not compatible with the omp runtime");
1020   auto Name =
1021       IVSize == 32
1022           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1023           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1024   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1025   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1026                                CGM.Int32Ty,           // tid
1027                                CGM.Int32Ty,           // schedtype
1028                                ITy,                   // lower
1029                                ITy,                   // upper
1030                                ITy,                   // stride
1031                                ITy                    // chunk
1032   };
1033   llvm::FunctionType *FnTy =
1034       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1035   return CGM.CreateRuntimeFunction(FnTy, Name);
1036 }
1037
1038 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1039                                                             bool IVSigned) {
1040   assert((IVSize == 32 || IVSize == 64) &&
1041          "IV size is not compatible with the omp runtime");
1042   auto Name =
1043       IVSize == 32
1044           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1045           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1046   llvm::Type *TypeParams[] = {
1047       getIdentTyPointerTy(), // loc
1048       CGM.Int32Ty,           // tid
1049   };
1050   llvm::FunctionType *FnTy =
1051       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1052   return CGM.CreateRuntimeFunction(FnTy, Name);
1053 }
1054
1055 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1056                                                             bool IVSigned) {
1057   assert((IVSize == 32 || IVSize == 64) &&
1058          "IV size is not compatible with the omp runtime");
1059   auto Name =
1060       IVSize == 32
1061           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1062           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1063   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1064   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1065   llvm::Type *TypeParams[] = {
1066     getIdentTyPointerTy(),                     // loc
1067     CGM.Int32Ty,                               // tid
1068     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1069     PtrTy,                                     // p_lower
1070     PtrTy,                                     // p_upper
1071     PtrTy                                      // p_stride
1072   };
1073   llvm::FunctionType *FnTy =
1074       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1075   return CGM.CreateRuntimeFunction(FnTy, Name);
1076 }
1077
1078 llvm::Constant *
1079 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1080   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1081          !CGM.getContext().getTargetInfo().isTLSSupported());
1082   // Lookup the entry, lazily creating it if necessary.
1083   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1084                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1085 }
1086
1087 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1088                                                 const VarDecl *VD,
1089                                                 Address VDAddr,
1090                                                 SourceLocation Loc) {
1091   if (CGM.getLangOpts().OpenMPUseTLS &&
1092       CGM.getContext().getTargetInfo().isTLSSupported())
1093     return VDAddr;
1094
1095   auto VarTy = VDAddr.getElementType();
1096   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1097                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1098                                                        CGM.Int8PtrTy),
1099                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1100                          getOrCreateThreadPrivateCache(VD)};
1101   return Address(CGF.EmitRuntimeCall(
1102       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1103                  VDAddr.getAlignment());
1104 }
1105
1106 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1107     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1108     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1109   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1110   // library.
1111   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1112   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1113                       OMPLoc);
1114   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1115   // to register constructor/destructor for variable.
1116   llvm::Value *Args[] = {OMPLoc,
1117                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1118                                                        CGM.VoidPtrTy),
1119                          Ctor, CopyCtor, Dtor};
1120   CGF.EmitRuntimeCall(
1121       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1122 }
1123
1124 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1125     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1126     bool PerformInit, CodeGenFunction *CGF) {
1127   if (CGM.getLangOpts().OpenMPUseTLS &&
1128       CGM.getContext().getTargetInfo().isTLSSupported())
1129     return nullptr;
1130
1131   VD = VD->getDefinition(CGM.getContext());
1132   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1133     ThreadPrivateWithDefinition.insert(VD);
1134     QualType ASTTy = VD->getType();
1135
1136     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1137     auto Init = VD->getAnyInitializer();
1138     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1139       // Generate function that re-emits the declaration's initializer into the
1140       // threadprivate copy of the variable VD
1141       CodeGenFunction CtorCGF(CGM);
1142       FunctionArgList Args;
1143       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1144                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1145       Args.push_back(&Dst);
1146
1147       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1148           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1149           /*isVariadic=*/false);
1150       auto FTy = CGM.getTypes().GetFunctionType(FI);
1151       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1152           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1153       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1154                             Args, SourceLocation());
1155       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1156           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1157           CGM.getContext().VoidPtrTy, Dst.getLocation());
1158       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1159       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1160                                              CtorCGF.ConvertTypeForMem(ASTTy));
1161       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1162                                /*IsInitializer=*/true);
1163       ArgVal = CtorCGF.EmitLoadOfScalar(
1164           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1165           CGM.getContext().VoidPtrTy, Dst.getLocation());
1166       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1167       CtorCGF.FinishFunction();
1168       Ctor = Fn;
1169     }
1170     if (VD->getType().isDestructedType() != QualType::DK_none) {
1171       // Generate function that emits destructor call for the threadprivate copy
1172       // of the variable VD
1173       CodeGenFunction DtorCGF(CGM);
1174       FunctionArgList Args;
1175       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1176                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1177       Args.push_back(&Dst);
1178
1179       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1180           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1181           /*isVariadic=*/false);
1182       auto FTy = CGM.getTypes().GetFunctionType(FI);
1183       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1184           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1185       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1186                             SourceLocation());
1187       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1188           DtorCGF.GetAddrOfLocalVar(&Dst),
1189           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1190       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1191                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1192                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1193       DtorCGF.FinishFunction();
1194       Dtor = Fn;
1195     }
1196     // Do not emit init function if it is not required.
1197     if (!Ctor && !Dtor)
1198       return nullptr;
1199
1200     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1201     auto CopyCtorTy =
1202         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1203                                 /*isVarArg=*/false)->getPointerTo();
1204     // Copying constructor for the threadprivate variable.
1205     // Must be NULL - reserved by runtime, but currently it requires that this
1206     // parameter is always NULL. Otherwise it fires assertion.
1207     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1208     if (Ctor == nullptr) {
1209       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1210                                             /*isVarArg=*/false)->getPointerTo();
1211       Ctor = llvm::Constant::getNullValue(CtorTy);
1212     }
1213     if (Dtor == nullptr) {
1214       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1215                                             /*isVarArg=*/false)->getPointerTo();
1216       Dtor = llvm::Constant::getNullValue(DtorTy);
1217     }
1218     if (!CGF) {
1219       auto InitFunctionTy =
1220           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1221       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1222           InitFunctionTy, ".__omp_threadprivate_init_.",
1223           CGM.getTypes().arrangeNullaryFunction());
1224       CodeGenFunction InitCGF(CGM);
1225       FunctionArgList ArgList;
1226       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1227                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1228                             Loc);
1229       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1230       InitCGF.FinishFunction();
1231       return InitFunction;
1232     }
1233     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1234   }
1235   return nullptr;
1236 }
1237
1238 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1239 /// function. Here is the logic:
1240 /// if (Cond) {
1241 ///   ThenGen();
1242 /// } else {
1243 ///   ElseGen();
1244 /// }
1245 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1246                             const RegionCodeGenTy &ThenGen,
1247                             const RegionCodeGenTy &ElseGen) {
1248   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1249
1250   // If the condition constant folds and can be elided, try to avoid emitting
1251   // the condition and the dead arm of the if/else.
1252   bool CondConstant;
1253   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1254     CodeGenFunction::RunCleanupsScope Scope(CGF);
1255     if (CondConstant) {
1256       ThenGen(CGF);
1257     } else {
1258       ElseGen(CGF);
1259     }
1260     return;
1261   }
1262
1263   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1264   // emit the conditional branch.
1265   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1266   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1267   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1268   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1269
1270   // Emit the 'then' code.
1271   CGF.EmitBlock(ThenBlock);
1272   {
1273     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1274     ThenGen(CGF);
1275   }
1276   CGF.EmitBranch(ContBlock);
1277   // Emit the 'else' code if present.
1278   {
1279     // There is no need to emit line number for unconditional branch.
1280     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1281     CGF.EmitBlock(ElseBlock);
1282   }
1283   {
1284     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1285     ElseGen(CGF);
1286   }
1287   {
1288     // There is no need to emit line number for unconditional branch.
1289     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1290     CGF.EmitBranch(ContBlock);
1291   }
1292   // Emit the continuation block for code after the if.
1293   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1294 }
1295
1296 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1297                                        llvm::Value *OutlinedFn,
1298                                        ArrayRef<llvm::Value *> CapturedVars,
1299                                        const Expr *IfCond) {
1300   if (!CGF.HaveInsertPoint())
1301     return;
1302   auto *RTLoc = emitUpdateLocation(CGF, Loc);
1303   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1304                     RTLoc](CodeGenFunction &CGF) {
1305     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1306     llvm::Value *Args[] = {
1307         RTLoc,
1308         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1309         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
1310     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1311     RealArgs.append(std::begin(Args), std::end(Args));
1312     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1313
1314     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1315     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1316   };
1317   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1318                     Loc](CodeGenFunction &CGF) {
1319     auto ThreadID = getThreadID(CGF, Loc);
1320     // Build calls:
1321     // __kmpc_serialized_parallel(&Loc, GTid);
1322     llvm::Value *Args[] = {RTLoc, ThreadID};
1323     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1324                         Args);
1325
1326     // OutlinedFn(&GTid, &zero, CapturedStruct);
1327     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
1328     Address ZeroAddr =
1329       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1330                            /*Name*/ ".zero.addr");
1331     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1332     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1333     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1334     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1335     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1336     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1337
1338     // __kmpc_end_serialized_parallel(&Loc, GTid);
1339     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1340     CGF.EmitRuntimeCall(
1341         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1342   };
1343   if (IfCond) {
1344     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1345   } else {
1346     CodeGenFunction::RunCleanupsScope Scope(CGF);
1347     ThenGen(CGF);
1348   }
1349 }
1350
1351 // If we're inside an (outlined) parallel region, use the region info's
1352 // thread-ID variable (it is passed in a first argument of the outlined function
1353 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1354 // regular serial code region, get thread ID by calling kmp_int32
1355 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1356 // return the address of that temp.
1357 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1358                                              SourceLocation Loc) {
1359   if (auto OMPRegionInfo =
1360           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1361     if (OMPRegionInfo->getThreadIDVariable())
1362       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1363
1364   auto ThreadID = getThreadID(CGF, Loc);
1365   auto Int32Ty =
1366       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1367   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1368   CGF.EmitStoreOfScalar(ThreadID,
1369                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1370
1371   return ThreadIDTemp;
1372 }
1373
1374 llvm::Constant *
1375 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1376                                              const llvm::Twine &Name) {
1377   SmallString<256> Buffer;
1378   llvm::raw_svector_ostream Out(Buffer);
1379   Out << Name;
1380   auto RuntimeName = Out.str();
1381   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1382   if (Elem.second) {
1383     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1384            "OMP internal variable has different type than requested");
1385     return &*Elem.second;
1386   }
1387
1388   return Elem.second = new llvm::GlobalVariable(
1389              CGM.getModule(), Ty, /*IsConstant*/ false,
1390              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1391              Elem.first());
1392 }
1393
1394 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1395   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1396   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1397 }
1398
1399 namespace {
1400 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
1401   llvm::Value *Callee;
1402   llvm::Value *Args[N];
1403
1404 public:
1405   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1406       : Callee(Callee) {
1407     assert(CleanupArgs.size() == N);
1408     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1409   }
1410   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1411     if (!CGF.HaveInsertPoint())
1412       return;
1413     CGF.EmitRuntimeCall(Callee, Args);
1414   }
1415 };
1416 } // anonymous namespace
1417
1418 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1419                                          StringRef CriticalName,
1420                                          const RegionCodeGenTy &CriticalOpGen,
1421                                          SourceLocation Loc, const Expr *Hint) {
1422   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1423   // CriticalOpGen();
1424   // __kmpc_end_critical(ident_t *, gtid, Lock);
1425   // Prepare arguments and build a call to __kmpc_critical
1426   if (!CGF.HaveInsertPoint())
1427     return;
1428   CodeGenFunction::RunCleanupsScope Scope(CGF);
1429   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1430                          getCriticalRegionLock(CriticalName)};
1431   if (Hint) {
1432     llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
1433                                                      std::end(Args));
1434     auto *HintVal = CGF.EmitScalarExpr(Hint);
1435     ArgsWithHint.push_back(
1436         CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
1437     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
1438                         ArgsWithHint);
1439   } else
1440     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1441   // Build a call to __kmpc_end_critical
1442   CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1443       NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1444       llvm::makeArrayRef(Args));
1445   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1446 }
1447
1448 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1449                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1450                        const RegionCodeGenTy &BodyOpGen) {
1451   llvm::Value *CallBool = CGF.EmitScalarConversion(
1452       IfCond,
1453       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1454       CGF.getContext().BoolTy, Loc);
1455
1456   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1457   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1458   // Generate the branch (If-stmt)
1459   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1460   CGF.EmitBlock(ThenBlock);
1461   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1462   // Emit the rest of bblocks/branches
1463   CGF.EmitBranch(ContBlock);
1464   CGF.EmitBlock(ContBlock, true);
1465 }
1466
1467 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1468                                        const RegionCodeGenTy &MasterOpGen,
1469                                        SourceLocation Loc) {
1470   if (!CGF.HaveInsertPoint())
1471     return;
1472   // if(__kmpc_master(ident_t *, gtid)) {
1473   //   MasterOpGen();
1474   //   __kmpc_end_master(ident_t *, gtid);
1475   // }
1476   // Prepare arguments and build a call to __kmpc_master
1477   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1478   auto *IsMaster =
1479       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
1480   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1481       MasterCallEndCleanup;
1482   emitIfStmt(
1483       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1484         CodeGenFunction::RunCleanupsScope Scope(CGF);
1485         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1486             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1487             llvm::makeArrayRef(Args));
1488         MasterOpGen(CGF);
1489       });
1490 }
1491
1492 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1493                                         SourceLocation Loc) {
1494   if (!CGF.HaveInsertPoint())
1495     return;
1496   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1497   llvm::Value *Args[] = {
1498       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1499       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1500   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1501 }
1502
1503 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1504                                           const RegionCodeGenTy &TaskgroupOpGen,
1505                                           SourceLocation Loc) {
1506   if (!CGF.HaveInsertPoint())
1507     return;
1508   // __kmpc_taskgroup(ident_t *, gtid);
1509   // TaskgroupOpGen();
1510   // __kmpc_end_taskgroup(ident_t *, gtid);
1511   // Prepare arguments and build a call to __kmpc_taskgroup
1512   {
1513     CodeGenFunction::RunCleanupsScope Scope(CGF);
1514     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1515     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1516     // Build a call to __kmpc_end_taskgroup
1517     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1518         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1519         llvm::makeArrayRef(Args));
1520     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1521   }
1522 }
1523
1524 /// Given an array of pointers to variables, project the address of a
1525 /// given variable.
1526 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
1527                                       unsigned Index, const VarDecl *Var) {
1528   // Pull out the pointer to the variable.
1529   Address PtrAddr =
1530       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1531   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1532
1533   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1534   Addr = CGF.Builder.CreateElementBitCast(
1535       Addr, CGF.ConvertTypeForMem(Var->getType()));
1536   return Addr;
1537 }
1538
1539 static llvm::Value *emitCopyprivateCopyFunction(
1540     CodeGenModule &CGM, llvm::Type *ArgsType,
1541     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1542     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1543   auto &C = CGM.getContext();
1544   // void copy_func(void *LHSArg, void *RHSArg);
1545   FunctionArgList Args;
1546   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1547                            C.VoidPtrTy);
1548   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1549                            C.VoidPtrTy);
1550   Args.push_back(&LHSArg);
1551   Args.push_back(&RHSArg);
1552   FunctionType::ExtInfo EI;
1553   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1554       C.VoidTy, Args, EI, /*isVariadic=*/false);
1555   auto *Fn = llvm::Function::Create(
1556       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1557       ".omp.copyprivate.copy_func", &CGM.getModule());
1558   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
1559   CodeGenFunction CGF(CGM);
1560   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1561   // Dest = (void*[n])(LHSArg);
1562   // Src = (void*[n])(RHSArg);
1563   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1564       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1565       ArgsType), CGF.getPointerAlign());
1566   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1567       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1568       ArgsType), CGF.getPointerAlign());
1569   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1570   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1571   // ...
1572   // *(Typen*)Dst[n] = *(Typen*)Src[n];
1573   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1574     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1575     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1576
1577     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1578     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1579
1580     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1581     QualType Type = VD->getType();
1582     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1583   }
1584   CGF.FinishFunction();
1585   return Fn;
1586 }
1587
1588 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1589                                        const RegionCodeGenTy &SingleOpGen,
1590                                        SourceLocation Loc,
1591                                        ArrayRef<const Expr *> CopyprivateVars,
1592                                        ArrayRef<const Expr *> SrcExprs,
1593                                        ArrayRef<const Expr *> DstExprs,
1594                                        ArrayRef<const Expr *> AssignmentOps) {
1595   if (!CGF.HaveInsertPoint())
1596     return;
1597   assert(CopyprivateVars.size() == SrcExprs.size() &&
1598          CopyprivateVars.size() == DstExprs.size() &&
1599          CopyprivateVars.size() == AssignmentOps.size());
1600   auto &C = CGM.getContext();
1601   // int32 did_it = 0;
1602   // if(__kmpc_single(ident_t *, gtid)) {
1603   //   SingleOpGen();
1604   //   __kmpc_end_single(ident_t *, gtid);
1605   //   did_it = 1;
1606   // }
1607   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1608   // <copy_func>, did_it);
1609
1610   Address DidIt = Address::invalid();
1611   if (!CopyprivateVars.empty()) {
1612     // int32 did_it = 0;
1613     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1614     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1615     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
1616   }
1617   // Prepare arguments and build a call to __kmpc_single
1618   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1619   auto *IsSingle =
1620       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1621   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1622       SingleCallEndCleanup;
1623   emitIfStmt(
1624       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1625         CodeGenFunction::RunCleanupsScope Scope(CGF);
1626         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1627             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1628             llvm::makeArrayRef(Args));
1629         SingleOpGen(CGF);
1630         if (DidIt.isValid()) {
1631           // did_it = 1;
1632           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
1633         }
1634       });
1635   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1636   // <copy_func>, did_it);
1637   if (DidIt.isValid()) {
1638     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1639     auto CopyprivateArrayTy =
1640         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1641                                /*IndexTypeQuals=*/0);
1642     // Create a list of all private variables for copyprivate.
1643     Address CopyprivateList =
1644         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
1645     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1646       Address Elem = CGF.Builder.CreateConstArrayGEP(
1647           CopyprivateList, I, CGF.getPointerSize());
1648       CGF.Builder.CreateStore(
1649           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1650               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
1651           Elem);
1652     }
1653     // Build function that copies private values from single region to all other
1654     // threads in the corresponding parallel region.
1655     auto *CpyFn = emitCopyprivateCopyFunction(
1656         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1657         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1658     auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
1659     Address CL =
1660       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1661                                                       CGF.VoidPtrTy);
1662     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
1663     llvm::Value *Args[] = {
1664         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1665         getThreadID(CGF, Loc),        // i32 <gtid>
1666         BufSize,                      // size_t <buf_size>
1667         CL.getPointer(),              // void *<copyprivate list>
1668         CpyFn,                        // void (*) (void *, void *) <copy_func>
1669         DidItVal                      // i32 did_it
1670     };
1671     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1672   }
1673 }
1674
1675 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1676                                         const RegionCodeGenTy &OrderedOpGen,
1677                                         SourceLocation Loc, bool IsThreads) {
1678   if (!CGF.HaveInsertPoint())
1679     return;
1680   // __kmpc_ordered(ident_t *, gtid);
1681   // OrderedOpGen();
1682   // __kmpc_end_ordered(ident_t *, gtid);
1683   // Prepare arguments and build a call to __kmpc_ordered
1684   CodeGenFunction::RunCleanupsScope Scope(CGF);
1685   if (IsThreads) {
1686     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1687     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1688     // Build a call to __kmpc_end_ordered
1689     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1690         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1691         llvm::makeArrayRef(Args));
1692   }
1693   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1694 }
1695
1696 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1697                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1698                                       bool ForceSimpleCall) {
1699   if (!CGF.HaveInsertPoint())
1700     return;
1701   // Build call __kmpc_cancel_barrier(loc, thread_id);
1702   // Build call __kmpc_barrier(loc, thread_id);
1703   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
1704   if (Kind == OMPD_for) {
1705     Flags =
1706         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1707   } else if (Kind == OMPD_sections) {
1708     Flags = static_cast<OpenMPLocationFlags>(Flags |
1709                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1710   } else if (Kind == OMPD_single) {
1711     Flags =
1712         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1713   } else if (Kind == OMPD_barrier) {
1714     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1715   } else {
1716     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1717   }
1718   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1719   // thread_id);
1720   auto *OMPRegionInfo =
1721       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
1722   // Do not emit barrier call in the single directive emitted in some rare cases
1723   // for sections directives.
1724   if (OMPRegionInfo && OMPRegionInfo->getDirectiveKind() == OMPD_single)
1725     return;
1726   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1727                          getThreadID(CGF, Loc)};
1728   if (OMPRegionInfo) {
1729     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1730       auto *Result = CGF.EmitRuntimeCall(
1731           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1732       if (EmitChecks) {
1733         // if (__kmpc_cancel_barrier()) {
1734         //   exit from construct;
1735         // }
1736         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1737         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1738         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1739         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1740         CGF.EmitBlock(ExitBB);
1741         //   exit from construct;
1742         auto CancelDestination =
1743             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1744         CGF.EmitBranchThroughCleanup(CancelDestination);
1745         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1746       }
1747       return;
1748     }
1749   }
1750   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1751 }
1752
namespace {
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The enum is only used inside this file, so it lives in an anonymous
/// namespace like the other file-local declarations here. Each 'ordered'
/// enumerator has a value 32 above its unordered counterpart.
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// \brief Alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
};
} // anonymous namespace
1774
1775 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
1776 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1777                                           bool Chunked, bool Ordered) {
1778   switch (ScheduleKind) {
1779   case OMPC_SCHEDULE_static:
1780     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1781                    : (Ordered ? OMP_ord_static : OMP_sch_static);
1782   case OMPC_SCHEDULE_dynamic:
1783     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1784   case OMPC_SCHEDULE_guided:
1785     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1786   case OMPC_SCHEDULE_runtime:
1787     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1788   case OMPC_SCHEDULE_auto:
1789     return Ordered ? OMP_ord_auto : OMP_sch_auto;
1790   case OMPC_SCHEDULE_unknown:
1791     assert(!Chunked && "chunk was specified but schedule kind not known");
1792     return Ordered ? OMP_ord_static : OMP_sch_static;
1793   }
1794   llvm_unreachable("Unexpected runtime schedule");
1795 }
1796
1797 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1798                                          bool Chunked) const {
1799   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1800   return Schedule == OMP_sch_static;
1801 }
1802
1803 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1804   auto Schedule =
1805       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
1806   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1807   return Schedule != OMP_sch_static;
1808 }
1809
1810 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1811                                           SourceLocation Loc,
1812                                           OpenMPScheduleClauseKind ScheduleKind,
1813                                           unsigned IVSize, bool IVSigned,
1814                                           bool Ordered, llvm::Value *UB,
1815                                           llvm::Value *Chunk) {
1816   if (!CGF.HaveInsertPoint())
1817     return;
1818   OpenMPSchedType Schedule =
1819       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1820   assert(Ordered ||
1821          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1822           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1823   // Call __kmpc_dispatch_init(
1824   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1825   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1826   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1827
1828   // If the Chunk was not specified in the clause - use default value 1.
1829   if (Chunk == nullptr)
1830     Chunk = CGF.Builder.getIntN(IVSize, 1);
1831   llvm::Value *Args[] = {
1832     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1833     getThreadID(CGF, Loc),
1834     CGF.Builder.getInt32(Schedule), // Schedule type
1835     CGF.Builder.getIntN(IVSize, 0), // Lower
1836     UB,                             // Upper
1837     CGF.Builder.getIntN(IVSize, 1), // Stride
1838     Chunk                           // Chunk
1839   };
1840   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1841 }
1842
1843 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1844                                         SourceLocation Loc,
1845                                         OpenMPScheduleClauseKind ScheduleKind,
1846                                         unsigned IVSize, bool IVSigned,
1847                                         bool Ordered, Address IL, Address LB,
1848                                         Address UB, Address ST,
1849                                         llvm::Value *Chunk) {
1850   if (!CGF.HaveInsertPoint())
1851     return;
1852   OpenMPSchedType Schedule =
1853     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1854   assert(!Ordered);
1855   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1856          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1857
1858   // Call __kmpc_for_static_init(
1859   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1860   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1861   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1862   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
1863   if (Chunk == nullptr) {
1864     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1865            "expected static non-chunked schedule");
1866     // If the Chunk was not specified in the clause - use default value 1.
1867       Chunk = CGF.Builder.getIntN(IVSize, 1);
1868   } else {
1869     assert((Schedule == OMP_sch_static_chunked ||
1870             Schedule == OMP_ord_static_chunked) &&
1871            "expected static chunked schedule");
1872   }
1873   llvm::Value *Args[] = {
1874     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1875     getThreadID(CGF, Loc),
1876     CGF.Builder.getInt32(Schedule), // Schedule type
1877     IL.getPointer(),                // &isLastIter
1878     LB.getPointer(),                // &LB
1879     UB.getPointer(),                // &UB
1880     ST.getPointer(),                // &Stride
1881     CGF.Builder.getIntN(IVSize, 1), // Incr
1882     Chunk                           // Chunk
1883   };
1884   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1885 }
1886
1887 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1888                                           SourceLocation Loc) {
1889   if (!CGF.HaveInsertPoint())
1890     return;
1891   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1892   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1893                          getThreadID(CGF, Loc)};
1894   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1895                       Args);
1896 }
1897
1898 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1899                                                  SourceLocation Loc,
1900                                                  unsigned IVSize,
1901                                                  bool IVSigned) {
1902   if (!CGF.HaveInsertPoint())
1903     return;
1904   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1905   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1906                          getThreadID(CGF, Loc)};
1907   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1908 }
1909
1910 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1911                                           SourceLocation Loc, unsigned IVSize,
1912                                           bool IVSigned, Address IL,
1913                                           Address LB, Address UB,
1914                                           Address ST) {
1915   // Call __kmpc_dispatch_next(
1916   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1917   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1918   //          kmp_int[32|64] *p_stride);
1919   llvm::Value *Args[] = {
1920       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1921       IL.getPointer(), // &isLastIter
1922       LB.getPointer(), // &Lower
1923       UB.getPointer(), // &Upper
1924       ST.getPointer()  // &Stride
1925   };
1926   llvm::Value *Call =
1927       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1928   return CGF.EmitScalarConversion(
1929       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1930       CGF.getContext().BoolTy, Loc);
1931 }
1932
1933 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1934                                            llvm::Value *NumThreads,
1935                                            SourceLocation Loc) {
1936   if (!CGF.HaveInsertPoint())
1937     return;
1938   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1939   llvm::Value *Args[] = {
1940       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1941       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1942   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1943                       Args);
1944 }
1945
1946 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1947                                          OpenMPProcBindClauseKind ProcBind,
1948                                          SourceLocation Loc) {
1949   if (!CGF.HaveInsertPoint())
1950     return;
1951   // Constants for proc bind value accepted by the runtime.
1952   enum ProcBindTy {
1953     ProcBindFalse = 0,
1954     ProcBindTrue,
1955     ProcBindMaster,
1956     ProcBindClose,
1957     ProcBindSpread,
1958     ProcBindIntel,
1959     ProcBindDefault
1960   } RuntimeProcBind;
1961   switch (ProcBind) {
1962   case OMPC_PROC_BIND_master:
1963     RuntimeProcBind = ProcBindMaster;
1964     break;
1965   case OMPC_PROC_BIND_close:
1966     RuntimeProcBind = ProcBindClose;
1967     break;
1968   case OMPC_PROC_BIND_spread:
1969     RuntimeProcBind = ProcBindSpread;
1970     break;
1971   case OMPC_PROC_BIND_unknown:
1972     llvm_unreachable("Unsupported proc_bind value.");
1973   }
1974   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1975   llvm::Value *Args[] = {
1976       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1977       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1978   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1979 }
1980
1981 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1982                                 SourceLocation Loc) {
1983   if (!CGF.HaveInsertPoint())
1984     return;
1985   // Build call void __kmpc_flush(ident_t *loc)
1986   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1987                       emitUpdateLocation(CGF, Loc));
1988 }
1989
namespace {
/// \brief Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// \brief List of shared variables.
  KmpTaskTShareds = 0,
  /// \brief Task routine.
  KmpTaskTRoutine = 1,
  /// \brief Partition id for the untied tasks.
  KmpTaskTPartId = 2,
  /// \brief Function with call of destructors for private variables.
  KmpTaskTDestructors = 3,
};
} // anonymous namespace
2003
2004 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2005   // FIXME: Add other entries type when they become supported.
2006   return OffloadEntriesTargetRegion.empty();
2007 }
2008
2009 /// \brief Initialize target region entry.
2010 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2011     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2012                                     StringRef ParentName, unsigned LineNum,
2013                                     unsigned ColNum, unsigned Order) {
2014   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2015                                              "only required for the device "
2016                                              "code generation.");
2017   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
2018       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2019   ++OffloadingEntriesNum;
2020 }
2021
2022 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2023     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2024                                   StringRef ParentName, unsigned LineNum,
2025                                   unsigned ColNum, llvm::Constant *Addr,
2026                                   llvm::Constant *ID) {
2027   // If we are emitting code for a target, the entry is already initialized,
2028   // only has to be registered.
2029   if (CGM.getLangOpts().OpenMPIsDevice) {
2030     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2031                                     ColNum) &&
2032            "Entry must exist.");
2033     auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName]
2034                                             [LineNum][ColNum];
2035     assert(Entry.isValid() && "Entry not initialized!");
2036     Entry.setAddress(Addr);
2037     Entry.setID(ID);
2038     return;
2039   } else {
2040     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2041     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
2042         Entry;
2043   }
2044 }
2045
2046 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2047     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2048     unsigned ColNum) const {
2049   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2050   if (PerDevice == OffloadEntriesTargetRegion.end())
2051     return false;
2052   auto PerFile = PerDevice->second.find(FileID);
2053   if (PerFile == PerDevice->second.end())
2054     return false;
2055   auto PerParentName = PerFile->second.find(ParentName);
2056   if (PerParentName == PerFile->second.end())
2057     return false;
2058   auto PerLine = PerParentName->second.find(LineNum);
2059   if (PerLine == PerParentName->second.end())
2060     return false;
2061   auto PerColumn = PerLine->second.find(ColNum);
2062   if (PerColumn == PerLine->second.end())
2063     return false;
2064   // Fail if this entry is already registered.
2065   if (PerColumn->second.getAddress() || PerColumn->second.getID())
2066     return false;
2067   return true;
2068 }
2069
2070 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2071     const OffloadTargetRegionEntryInfoActTy &Action) {
2072   // Scan all target region entries and perform the provided action.
2073   for (auto &D : OffloadEntriesTargetRegion)
2074     for (auto &F : D.second)
2075       for (auto &P : F.second)
2076         for (auto &L : P.second)
2077           for (auto &C : L.second)
2078             Action(D.first, F.first, P.first(), L.first, C.first, C.second);
2079 }
2080
2081 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2082 /// \a Codegen. This is used to emit the two functions that register and
2083 /// unregister the descriptor of the current compilation unit.
2084 static llvm::Function *
2085 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2086                                          const RegionCodeGenTy &Codegen) {
2087   auto &C = CGM.getContext();
2088   FunctionArgList Args;
2089   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2090                              /*Id=*/nullptr, C.VoidPtrTy);
2091   Args.push_back(&DummyPtr);
2092
2093   CodeGenFunction CGF(CGM);
2094   GlobalDecl();
2095   auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2096       C.VoidTy, Args, FunctionType::ExtInfo(),
2097       /*isVariadic=*/false);
2098   auto FTy = CGM.getTypes().GetFunctionType(FI);
2099   auto *Fn =
2100       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2101   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2102   Codegen(CGF);
2103   CGF.FinishFunction();
2104   return Fn;
2105 }
2106
2107 llvm::Function *
2108 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2109
2110   // If we don't have entries or if we are emitting code for the device, we
2111   // don't need to do anything.
2112   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2113     return nullptr;
2114
2115   auto &M = CGM.getModule();
2116   auto &C = CGM.getContext();
2117
2118   // Get list of devices we care about
2119   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2120
2121   // We should be creating an offloading descriptor only if there are devices
2122   // specified.
2123   assert(!Devices.empty() && "No OpenMP offloading devices??");
2124
2125   // Create the external variables that will point to the begin and end of the
2126   // host entries section. These will be defined by the linker.
2127   auto *OffloadEntryTy =
2128       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2129   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2130       M, OffloadEntryTy, /*isConstant=*/true,
2131       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
2132       ".omp_offloading.entries_begin");
2133   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2134       M, OffloadEntryTy, /*isConstant=*/true,
2135       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
2136       ".omp_offloading.entries_end");
2137
2138   // Create all device images
2139   llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
2140   auto *DeviceImageTy = cast<llvm::StructType>(
2141       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2142
2143   for (unsigned i = 0; i < Devices.size(); ++i) {
2144     StringRef T = Devices[i].getTriple();
2145     auto *ImgBegin = new llvm::GlobalVariable(
2146         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2147         /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T));
2148     auto *ImgEnd = new llvm::GlobalVariable(
2149         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2150         /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T));
2151
2152     llvm::Constant *Dev =
2153         llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
2154                                   HostEntriesBegin, HostEntriesEnd, nullptr);
2155     DeviceImagesEntires.push_back(Dev);
2156   }
2157
2158   // Create device images global array.
2159   llvm::ArrayType *DeviceImagesInitTy =
2160       llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
2161   llvm::Constant *DeviceImagesInit =
2162       llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);
2163
2164   llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
2165       M, DeviceImagesInitTy, /*isConstant=*/true,
2166       llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
2167       ".omp_offloading.device_images");
2168   DeviceImages->setUnnamedAddr(true);
2169
2170   // This is a Zero array to be used in the creation of the constant expressions
2171   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2172                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2173
2174   // Create the target region descriptor.
2175   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2176       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2177   llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
2178       BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
2179       llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
2180                                            Index),
2181       HostEntriesBegin, HostEntriesEnd, nullptr);
2182
2183   auto *Desc = new llvm::GlobalVariable(
2184       M, BinaryDescriptorTy, /*isConstant=*/true,
2185       llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
2186       ".omp_offloading.descriptor");
2187
2188   // Emit code to register or unregister the descriptor at execution
2189   // startup or closing, respectively.
2190
2191   // Create a variable to drive the registration and unregistration of the
2192   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2193   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2194   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2195                                 IdentInfo, C.CharTy);
2196
2197   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2198       CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) {
2199         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2200                              Desc);
2201       });
2202   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2203       CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) {
2204         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2205                              Desc);
2206         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2207       });
2208   return RegFn;
2209 }
2210
2211 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name,
2212                                          uint64_t Size) {
2213   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2214       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2215   llvm::LLVMContext &C = CGM.getModule().getContext();
2216   llvm::Module &M = CGM.getModule();
2217
2218   // Make sure the address has the right type.
2219   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy);
2220
2221   // Create constant string with the name.
2222   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2223
2224   llvm::GlobalVariable *Str =
2225       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2226                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2227                                ".omp_offloading.entry_name");
2228   Str->setUnnamedAddr(true);
2229   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2230
2231   // Create the entry struct.
2232   llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2233       TgtOffloadEntryType, AddrPtr, StrPtr,
2234       llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2235   llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2236       M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2237       EntryInit, ".omp_offloading.entry");
2238
2239   // The entry has to be created in the section the linker expects it to be.
2240   Entry->setSection(".omp_offloading.entries");
2241   // We can't have any padding between symbols, so we need to have 1-byte
2242   // alignment.
2243   Entry->setAlignment(1);
2244   return;
2245 }
2246
2247 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2248   // Emit the offloading entries and metadata so that the device codegen side
2249   // can
2250   // easily figure out what to emit. The produced metadata looks like this:
2251   //
2252   // !omp_offload.info = !{!1, ...}
2253   //
2254   // Right now we only generate metadata for function that contain target
2255   // regions.
2256
2257   // If we do not have entries, we dont need to do anything.
2258   if (OffloadEntriesInfoManager.empty())
2259     return;
2260
2261   llvm::Module &M = CGM.getModule();
2262   llvm::LLVMContext &C = M.getContext();
2263   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2264       OrderedEntries(OffloadEntriesInfoManager.size());
2265
2266   // Create the offloading info metadata node.
2267   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2268
2269   // Auxiliar methods to create metadata values and strings.
2270   auto getMDInt = [&](unsigned v) {
2271     return llvm::ConstantAsMetadata::get(
2272         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2273   };
2274
2275   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2276
2277   // Create function that emits metadata for each target region entry;
2278   auto &&TargetRegionMetadataEmitter = [&](
2279       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2280       unsigned Column,
2281       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2282     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2283     // Generate metadata for target regions. Each entry of this metadata
2284     // contains:
2285     // - Entry 0 -> Kind of this type of metadata (0).
2286     // - Entry 1 -> Device ID of the file where the entry was identified.
2287     // - Entry 2 -> File ID of the file where the entry was identified.
2288     // - Entry 3 -> Mangled name of the function where the entry was identified.
2289     // - Entry 4 -> Line in the file where the entry was identified.
2290     // - Entry 5 -> Column in the file where the entry was identified.
2291     // - Entry 6 -> Order the entry was created.
2292     // The first element of the metadata node is the kind.
2293     Ops.push_back(getMDInt(E.getKind()));
2294     Ops.push_back(getMDInt(DeviceID));
2295     Ops.push_back(getMDInt(FileID));
2296     Ops.push_back(getMDString(ParentName));
2297     Ops.push_back(getMDInt(Line));
2298     Ops.push_back(getMDInt(Column));
2299     Ops.push_back(getMDInt(E.getOrder()));
2300
2301     // Save this entry in the right position of the ordered entries array.
2302     OrderedEntries[E.getOrder()] = &E;
2303
2304     // Add metadata to the named metadata node.
2305     MD->addOperand(llvm::MDNode::get(C, Ops));
2306   };
2307
2308   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2309       TargetRegionMetadataEmitter);
2310
2311   for (auto *E : OrderedEntries) {
2312     assert(E && "All ordered entries must exist!");
2313     if (auto *CE =
2314             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2315                 E)) {
2316       assert(CE->getID() && CE->getAddress() &&
2317              "Entry ID and Addr are invalid!");
2318       createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0);
2319     } else
2320       llvm_unreachable("Unsupported entry kind.");
2321   }
2322 }
2323
2324 /// \brief Loads all the offload entries information from the host IR
2325 /// metadata.
2326 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
2327   // If we are in target mode, load the metadata from the host IR. This code has
2328   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
2329
2330   if (!CGM.getLangOpts().OpenMPIsDevice)
2331     return;
2332
2333   if (CGM.getLangOpts().OMPHostIRFile.empty())
2334     return;
2335
2336   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
2337   if (Buf.getError())
2338     return;
2339
2340   llvm::LLVMContext C;
2341   auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
2342
2343   if (ME.getError())
2344     return;
2345
2346   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
2347   if (!MD)
2348     return;
2349
2350   for (auto I : MD->operands()) {
2351     llvm::MDNode *MN = cast<llvm::MDNode>(I);
2352
2353     auto getMDInt = [&](unsigned Idx) {
2354       llvm::ConstantAsMetadata *V =
2355           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
2356       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
2357     };
2358
2359     auto getMDString = [&](unsigned Idx) {
2360       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
2361       return V->getString();
2362     };
2363
2364     switch (getMDInt(0)) {
2365     default:
2366       llvm_unreachable("Unexpected metadata!");
2367       break;
2368     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
2369         OFFLOAD_ENTRY_INFO_TARGET_REGION:
2370       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
2371           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
2372           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
2373           /*Column=*/getMDInt(5), /*Order=*/getMDInt(6));
2374       break;
2375     }
2376   }
2377 }
2378
2379 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2380   if (!KmpRoutineEntryPtrTy) {
2381     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2382     auto &C = CGM.getContext();
2383     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2384     FunctionProtoType::ExtProtoInfo EPI;
2385     KmpRoutineEntryPtrQTy = C.getPointerType(
2386         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2387     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2388   }
2389 }
2390
2391 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
2392                                        QualType FieldTy) {
2393   auto *Field = FieldDecl::Create(
2394       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
2395       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
2396       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
2397   Field->setAccess(AS_public);
2398   DC->addDecl(Field);
2399   return Field;
2400 }
2401
2402 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
2403
2404   // Make sure the type of the entry is already created. This is the type we
2405   // have to create:
2406   // struct __tgt_offload_entry{
2407   //   void      *addr;       // Pointer to the offload entry info.
2408   //                          // (function or global)
2409   //   char      *name;       // Name of the function or global.
2410   //   size_t     size;       // Size of the entry info (0 if it a function).
2411   // };
2412   if (TgtOffloadEntryQTy.isNull()) {
2413     ASTContext &C = CGM.getContext();
2414     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
2415     RD->startDefinition();
2416     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2417     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
2418     addFieldToRecordDecl(C, RD, C.getSizeType());
2419     RD->completeDefinition();
2420     TgtOffloadEntryQTy = C.getRecordType(RD);
2421   }
2422   return TgtOffloadEntryQTy;
2423 }
2424
2425 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
2426   // These are the types we need to build:
2427   // struct __tgt_device_image{
2428   // void   *ImageStart;       // Pointer to the target code start.
2429   // void   *ImageEnd;         // Pointer to the target code end.
2430   // // We also add the host entries to the device image, as it may be useful
2431   // // for the target runtime to have access to that information.
2432   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
2433   //                                       // the entries.
2434   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
2435   //                                       // entries (non inclusive).
2436   // };
2437   if (TgtDeviceImageQTy.isNull()) {
2438     ASTContext &C = CGM.getContext();
2439     auto *RD = C.buildImplicitRecord("__tgt_device_image");
2440     RD->startDefinition();
2441     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2442     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2443     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2444     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2445     RD->completeDefinition();
2446     TgtDeviceImageQTy = C.getRecordType(RD);
2447   }
2448   return TgtDeviceImageQTy;
2449 }
2450
2451 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
2452   // struct __tgt_bin_desc{
2453   //   int32_t              NumDevices;      // Number of devices supported.
2454   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
2455   //                                         // (one per device).
2456   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
2457   //                                         // entries.
2458   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
2459   //                                         // entries (non inclusive).
2460   // };
2461   if (TgtBinaryDescriptorQTy.isNull()) {
2462     ASTContext &C = CGM.getContext();
2463     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
2464     RD->startDefinition();
2465     addFieldToRecordDecl(
2466         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
2467     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
2468     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2469     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2470     RD->completeDefinition();
2471     TgtBinaryDescriptorQTy = C.getRecordType(RD);
2472   }
2473   return TgtBinaryDescriptorQTy;
2474 }
2475
2476 namespace {
2477 struct PrivateHelpersTy {
2478   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
2479                    const VarDecl *PrivateElemInit)
2480       : Original(Original), PrivateCopy(PrivateCopy),
2481         PrivateElemInit(PrivateElemInit) {}
2482   const VarDecl *Original;
2483   const VarDecl *PrivateCopy;
2484   const VarDecl *PrivateElemInit;
2485 };
2486 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2487 } // anonymous namespace
2488
2489 static RecordDecl *
2490 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2491   if (!Privates.empty()) {
2492     auto &C = CGM.getContext();
2493     // Build struct .kmp_privates_t. {
2494     //         /*  private vars  */
2495     //       };
2496     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
2497     RD->startDefinition();
2498     for (auto &&Pair : Privates) {
2499       auto *VD = Pair.second.Original;
2500       auto Type = VD->getType();
2501       Type = Type.getNonReferenceType();
2502       auto *FD = addFieldToRecordDecl(C, RD, Type);
2503       if (VD->hasAttrs()) {
2504         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2505              E(VD->getAttrs().end());
2506              I != E; ++I)
2507           FD->addAttr(*I);
2508       }
2509     }
2510     RD->completeDefinition();
2511     return RD;
2512   }
2513   return nullptr;
2514 }
2515
2516 static RecordDecl *
2517 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
2518                          QualType KmpRoutineEntryPointerQTy) {
2519   auto &C = CGM.getContext();
2520   // Build struct kmp_task_t {
2521   //         void *              shareds;
2522   //         kmp_routine_entry_t routine;
2523   //         kmp_int32           part_id;
2524   //         kmp_routine_entry_t destructors;
2525   //       };
2526   auto *RD = C.buildImplicitRecord("kmp_task_t");
2527   RD->startDefinition();
2528   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2529   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2530   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2531   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2532   RD->completeDefinition();
2533   return RD;
2534 }
2535
2536 static RecordDecl *
2537 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2538                                      ArrayRef<PrivateDataTy> Privates) {
2539   auto &C = CGM.getContext();
2540   // Build struct kmp_task_t_with_privates {
2541   //         kmp_task_t task_data;
2542   //         .kmp_privates_t. privates;
2543   //       };
2544   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2545   RD->startDefinition();
2546   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2547   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
2548     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2549   }
2550   RD->completeDefinition();
2551   return RD;
2552 }
2553
2554 /// \brief Emit a proxy function which accepts kmp_task_t as the second
2555 /// argument.
2556 /// \code
2557 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2558 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2559 ///   tt->shareds);
2560 ///   return 0;
2561 /// }
2562 /// \endcode
2563 static llvm::Value *
2564 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2565                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2566                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2567                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2568                       llvm::Value *TaskPrivatesMap) {
2569   auto &C = CGM.getContext();
2570   FunctionArgList Args;
2571   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2572   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2573                                 /*Id=*/nullptr,
2574                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2575   Args.push_back(&GtidArg);
2576   Args.push_back(&TaskTypeArg);
2577   FunctionType::ExtInfo Info;
2578   auto &TaskEntryFnInfo =
2579       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2580                                                     /*isVariadic=*/false);
2581   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2582   auto *TaskEntry =
2583       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2584                              ".omp_task_entry.", &CGM.getModule());
2585   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
2586   CodeGenFunction CGF(CGM);
2587   CGF.disableDebugInfo();
2588   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2589
2590   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2591   // tt->task_data.shareds);
2592   auto *GtidParam = CGF.EmitLoadOfScalar(
2593       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2594   LValue TDBase = emitLoadOfPointerLValue(
2595       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2596   auto *KmpTaskTWithPrivatesQTyRD =
2597       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2598   LValue Base =
2599       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2600   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2601   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2602   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2603   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2604
2605   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2606   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2607   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2608       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2609       CGF.ConvertTypeForMem(SharedsPtrTy));
2610
2611   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2612   llvm::Value *PrivatesParam;
2613   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2614     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2615     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2616         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2617   } else {
2618     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2619   }
2620
2621   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2622                              TaskPrivatesMap, SharedsParam};
2623   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
2624   CGF.EmitStoreThroughLValue(
2625       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2626       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2627   CGF.FinishFunction();
2628   return TaskEntry;
2629 }
2630
2631 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2632                                             SourceLocation Loc,
2633                                             QualType KmpInt32Ty,
2634                                             QualType KmpTaskTWithPrivatesPtrQTy,
2635                                             QualType KmpTaskTWithPrivatesQTy) {
2636   auto &C = CGM.getContext();
2637   FunctionArgList Args;
2638   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2639   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2640                                 /*Id=*/nullptr,
2641                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2642   Args.push_back(&GtidArg);
2643   Args.push_back(&TaskTypeArg);
2644   FunctionType::ExtInfo Info;
2645   auto &DestructorFnInfo =
2646       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2647                                                     /*isVariadic=*/false);
2648   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2649   auto *DestructorFn =
2650       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2651                              ".omp_task_destructor.", &CGM.getModule());
2652   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
2653                                     DestructorFnInfo);
2654   CodeGenFunction CGF(CGM);
2655   CGF.disableDebugInfo();
2656   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2657                     Args);
2658
2659   LValue Base = emitLoadOfPointerLValue(
2660       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2661   auto *KmpTaskTWithPrivatesQTyRD =
2662       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2663   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2664   Base = CGF.EmitLValueForField(Base, *FI);
2665   for (auto *Field :
2666        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2667     if (auto DtorKind = Field->getType().isDestructedType()) {
2668       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2669       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2670     }
2671   }
2672   CGF.FinishFunction();
2673   return DestructorFn;
2674 }
2675
2676 /// \brief Emit a privates mapping function for correct handling of private and
2677 /// firstprivate variables.
2678 /// \code
2679 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2680 /// **noalias priv1,...,  <tyn> **noalias privn) {
2681 ///   *priv1 = &.privates.priv1;
2682 ///   ...;
2683 ///   *privn = &.privates.privn;
2684 /// }
2685 /// \endcode
2686 static llvm::Value *
2687 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2688                                ArrayRef<const Expr *> PrivateVars,
2689                                ArrayRef<const Expr *> FirstprivateVars,
2690                                QualType PrivatesQTy,
2691                                ArrayRef<PrivateDataTy> Privates) {
2692   auto &C = CGM.getContext();
2693   FunctionArgList Args;
2694   ImplicitParamDecl TaskPrivatesArg(
2695       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2696       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2697   Args.push_back(&TaskPrivatesArg);
2698   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2699   unsigned Counter = 1;
2700   for (auto *E: PrivateVars) {
2701     Args.push_back(ImplicitParamDecl::Create(
2702         C, /*DC=*/nullptr, Loc,
2703         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2704                             .withConst()
2705                             .withRestrict()));
2706     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2707     PrivateVarsPos[VD] = Counter;
2708     ++Counter;
2709   }
2710   for (auto *E : FirstprivateVars) {
2711     Args.push_back(ImplicitParamDecl::Create(
2712         C, /*DC=*/nullptr, Loc,
2713         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2714                             .withConst()
2715                             .withRestrict()));
2716     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2717     PrivateVarsPos[VD] = Counter;
2718     ++Counter;
2719   }
2720   FunctionType::ExtInfo Info;
2721   auto &TaskPrivatesMapFnInfo =
2722       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2723                                                     /*isVariadic=*/false);
2724   auto *TaskPrivatesMapTy =
2725       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2726   auto *TaskPrivatesMap = llvm::Function::Create(
2727       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2728       ".omp_task_privates_map.", &CGM.getModule());
2729   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
2730                                     TaskPrivatesMapFnInfo);
2731   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2732   CodeGenFunction CGF(CGM);
2733   CGF.disableDebugInfo();
2734   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2735                     TaskPrivatesMapFnInfo, Args);
2736
2737   // *privi = &.privates.privi;
2738   LValue Base = emitLoadOfPointerLValue(
2739       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2740   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2741   Counter = 0;
2742   for (auto *Field : PrivatesQTyRD->fields()) {
2743     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2744     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2745     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2746     auto RefLoadLVal =
2747         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2748     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2749     ++Counter;
2750   }
2751   CGF.FinishFunction();
2752   return TaskPrivatesMap;
2753 }
2754
2755 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2756                                      const PrivateDataTy *P2) {
2757   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2758 }
2759
/// \brief Emit the code that creates and launches an OpenMP task.
///
/// The emitted sequence is:
///  1. collect private and firstprivate copies and sort them by alignment;
///  2. build the per-task record (kmp_task_t followed by the privates), the
///     privates mapping function and the proxy task entry;
///  3. call __kmpc_omp_task_alloc and fill the returned record: shareds,
///     initial values of the private copies and the destructors thunk;
///  4. materialize the kmp_depend_info array if there are dependences;
///  5. either enqueue the task (__kmpc_omp_task /
///     __kmpc_omp_task_with_deps) or, on the 'else' branch of the 'if'
///     clause, run the proxy entry in place between
///     __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.
///
/// \param CGF Function being emitted; nothing is done if it has no insert
/// point.
/// \param Loc Location used for the runtime calls and generated helpers.
/// \param D Directive whose associated captured statement is used to look up
/// the capture field of each firstprivate original.
/// \param Tied If true, the tied bit (0x1) is set in the task flags.
/// \param Final Selects the final bit (0x2): a non-null pointer part is a
/// runtime condition, otherwise the integer part is used as a constant.
/// \param TaskFunction Outlined task function; the type of its parameter at
/// index 3 is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the captured shareds.
/// \param Shareds Address of the captured shareds; copied into the task if the
/// record has at least one field.
/// \param IfCond Condition of the 'if' clause, or null to always enqueue.
/// \param PrivateVars Originals of private variables.
/// \param PrivateCopies Private copies, parallel to \a PrivateVars.
/// \param FirstprivateVars Originals of firstprivate variables.
/// \param FirstprivateCopies Private copies, parallel to \a FirstprivateVars.
/// \param FirstprivateInits Helper variables referenced by the initializers of
/// the firstprivate copies, parallel to \a FirstprivateVars.
/// \param Dependences (kind, expression) pairs from 'depend' clauses; 'in'
/// maps to 0x1, 'out' and 'inout' map to 0x3, other kinds are unreachable.
2760 void CGOpenMPRuntime::emitTaskCall(
2761     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2762     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2763     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
2764     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
2765     ArrayRef<const Expr *> PrivateCopies,
2766     ArrayRef<const Expr *> FirstprivateVars,
2767     ArrayRef<const Expr *> FirstprivateCopies,
2768     ArrayRef<const Expr *> FirstprivateInits,
2769     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
2770   if (!CGF.HaveInsertPoint())
2771     return;
2772   auto &C = CGM.getContext();
2773   llvm::SmallVector<PrivateDataTy, 8> Privates;
2774   // Aggregate privates and sort them by alignment, largest first.
2775   auto I = PrivateCopies.begin();
2776   for (auto *E : PrivateVars) {
2777     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2778     Privates.push_back(std::make_pair(
2779         C.getDeclAlign(VD),
2780         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2781                          /*PrivateElemInit=*/nullptr)));
2782     ++I;
2783   }
2784   I = FirstprivateCopies.begin();
2785   auto IElemInitRef = FirstprivateInits.begin();
2786   for (auto *E : FirstprivateVars) {
2787     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2788     Privates.push_back(std::make_pair(
2789         C.getDeclAlign(VD),
2790         PrivateHelpersTy(
2791             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2792             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2793     ++I, ++IElemInitRef;
2794   }
2795   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2796                        array_pod_sort_comparator);
2797   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2798   // Build type kmp_routine_entry_t (if not built yet).
2799   emitKmpRoutineEntryT(KmpInt32Ty);
2800   // Build type kmp_task_t (if not built yet).
2801   if (KmpTaskTQTy.isNull()) {
2802     KmpTaskTQTy = C.getRecordType(
2803         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2804   }
2805   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2806   // Build particular struct kmp_task_t for the given task.
2807   auto *KmpTaskTWithPrivatesQTyRD =
2808       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2809   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2810   QualType KmpTaskTWithPrivatesPtrQTy =
2811       C.getPointerType(KmpTaskTWithPrivatesQTy);
2812   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2813   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2814   auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
2815   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2816
2817   // Emit the privates mapping function (a null pointer if no privates).
2818   llvm::Value *TaskPrivatesMap = nullptr;
2819   auto *TaskPrivatesMapTy =
2820       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2821                 3)
2822           ->getType();
2823   if (!Privates.empty()) {
2824     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2825     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2826         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2827     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2828         TaskPrivatesMap, TaskPrivatesMapTy);
2829   } else {
2830     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2831         cast<llvm::PointerType>(TaskPrivatesMapTy));
2832   }
2833   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2834   // kmp_task_t *tt);
2835   auto *TaskEntry = emitProxyTaskFunction(
2836       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2837       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2838
2839   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2840   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2841   // kmp_routine_entry_t *task_entry);
2842   // Task flags. Format is taken from
2843   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2844   // description of kmp_tasking_flags struct.
2845   const unsigned TiedFlag = 0x1;
2846   const unsigned FinalFlag = 0x2;
2847   unsigned Flags = Tied ? TiedFlag : 0;
2848   auto *TaskFlags =
2849       Final.getPointer()
2850           ? CGF.Builder.CreateSelect(Final.getPointer(),
2851                                      CGF.Builder.getInt32(FinalFlag),
2852                                      CGF.Builder.getInt32(/*C=*/0))
2853           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2854   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2855   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
2856   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
2857                               getThreadID(CGF, Loc), TaskFlags,
2858                               KmpTaskTWithPrivatesTySize, SharedsSize,
2859                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2860                                   TaskEntry, KmpRoutineEntryPtrTy)};
2861   auto *NewTask = CGF.EmitRuntimeCall(
2862       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
2863   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2864       NewTask, KmpTaskTWithPrivatesPtrTy);
2865   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2866                                                KmpTaskTWithPrivatesQTy);
2867   LValue TDBase =
2868       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2869   // Fill the data in the resulting kmp_task_t record.
2870   // Copy shareds if there are any.
2871   Address KmpTaskSharedsPtr = Address::invalid();
2872   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2873     KmpTaskSharedsPtr =
2874         Address(CGF.EmitLoadOfScalar(
2875                     CGF.EmitLValueForField(
2876                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
2877                                            KmpTaskTShareds)),
2878                     Loc),
2879                 CGF.getNaturalTypeAlignment(SharedsTy));
2880     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2881   }
2882   // Emit initial values for private copies (if any).
2883   bool NeedsCleanup = false;
2884   if (!Privates.empty()) {
2885     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2886     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
2887     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
2888     LValue SharedsBase;
2889     if (!FirstprivateVars.empty()) {
2890       SharedsBase = CGF.MakeAddrLValue(
2891           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2892               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2893           SharedsTy);
2894     }
2895     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2896         cast<CapturedStmt>(*D.getAssociatedStmt()));
2897     for (auto &&Pair : Privates) {
2898       auto *VD = Pair.second.PrivateCopy;
2899       auto *Init = VD->getAnyInitializer();
2900       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
2901       if (Init) {
2902         if (auto *Elem = Pair.second.PrivateElemInit) {
2903           auto *OriginalVD = Pair.second.Original;
2904           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2905           auto SharedRefLValue =
2906               CGF.EmitLValueForField(SharedsBase, SharedField);
2907           SharedRefLValue = CGF.MakeAddrLValue(
2908               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
2909               SharedRefLValue.getType(), AlignmentSource::Decl);
2910           QualType Type = OriginalVD->getType();
2911           if (Type->isArrayType()) {
2912             // Initialize firstprivate array.
2913             if (!isa<CXXConstructExpr>(Init) ||
2914                 CGF.isTrivialInitializer(Init)) {
2915               // Perform simple memcpy.
2916               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2917                                       SharedRefLValue.getAddress(), Type);
2918             } else {
2919               // Initialize firstprivate array using element-by-element
2920               // initialization.
2921               CGF.EmitOMPAggregateAssign(
2922                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2923                   Type, [&CGF, Elem, Init, &CapturesInfo](
2924                             Address DestElement, Address SrcElement) {
2925                     // Clean up any temporaries needed by the initialization.
2926                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2927                     InitScope.addPrivate(Elem, [SrcElement]() -> Address {
2928                       return SrcElement;
2929                     });
2930                     (void)InitScope.Privatize();
2931                     // Emit initialization for single element.
2932                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
2933                         CGF, &CapturesInfo);
2934                     CGF.EmitAnyExprToMem(Init, DestElement,
2935                                          Init->getType().getQualifiers(),
2936                                          /*IsInitializer=*/false);
2937                   });
2938             }
2939           } else {
2940             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2941             InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
2942               return SharedRefLValue.getAddress();
2943             });
2944             (void)InitScope.Privatize();
2945             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
2946             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2947                                /*capturedByInit=*/false);
2948           }
2949         } else {
2950           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2951         }
2952       }
2953       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2954       ++FI;
2955     }
2956   }
2957   // Provide pointer to function with destructors for privates.
2958   llvm::Value *DestructorFn =
2959       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2960                                              KmpTaskTWithPrivatesPtrQTy,
2961                                              KmpTaskTWithPrivatesQTy)
2962                    : llvm::ConstantPointerNull::get(
2963                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2964   LValue Destructor = CGF.EmitLValueForField(
2965       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2966   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2967                             DestructorFn, KmpRoutineEntryPtrTy),
2968                         Destructor);
2969
2970   // Process list of dependences.
2971   Address DependenciesArray = Address::invalid();
2972   unsigned NumDependencies = Dependences.size();
2973   if (NumDependencies) {
2974     // Dependence kind for RTL.
2975     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
2976     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
2977     RecordDecl *KmpDependInfoRD;
2978     QualType FlagsTy =
2979         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
2980     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
2981     if (KmpDependInfoTy.isNull()) {
2982       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
2983       KmpDependInfoRD->startDefinition();
2984       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
2985       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
2986       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
2987       KmpDependInfoRD->completeDefinition();
2988       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
2989     } else {
2990       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
2991     }
2992     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
2993     // Define type kmp_depend_info[<Dependences.size()>];
2994     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
2995         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
2996         ArrayType::Normal, /*IndexTypeQuals=*/0);
2997     // kmp_depend_info[<Dependences.size()>] deps;
2998     DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
2999     for (unsigned i = 0; i < NumDependencies; ++i) {
3000       const Expr *E = Dependences[i].second;
3001       auto Addr = CGF.EmitLValue(E);
3002       llvm::Value *Size;
3003       QualType Ty = E->getType();
3004       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3005         LValue UpAddrLVal =
3006             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3007         llvm::Value *UpAddr =
3008             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3009         llvm::Value *LowIntPtr =
3010             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3011         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3012         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3013       } else
3014         Size = getTypeSize(CGF, Ty);
3015       auto Base = CGF.MakeAddrLValue(
3016           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3017           KmpDependInfoTy);
3018       // deps[i].base_addr = &<Dependences[i].second>;
3019       auto BaseAddrLVal = CGF.EmitLValueForField(
3020           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3021       CGF.EmitStoreOfScalar(
3022           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3023           BaseAddrLVal);
3024       // deps[i].len = sizeof(<Dependences[i].second>);
3025       auto LenLVal = CGF.EmitLValueForField(
3026           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3027       CGF.EmitStoreOfScalar(Size, LenLVal);
3028       // deps[i].flags = <Dependences[i].first>;
3029       RTLDependenceKindTy DepKind;
3030       switch (Dependences[i].first) {
3031       case OMPC_DEPEND_in:
3032         DepKind = DepIn;
3033         break;
3034       // Out and InOut dependencies must use the same code.
3035       case OMPC_DEPEND_out:
3036       case OMPC_DEPEND_inout:
3037         DepKind = DepInOut;
3038         break;
3039       case OMPC_DEPEND_source:
3040       case OMPC_DEPEND_sink:
3041       case OMPC_DEPEND_unknown:
3042         llvm_unreachable("Unknown task dependence type");
3043       }
3044       auto FlagsLVal = CGF.EmitLValueForField(
3045           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3046       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3047                             FlagsLVal);
3048     }
3049     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3050         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3051         CGF.VoidPtrTy);
3052   }
3053
3054   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
3055   // libcall.
3056   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
3057   // *new_task);
3058   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3059   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3060   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3061   // list is not empty
3062   auto *ThreadID = getThreadID(CGF, Loc);
3063   auto *UpLoc = emitUpdateLocation(CGF, Loc);
3064   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
3065   llvm::Value *DepTaskArgs[7];
3066   if (NumDependencies) {
3067     DepTaskArgs[0] = UpLoc;
3068     DepTaskArgs[1] = ThreadID;
3069     DepTaskArgs[2] = NewTask;
3070     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3071     DepTaskArgs[4] = DependenciesArray.getPointer();
3072     DepTaskArgs[5] = CGF.Builder.getInt32(0);
3073     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3074   }
3075   auto &&ThenCodeGen = [this, NumDependencies,
3076                         &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
3077     // TODO: add check for untied tasks.
3078     if (NumDependencies) {
3079       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
3080                           DepTaskArgs);
3081     } else {
3082       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
3083                           TaskArgs);
3084     }
3085   };
3086   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
3087       IfCallEndCleanup;
3088
3089   llvm::Value *DepWaitTaskArgs[6];
3090   if (NumDependencies) {
3091     DepWaitTaskArgs[0] = UpLoc;
3092     DepWaitTaskArgs[1] = ThreadID;
3093     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
3094     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
3095     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
3096     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3097   }
3098   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
3099                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
3100     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
3101     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
3102     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
3103     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
3104     // is specified.
3105     if (NumDependencies)
3106       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
3107                           DepWaitTaskArgs);
3108     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
3109     // kmp_task_t *new_task);
3110     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
3111                         TaskArgs);
3112     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
3113     // kmp_task_t *new_task);
3114     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
3115         NormalAndEHCleanup,
3116         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
3117         llvm::makeArrayRef(TaskArgs));
3118
3119     // Call proxy_task_entry(gtid, new_task);
3120     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
3121     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
3122   };
3123
3124   if (IfCond) {
3125     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
3126   } else {
3127     CodeGenFunction::RunCleanupsScope Scope(CGF);
3128     ThenCodeGen(CGF);
3129   }
3130 }
3131
3132 /// \brief Emit reduction operation for each element of array (required for
3133 /// array sections) LHS op = RHS.
3134 /// \param Type Type of array.
3135 /// \param LHSVar Variable on the left side of the reduction operation
3136 /// (references element of array in original variable).
3137 /// \param RHSVar Variable on the right side of the reduction operation
3138 /// (references element of array in original variable).
3139 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
3140 /// RHSVar.
3141 static void EmitOMPAggregateReduction(
3142     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
3143     const VarDecl *RHSVar,
3144     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
3145                                   const Expr *, const Expr *)> &RedOpGen,
3146     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
3147     const Expr *UpExpr = nullptr) {
3148   // Perform element-by-element initialization.
3149   QualType ElementTy;
3150   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
3151   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
3152
3153   // Drill down to the base element type on both arrays.
3154   auto ArrayTy = Type->getAsArrayTypeUnsafe();
3155   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
3156
3157   auto RHSBegin = RHSAddr.getPointer();
3158   auto LHSBegin = LHSAddr.getPointer();
3159   // Cast from pointer to array type to pointer to single element.
3160   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
3161   // The basic structure here is a while-do loop.
3162   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
3163   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
3164   auto IsEmpty =
3165       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
3166   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
3167
3168   // Enter the loop body, making that address the current address.
3169   auto EntryBB = CGF.Builder.GetInsertBlock();
3170   CGF.EmitBlock(BodyBB);
3171
3172   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
3173
3174   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
3175       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
3176   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
3177   Address RHSElementCurrent =
3178       Address(RHSElementPHI,
3179               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
3180
3181   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
3182       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
3183   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
3184   Address LHSElementCurrent =
3185       Address(LHSElementPHI,
3186               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
3187
3188   // Emit copy.
3189   CodeGenFunction::OMPPrivateScope Scope(CGF);
3190   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
3191   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
3192   Scope.Privatize();
3193   RedOpGen(CGF, XExpr, EExpr, UpExpr);
3194   Scope.ForceCleanup();
3195
3196   // Shift the address forward by one element.
3197   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
3198       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
3199   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
3200       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
3201   // Check whether we've reached the end.
3202   auto Done =
3203       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
3204   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
3205   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
3206   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
3207
3208   // Done.
3209   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
3210 }
3211
3212 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
3213                                           llvm::Type *ArgsType,
3214                                           ArrayRef<const Expr *> Privates,
3215                                           ArrayRef<const Expr *> LHSExprs,
3216                                           ArrayRef<const Expr *> RHSExprs,
3217                                           ArrayRef<const Expr *> ReductionOps) {
3218   auto &C = CGM.getContext();
3219
3220   // void reduction_func(void *LHSArg, void *RHSArg);
3221   FunctionArgList Args;
3222   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
3223                            C.VoidPtrTy);
3224   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
3225                            C.VoidPtrTy);
3226   Args.push_back(&LHSArg);
3227   Args.push_back(&RHSArg);
3228   FunctionType::ExtInfo EI;
3229   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
3230       C.VoidTy, Args, EI, /*isVariadic=*/false);
3231   auto *Fn = llvm::Function::Create(
3232       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
3233       ".omp.reduction.reduction_func", &CGM.getModule());
3234   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
3235   CodeGenFunction CGF(CGM);
3236   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
3237
3238   // Dst = (void*[n])(LHSArg);
3239   // Src = (void*[n])(RHSArg);
3240   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3241       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3242       ArgsType), CGF.getPointerAlign());
3243   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3244       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3245       ArgsType), CGF.getPointerAlign());
3246
3247   //  ...
3248   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
3249   //  ...
3250   CodeGenFunction::OMPPrivateScope Scope(CGF);
3251   auto IPriv = Privates.begin();
3252   unsigned Idx = 0;
3253   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
3254     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
3255     Scope.addPrivate(RHSVar, [&]() -> Address {
3256       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
3257     });
3258     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
3259     Scope.addPrivate(LHSVar, [&]() -> Address {
3260       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
3261     });
3262     QualType PrivTy = (*IPriv)->getType();
3263     if (PrivTy->isArrayType()) {
3264       // Get array size and emit VLA type.
3265       ++Idx;
3266       Address Elem =
3267           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
3268       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
3269       CodeGenFunction::OpaqueValueMapping OpaqueMap(
3270           CGF,
3271           cast<OpaqueValueExpr>(
3272               CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
3273           RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
3274       CGF.EmitVariablyModifiedType(PrivTy);
3275     }
3276   }
3277   Scope.Privatize();
3278   IPriv = Privates.begin();
3279   auto ILHS = LHSExprs.begin();
3280   auto IRHS = RHSExprs.begin();
3281   for (auto *E : ReductionOps) {
3282     if ((*IPriv)->getType()->isArrayType()) {
3283       // Emit reduction for array section.
3284       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3285       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3286       EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3287                                 [=](CodeGenFunction &CGF, const Expr *,
3288                                     const Expr *,
3289                                     const Expr *) { CGF.EmitIgnoredExpr(E); });
3290     } else
3291       // Emit reduction for array subscript or single variable.
3292       CGF.EmitIgnoredExpr(E);
3293     ++IPriv, ++ILHS, ++IRHS;
3294   }
3295   Scope.ForceCleanup();
3296   CGF.FinishFunction();
3297   return Fn;
3298 }
3299
3300 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
3301                                     ArrayRef<const Expr *> Privates,
3302                                     ArrayRef<const Expr *> LHSExprs,
3303                                     ArrayRef<const Expr *> RHSExprs,
3304                                     ArrayRef<const Expr *> ReductionOps,
3305                                     bool WithNowait, bool SimpleReduction) {
3306   if (!CGF.HaveInsertPoint())
3307     return;
3308   // Next code should be emitted for reduction:
3309   //
3310   // static kmp_critical_name lock = { 0 };
3311   //
3312   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
3313   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
3314   //  ...
3315   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
3316   //  *(Type<n>-1*)rhs[<n>-1]);
3317   // }
3318   //
3319   // ...
3320   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
3321   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
3322   // RedList, reduce_func, &<lock>)) {
3323   // case 1:
3324   //  ...
3325   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3326   //  ...
3327   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3328   // break;
3329   // case 2:
3330   //  ...
3331   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3332   //  ...
3333   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
3334   // break;
3335   // default:;
3336   // }
3337   //
3338   // if SimpleReduction is true, only the next code is generated:
3339   //  ...
3340   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3341   //  ...
3342
3343   auto &C = CGM.getContext();
3344
3345   if (SimpleReduction) {
3346     CodeGenFunction::RunCleanupsScope Scope(CGF);
3347     auto IPriv = Privates.begin();
3348     auto ILHS = LHSExprs.begin();
3349     auto IRHS = RHSExprs.begin();
3350     for (auto *E : ReductionOps) {
3351       if ((*IPriv)->getType()->isArrayType()) {
3352         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3353         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3354         EmitOMPAggregateReduction(
3355             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3356             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
3357                 const Expr *) { CGF.EmitIgnoredExpr(E); });
3358       } else
3359         CGF.EmitIgnoredExpr(E);
3360       ++IPriv, ++ILHS, ++IRHS;
3361     }
3362     return;
3363   }
3364
3365   // 1. Build a list of reduction variables.
3366   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
3367   auto Size = RHSExprs.size();
3368   for (auto *E : Privates) {
3369     if (E->getType()->isArrayType())
3370       // Reserve place for array size.
3371       ++Size;
3372   }
3373   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
3374   QualType ReductionArrayTy =
3375       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3376                              /*IndexTypeQuals=*/0);
3377   Address ReductionList =
3378       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
3379   auto IPriv = Privates.begin();
3380   unsigned Idx = 0;
3381   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
3382     Address Elem =
3383       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
3384     CGF.Builder.CreateStore(
3385         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3386             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
3387         Elem);
3388     if ((*IPriv)->getType()->isArrayType()) {
3389       // Store array size.
3390       ++Idx;
3391       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
3392                                              CGF.getPointerSize());
3393       CGF.Builder.CreateStore(
3394           CGF.Builder.CreateIntToPtr(
3395               CGF.Builder.CreateIntCast(
3396                   CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
3397                                      (*IPriv)->getType()))
3398                       .first,
3399                   CGF.SizeTy, /*isSigned=*/false),
3400               CGF.VoidPtrTy),
3401           Elem);
3402     }
3403   }
3404
3405   // 2. Emit reduce_func().
3406   auto *ReductionFn = emitReductionFunction(
3407       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
3408       LHSExprs, RHSExprs, ReductionOps);
3409
3410   // 3. Create static kmp_critical_name lock = { 0 };
3411   auto *Lock = getCriticalRegionLock(".reduction");
3412
3413   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
3414   // RedList, reduce_func, &<lock>);
3415   auto *IdentTLoc = emitUpdateLocation(
3416       CGF, Loc,
3417       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
3418   auto *ThreadId = getThreadID(CGF, Loc);
3419   auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
3420   auto *RL =
3421     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
3422                                                     CGF.VoidPtrTy);
3423   llvm::Value *Args[] = {
3424       IdentTLoc,                             // ident_t *<loc>
3425       ThreadId,                              // i32 <gtid>
3426       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
3427       ReductionArrayTySize,                  // size_type sizeof(RedList)
3428       RL,                                    // void *RedList
3429       ReductionFn, // void (*) (void *, void *) <reduce_func>
3430       Lock         // kmp_critical_name *&<lock>
3431   };
3432   auto Res = CGF.EmitRuntimeCall(
3433       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
3434                                        : OMPRTL__kmpc_reduce),
3435       Args);
3436
3437   // 5. Build switch(res)
3438   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
3439   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
3440
3441   // 6. Build case 1:
3442   //  ...
3443   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3444   //  ...
3445   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3446   // break;
3447   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
3448   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
3449   CGF.EmitBlock(Case1BB);
3450
3451   {
3452     CodeGenFunction::RunCleanupsScope Scope(CGF);
3453     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3454     llvm::Value *EndArgs[] = {
3455         IdentTLoc, // ident_t *<loc>
3456         ThreadId,  // i32 <gtid>
3457         Lock       // kmp_critical_name *&<lock>
3458     };
3459     CGF.EHStack
3460         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
3461             NormalAndEHCleanup,
3462             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
3463                                              : OMPRTL__kmpc_end_reduce),
3464             llvm::makeArrayRef(EndArgs));
3465     auto IPriv = Privates.begin();
3466     auto ILHS = LHSExprs.begin();
3467     auto IRHS = RHSExprs.begin();
3468     for (auto *E : ReductionOps) {
3469       if ((*IPriv)->getType()->isArrayType()) {
3470         // Emit reduction for array section.
3471         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3472         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3473         EmitOMPAggregateReduction(
3474             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3475             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
3476                 const Expr *) { CGF.EmitIgnoredExpr(E); });
3477       } else
3478         // Emit reduction for array subscript or single variable.
3479         CGF.EmitIgnoredExpr(E);
3480       ++IPriv, ++ILHS, ++IRHS;
3481     }
3482   }
3483
3484   CGF.EmitBranch(DefaultBB);
3485
3486   // 7. Build case 2:
3487   //  ...
3488   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3489   //  ...
3490   // break;
3491   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
3492   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
3493   CGF.EmitBlock(Case2BB);
3494
3495   {
3496     CodeGenFunction::RunCleanupsScope Scope(CGF);
3497     if (!WithNowait) {
3498       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
3499       llvm::Value *EndArgs[] = {
3500           IdentTLoc, // ident_t *<loc>
3501           ThreadId,  // i32 <gtid>
3502           Lock       // kmp_critical_name *&<lock>
3503       };
3504       CGF.EHStack
3505           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
3506               NormalAndEHCleanup,
3507               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
3508               llvm::makeArrayRef(EndArgs));
3509     }
3510     auto ILHS = LHSExprs.begin();
3511     auto IRHS = RHSExprs.begin();
3512     auto IPriv = Privates.begin();
3513     for (auto *E : ReductionOps) {
3514         const Expr *XExpr = nullptr;
3515         const Expr *EExpr = nullptr;
3516         const Expr *UpExpr = nullptr;
3517         BinaryOperatorKind BO = BO_Comma;
3518         if (auto *BO = dyn_cast<BinaryOperator>(E)) {
3519           if (BO->getOpcode() == BO_Assign) {
3520             XExpr = BO->getLHS();
3521             UpExpr = BO->getRHS();
3522           }
3523         }
3524         // Try to emit update expression as a simple atomic.
3525         auto *RHSExpr = UpExpr;
3526         if (RHSExpr) {
3527           // Analyze RHS part of the whole expression.
3528           if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
3529                   RHSExpr->IgnoreParenImpCasts())) {
3530             // If this is a conditional operator, analyze its condition for
3531             // min/max reduction operator.
3532             RHSExpr = ACO->getCond();
3533           }
3534           if (auto *BORHS =
3535                   dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
3536             EExpr = BORHS->getRHS();
3537             BO = BORHS->getOpcode();
3538           }
3539         }
3540         if (XExpr) {
3541           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3542           auto &&AtomicRedGen = [this, BO, VD, IPriv,
3543                                  Loc](CodeGenFunction &CGF, const Expr *XExpr,
3544                                       const Expr *EExpr, const Expr *UpExpr) {
3545             LValue X = CGF.EmitLValue(XExpr);
3546             RValue E;
3547             if (EExpr)
3548               E = CGF.EmitAnyExpr(EExpr);
3549             CGF.EmitOMPAtomicSimpleUpdateExpr(
3550                 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
3551                 [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
3552                   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3553                   PrivateScope.addPrivate(
3554                       VD, [&CGF, VD, XRValue, Loc]() -> Address {
3555                         Address LHSTemp = CGF.CreateMemTemp(VD->getType());
3556                         CGF.emitOMPSimpleStore(
3557                             CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
3558                             VD->getType().getNonReferenceType(), Loc);
3559                         return LHSTemp;
3560                       });
3561                   (void)PrivateScope.Privatize();
3562                   return CGF.EmitAnyExpr(UpExpr);
3563                 });
3564           };
3565           if ((*IPriv)->getType()->isArrayType()) {
3566             // Emit atomic reduction for array section.
3567             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3568             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
3569                                       AtomicRedGen, XExpr, EExpr, UpExpr);
3570           } else
3571             // Emit atomic reduction for array subscript or single variable.
3572             AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
3573         } else {
3574           // Emit as a critical region.
3575           auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
3576                                              const Expr *, const Expr *) {
3577             emitCriticalRegion(
3578                 CGF, ".atomic_reduction",
3579                 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
3580           };
3581           if ((*IPriv)->getType()->isArrayType()) {
3582             auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3583             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3584             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3585                                       CritRedGen);
3586           } else
3587             CritRedGen(CGF, nullptr, nullptr, nullptr);
3588         }
3589       ++ILHS, ++IRHS, ++IPriv;
3590     }
3591   }
3592
3593   CGF.EmitBranch(DefaultBB);
3594   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
3595 }
3596
3597 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
3598                                        SourceLocation Loc) {
3599   if (!CGF.HaveInsertPoint())
3600     return;
3601   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
3602   // global_tid);
3603   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3604   // Ignore return result until untied tasks are supported.
3605   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
3606 }
3607
3608 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
3609                                            OpenMPDirectiveKind InnerKind,
3610                                            const RegionCodeGenTy &CodeGen,
3611                                            bool HasCancel) {
3612   if (!CGF.HaveInsertPoint())
3613     return;
3614   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
3615   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
3616 }
3617
namespace {
/// \brief Kind of the region being cancelled. The value is passed as the
/// 'cncl_kind' argument of the __kmpc_cancel and __kmpc_cancellationpoint
/// runtime calls, so the numeric values must not be changed.
enum RTCancelKind {
  /// \brief Initial value; never produced for a supported directive kind.
  CancelNoreq = 0,
  /// \brief Used for OMPD_parallel.
  CancelParallel = 1,
  /// \brief Used for OMPD_for.
  CancelLoop = 2,
  /// \brief Used for OMPD_sections.
  CancelSections = 3,
  /// \brief Used for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // anonymous namespace
3627
3628 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
3629   RTCancelKind CancelKind = CancelNoreq;
3630   if (CancelRegion == OMPD_parallel)
3631     CancelKind = CancelParallel;
3632   else if (CancelRegion == OMPD_for)
3633     CancelKind = CancelLoop;
3634   else if (CancelRegion == OMPD_sections)
3635     CancelKind = CancelSections;
3636   else {
3637     assert(CancelRegion == OMPD_taskgroup);
3638     CancelKind = CancelTaskgroup;
3639   }
3640   return CancelKind;
3641 }
3642
3643 void CGOpenMPRuntime::emitCancellationPointCall(
3644     CodeGenFunction &CGF, SourceLocation Loc,
3645     OpenMPDirectiveKind CancelRegion) {
3646   if (!CGF.HaveInsertPoint())
3647     return;
3648   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
3649   // global_tid, kmp_int32 cncl_kind);
3650   if (auto *OMPRegionInfo =
3651           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3652     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3653       return;
3654     if (OMPRegionInfo->hasCancel()) {
3655       llvm::Value *Args[] = {
3656           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3657           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3658       // Ignore return result until untied tasks are supported.
3659       auto *Result = CGF.EmitRuntimeCall(
3660           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
3661       // if (__kmpc_cancellationpoint()) {
3662       //  __kmpc_cancel_barrier();
3663       //   exit from construct;
3664       // }
3665       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3666       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3667       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3668       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3669       CGF.EmitBlock(ExitBB);
3670       // __kmpc_cancel_barrier();
3671       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3672       // exit from construct;
3673       auto CancelDest =
3674           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3675       CGF.EmitBranchThroughCleanup(CancelDest);
3676       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3677     }
3678   }
3679 }
3680
3681 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
3682                                      const Expr *IfCond,
3683                                      OpenMPDirectiveKind CancelRegion) {
3684   if (!CGF.HaveInsertPoint())
3685     return;
3686   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
3687   // kmp_int32 cncl_kind);
3688   if (auto *OMPRegionInfo =
3689           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3690     if (OMPRegionInfo->getDirectiveKind() == OMPD_single)
3691       return;
3692     auto &&ThenGen = [this, Loc, CancelRegion,
3693                       OMPRegionInfo](CodeGenFunction &CGF) {
3694       llvm::Value *Args[] = {
3695           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3696           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3697       // Ignore return result until untied tasks are supported.
3698       auto *Result =
3699           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
3700       // if (__kmpc_cancel()) {
3701       //  __kmpc_cancel_barrier();
3702       //   exit from construct;
3703       // }
3704       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3705       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3706       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3707       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3708       CGF.EmitBlock(ExitBB);
3709       // __kmpc_cancel_barrier();
3710       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3711       // exit from construct;
3712       auto CancelDest =
3713           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3714       CGF.EmitBranchThroughCleanup(CancelDest);
3715       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3716     };
3717     if (IfCond)
3718       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
3719     else
3720       ThenGen(CGF);
3721   }
3722 }
3723
3724 /// \brief Obtain information that uniquely identifies a target entry. This
3725 /// consists of the file and device IDs as well as line and column numbers
3726 /// associated with the relevant entry source location.
3727 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
3728                                      unsigned &DeviceID, unsigned &FileID,
3729                                      unsigned &LineNum, unsigned &ColumnNum) {
3730
3731   auto &SM = C.getSourceManager();
3732
3733   // The loc should be always valid and have a file ID (the user cannot use
3734   // #pragma directives in macros)
3735
3736   assert(Loc.isValid() && "Source location is expected to be always valid.");
3737   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
3738
3739   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
3740   assert(PLoc.isValid() && "Source location is expected to be always valid.");
3741
3742   llvm::sys::fs::UniqueID ID;
3743   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
3744     llvm_unreachable("Source file with target region no longer exists!");
3745
3746   DeviceID = ID.getDevice();
3747   FileID = ID.getFile();
3748   LineNum = PLoc.getLine();
3749   ColumnNum = PLoc.getColumn();
3750   return;
3751 }
3752
3753 void CGOpenMPRuntime::emitTargetOutlinedFunction(
3754     const OMPExecutableDirective &D, StringRef ParentName,
3755     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
3756     bool IsOffloadEntry) {
3757
3758   assert(!ParentName.empty() && "Invalid target region parent name!");
3759
3760   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3761
3762   // Emit target region as a standalone region.
3763   auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
3764     CGF.EmitStmt(CS.getCapturedStmt());
3765   };
3766
3767   // Create a unique name for the proxy/entry function that using the source
3768   // location information of the current target region. The name will be
3769   // something like:
3770   //
3771   // .omp_offloading.DD_FFFF.PP.lBB.cCC
3772   //
3773   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
3774   // mangled name of the function that encloses the target region, BB is the
3775   // line number of the target region, and CC is the column number of the target
3776   // region.
3777
3778   unsigned DeviceID;
3779   unsigned FileID;
3780   unsigned Line;
3781   unsigned Column;
3782   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
3783                            Line, Column);
3784   SmallString<64> EntryFnName;
3785   {
3786     llvm::raw_svector_ostream OS(EntryFnName);
3787     OS << ".omp_offloading" << llvm::format(".%x", DeviceID)
3788        << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c"
3789        << Column;
3790   }
3791
3792   CodeGenFunction CGF(CGM, true);
3793   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
3794   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
3795
3796   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
3797
3798   // If this target outline function is not an offload entry, we don't need to
3799   // register it.
3800   if (!IsOffloadEntry)
3801     return;
3802
3803   // The target region ID is used by the runtime library to identify the current
3804   // target region, so it only has to be unique and not necessarily point to
3805   // anything. It could be the pointer to the outlined function that implements
3806   // the target region, but we aren't using that so that the compiler doesn't
3807   // need to keep that, and could therefore inline the host function if proven
3808   // worthwhile during optimization. In the other hand, if emitting code for the
3809   // device, the ID has to be the function address so that it can retrieved from
3810   // the offloading entry and launched by the runtime library. We also mark the
3811   // outlined function to have external linkage in case we are emitting code for
3812   // the device, because these functions will be entry points to the device.
3813
3814   if (CGM.getLangOpts().OpenMPIsDevice) {
3815     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
3816     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
3817   } else
3818     OutlinedFnID = new llvm::GlobalVariable(
3819         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3820         llvm::GlobalValue::PrivateLinkage,
3821         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
3822
3823   // Register the information for the entry associated with this target region.
3824   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3825       DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
3826   return;
3827 }
3828
3829 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
3830                                      const OMPExecutableDirective &D,
3831                                      llvm::Value *OutlinedFn,
3832                                      llvm::Value *OutlinedFnID,
3833                                      const Expr *IfCond, const Expr *Device,
3834                                      ArrayRef<llvm::Value *> CapturedVars) {
3835   if (!CGF.HaveInsertPoint())
3836     return;
3837   /// \brief Values for bit flags used to specify the mapping type for
3838   /// offloading.
3839   enum OpenMPOffloadMappingFlags {
3840     /// \brief Allocate memory on the device and move data from host to device.
3841     OMP_MAP_TO = 0x01,
3842     /// \brief Allocate memory on the device and move data from device to host.
3843     OMP_MAP_FROM = 0x02,
3844     /// \brief The element passed to the device is a pointer.
3845     OMP_MAP_PTR = 0x20,
3846     /// \brief Pass the element to the device by value.
3847     OMP_MAP_BYCOPY = 0x80,
3848   };
3849
3850   enum OpenMPOffloadingReservedDeviceIDs {
3851     /// \brief Device ID if the device was not defined, runtime should get it
3852     /// from environment variables in the spec.
3853     OMP_DEVICEID_UNDEF = -1,
3854   };
3855
3856   assert(OutlinedFn && "Invalid outlined function!");
3857
3858   auto &Ctx = CGF.getContext();
3859
3860   // Fill up the arrays with the all the captured variables.
3861   SmallVector<llvm::Value *, 16> BasePointers;
3862   SmallVector<llvm::Value *, 16> Pointers;
3863   SmallVector<llvm::Value *, 16> Sizes;
3864   SmallVector<unsigned, 16> MapTypes;
3865
3866   bool hasVLACaptures = false;
3867
3868   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3869   auto RI = CS.getCapturedRecordDecl()->field_begin();
3870   // auto II = CS.capture_init_begin();
3871   auto CV = CapturedVars.begin();
3872   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
3873                                             CE = CS.capture_end();
3874        CI != CE; ++CI, ++RI, ++CV) {
3875     StringRef Name;
3876     QualType Ty;
3877     llvm::Value *BasePointer;
3878     llvm::Value *Pointer;
3879     llvm::Value *Size;
3880     unsigned MapType;
3881
3882     // VLA sizes are passed to the outlined region by copy.
3883     if (CI->capturesVariableArrayType()) {
3884       BasePointer = Pointer = *CV;
3885       Size = getTypeSize(CGF, RI->getType());
3886       // Copy to the device as an argument. No need to retrieve it.
3887       MapType = OMP_MAP_BYCOPY;
3888       hasVLACaptures = true;
3889     } else if (CI->capturesThis()) {
3890       BasePointer = Pointer = *CV;
3891       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
3892       Size = getTypeSize(CGF, PtrTy->getPointeeType());
3893       // Default map type.
3894       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3895     } else if (CI->capturesVariableByCopy()) {
3896       MapType = OMP_MAP_BYCOPY;
3897       if (!RI->getType()->isAnyPointerType()) {
3898         // If the field is not a pointer, we need to save the actual value and
3899         // load it as a void pointer.
3900         auto DstAddr = CGF.CreateMemTemp(
3901             Ctx.getUIntPtrType(),
3902             Twine(CI->getCapturedVar()->getName()) + ".casted");
3903         LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
3904
3905         auto *SrcAddrVal = CGF.EmitScalarConversion(
3906             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
3907             Ctx.getPointerType(RI->getType()), SourceLocation());
3908         LValue SrcLV =
3909             CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
3910
3911         // Store the value using the source type pointer.
3912         CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
3913
3914         // Load the value using the destination type pointer.
3915         BasePointer = Pointer =
3916             CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
3917       } else {
3918         MapType |= OMP_MAP_PTR;
3919         BasePointer = Pointer = *CV;
3920       }
3921       Size = getTypeSize(CGF, RI->getType());
3922     } else {
3923       assert(CI->capturesVariable() && "Expected captured reference.");
3924       BasePointer = Pointer = *CV;
3925
3926       const ReferenceType *PtrTy =
3927           cast<ReferenceType>(RI->getType().getTypePtr());
3928       QualType ElementType = PtrTy->getPointeeType();
3929       Size = getTypeSize(CGF, ElementType);
3930       // The default map type for a scalar/complex type is 'to' because by
3931       // default the value doesn't have to be retrieved. For an aggregate type,
3932       // the default is 'tofrom'.
3933       MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
3934                                                : OMP_MAP_TO;
3935       if (ElementType->isAnyPointerType())
3936         MapType |= OMP_MAP_PTR;
3937     }
3938
3939     BasePointers.push_back(BasePointer);
3940     Pointers.push_back(Pointer);
3941     Sizes.push_back(Size);
3942     MapTypes.push_back(MapType);
3943   }
3944
3945   // Keep track on whether the host function has to be executed.
3946   auto OffloadErrorQType =
3947       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
3948   auto OffloadError = CGF.MakeAddrLValue(
3949       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
3950       OffloadErrorQType);
3951   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
3952                         OffloadError);
3953
3954   // Fill up the pointer arrays and transfer execution to the device.
3955   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
3956                     hasVLACaptures, Device, OutlinedFnID, OffloadError,
3957                     OffloadErrorQType](CodeGenFunction &CGF) {
3958     unsigned PointerNumVal = BasePointers.size();
3959     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
3960     llvm::Value *BasePointersArray;
3961     llvm::Value *PointersArray;
3962     llvm::Value *SizesArray;
3963     llvm::Value *MapTypesArray;
3964
3965     if (PointerNumVal) {
3966       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
3967       QualType PointerArrayType = Ctx.getConstantArrayType(
3968           Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
3969           /*IndexTypeQuals=*/0);
3970
3971       BasePointersArray =
3972           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
3973       PointersArray =
3974           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
3975
3976       // If we don't have any VLA types, we can use a constant array for the map
3977       // sizes, otherwise we need to fill up the arrays as we do for the
3978       // pointers.
3979       if (hasVLACaptures) {
3980         QualType SizeArrayType = Ctx.getConstantArrayType(
3981             Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
3982             /*IndexTypeQuals=*/0);
3983         SizesArray =
3984             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
3985       } else {
3986         // We expect all the sizes to be constant, so we collect them to create
3987         // a constant array.
3988         SmallVector<llvm::Constant *, 16> ConstSizes;
3989         for (auto S : Sizes)
3990           ConstSizes.push_back(cast<llvm::Constant>(S));
3991
3992         auto *SizesArrayInit = llvm::ConstantArray::get(
3993             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
3994         auto *SizesArrayGbl = new llvm::GlobalVariable(
3995             CGM.getModule(), SizesArrayInit->getType(),
3996             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3997             SizesArrayInit, ".offload_sizes");
3998         SizesArrayGbl->setUnnamedAddr(true);
3999         SizesArray = SizesArrayGbl;
4000       }
4001
4002       // The map types are always constant so we don't need to generate code to
4003       // fill arrays. Instead, we create an array constant.
4004       llvm::Constant *MapTypesArrayInit =
4005           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
4006       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
4007           CGM.getModule(), MapTypesArrayInit->getType(),
4008           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
4009           MapTypesArrayInit, ".offload_maptypes");
4010       MapTypesArrayGbl->setUnnamedAddr(true);
4011       MapTypesArray = MapTypesArrayGbl;
4012
4013       for (unsigned i = 0; i < PointerNumVal; ++i) {
4014
4015         llvm::Value *BPVal = BasePointers[i];
4016         if (BPVal->getType()->isPointerTy())
4017           BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
4018         else {
4019           assert(BPVal->getType()->isIntegerTy() &&
4020                  "If not a pointer, the value type must be an integer.");
4021           BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
4022         }
4023         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
4024             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
4025             BasePointersArray, 0, i);
4026         Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
4027         CGF.Builder.CreateStore(BPVal, BPAddr);
4028
4029         llvm::Value *PVal = Pointers[i];
4030         if (PVal->getType()->isPointerTy())
4031           PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
4032         else {
4033           assert(PVal->getType()->isIntegerTy() &&
4034                  "If not a pointer, the value type must be an integer.");
4035           PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
4036         }
4037         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
4038             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
4039             0, i);
4040         Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
4041         CGF.Builder.CreateStore(PVal, PAddr);
4042
4043         if (hasVLACaptures) {
4044           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
4045               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
4046               /*Idx0=*/0,
4047               /*Idx1=*/i);
4048           Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
4049           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
4050                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
4051                                   SAddr);
4052         }
4053       }
4054
4055       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4056           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
4057           /*Idx0=*/0, /*Idx1=*/0);
4058       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4059           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
4060           /*Idx0=*/0,
4061           /*Idx1=*/0);
4062       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4063           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
4064           /*Idx0=*/0, /*Idx1=*/0);
4065       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4066           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
4067           /*Idx0=*/0,
4068           /*Idx1=*/0);
4069
4070     } else {
4071       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
4072       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
4073       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
4074       MapTypesArray =
4075           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
4076     }
4077
4078     // On top of the arrays that were filled up, the target offloading call
4079     // takes as arguments the device id as well as the host pointer. The host
4080     // pointer is used by the runtime library to identify the current target
4081     // region, so it only has to be unique and not necessarily point to
4082     // anything. It could be the pointer to the outlined function that
4083     // implements the target region, but we aren't using that so that the
4084     // compiler doesn't need to keep that, and could therefore inline the host
4085     // function if proven worthwhile during optimization.
4086
4087     // From this point on, we need to have an ID of the target region defined.
4088     assert(OutlinedFnID && "Invalid outlined function ID!");
4089
4090     // Emit device ID if any.
4091     llvm::Value *DeviceID;
4092     if (Device)
4093       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4094                                            CGM.Int32Ty, /*isSigned=*/true);
4095     else
4096       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
4097
4098     llvm::Value *OffloadingArgs[] = {
4099         DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
4100         PointersArray, SizesArray,   MapTypesArray};
4101     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
4102                                       OffloadingArgs);
4103
4104     CGF.EmitStoreOfScalar(Return, OffloadError);
4105   };
4106
4107   // Notify that the host version must be executed.
4108   auto &&ElseGen = [this, OffloadError,
4109                     OffloadErrorQType](CodeGenFunction &CGF) {
4110     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
4111                           OffloadError);
4112   };
4113
4114   // If we have a target function ID it means that we need to support
4115   // offloading, otherwise, just execute on the host. We need to execute on host
4116   // regardless of the conditional in the if clause if, e.g., the user do not
4117   // specify target triples.
4118   if (OutlinedFnID) {
4119     if (IfCond) {
4120       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
4121     } else {
4122       CodeGenFunction::RunCleanupsScope Scope(CGF);
4123       ThenGen(CGF);
4124     }
4125   } else {
4126     CodeGenFunction::RunCleanupsScope Scope(CGF);
4127     ElseGen(CGF);
4128   }
4129
4130   // Check the error code and execute the host version if required.
4131   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
4132   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
4133   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
4134   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
4135   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
4136
4137   CGF.EmitBlock(OffloadFailedBlock);
4138   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
4139   CGF.EmitBranch(OffloadContBlock);
4140
4141   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
4142   return;
4143 }
4144
4145 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
4146                                                     StringRef ParentName) {
4147   if (!S)
4148     return;
4149
4150   // If we find a OMP target directive, codegen the outline function and
4151   // register the result.
4152   // FIXME: Add other directives with target when they become supported.
4153   bool isTargetDirective = isa<OMPTargetDirective>(S);
4154
4155   if (isTargetDirective) {
4156     auto *E = cast<OMPExecutableDirective>(S);
4157     unsigned DeviceID;
4158     unsigned FileID;
4159     unsigned Line;
4160     unsigned Column;
4161     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
4162                              FileID, Line, Column);
4163
4164     // Is this a target region that should not be emitted as an entry point? If
4165     // so just signal we are done with this target region.
4166     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(
4167             DeviceID, FileID, ParentName, Line, Column))
4168       return;
4169
4170     llvm::Function *Fn;
4171     llvm::Constant *Addr;
4172     emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
4173                                /*isOffloadEntry=*/true);
4174     assert(Fn && Addr && "Target region emission failed.");
4175     return;
4176   }
4177
4178   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
4179     if (!E->getAssociatedStmt())
4180       return;
4181
4182     scanForTargetRegionsFunctions(
4183         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
4184         ParentName);
4185     return;
4186   }
4187
4188   // If this is a lambda function, look into its body.
4189   if (auto *L = dyn_cast<LambdaExpr>(S))
4190     S = L->getBody();
4191
4192   // Keep looking for target regions recursively.
4193   for (auto *II : S->children())
4194     scanForTargetRegionsFunctions(II, ParentName);
4195
4196   return;
4197 }
4198
4199 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
4200   auto &FD = *cast<FunctionDecl>(GD.getDecl());
4201
4202   // If emitting code for the host, we do not process FD here. Instead we do
4203   // the normal code generation.
4204   if (!CGM.getLangOpts().OpenMPIsDevice)
4205     return false;
4206
4207   // Try to detect target regions in the function.
4208   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
4209
4210   // We should not emit any function othen that the ones created during the
4211   // scanning. Therefore, we signal that this function is completely dealt
4212   // with.
4213   return true;
4214 }
4215
4216 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
4217   if (!CGM.getLangOpts().OpenMPIsDevice)
4218     return false;
4219
4220   // Check if there are Ctors/Dtors in this declaration and look for target
4221   // regions in it. We use the complete variant to produce the kernel name
4222   // mangling.
4223   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
4224   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
4225     for (auto *Ctor : RD->ctors()) {
4226       StringRef ParentName =
4227           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
4228       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
4229     }
4230     auto *Dtor = RD->getDestructor();
4231     if (Dtor) {
4232       StringRef ParentName =
4233           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
4234       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
4235     }
4236   }
4237
4238   // If we are in target mode we do not emit any global (declare target is not
4239   // implemented yet). Therefore we signal that GD was processed in this case.
4240   return true;
4241 }
4242
4243 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
4244   auto *VD = GD.getDecl();
4245   if (isa<FunctionDecl>(VD))
4246     return emitTargetFunctions(GD);
4247
4248   return emitTargetGlobalVariable(GD);
4249 }
4250
4251 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
4252   // If we have offloading in the current module, we need to emit the entries
4253   // now and register the offloading descriptor.
4254   createOffloadEntriesAndInfoMetadata();
4255
4256   // Create and register the offloading binary descriptors. This is the main
4257   // entity that captures all the information about offloading in the current
4258   // compilation unit.
4259   return createOffloadingBinaryDescriptorRegistration();
4260 }