]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
MFV r299425:
[FreeBSD/FreeBSD.git] / contrib / llvm / tools / clang / lib / CodeGen / CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Decl.h"
19 #include "clang/AST/StmtOpenMP.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/Bitcode/ReaderWriter.h"
22 #include "llvm/IR/CallSite.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/Value.h"
26 #include "llvm/Support/Format.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <cassert>
29
30 using namespace clang;
31 using namespace CodeGen;
32
33 namespace {
34 /// \brief Base class for handling code generation inside OpenMP regions.
35 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
36 public:
37   /// \brief Kinds of OpenMP regions used in codegen.
38   enum CGOpenMPRegionKind {
39     /// \brief Region with outlined function for standalone 'parallel'
40     /// directive.
41     ParallelOutlinedRegion,
42     /// \brief Region with outlined function for standalone 'task' directive.
43     TaskOutlinedRegion,
44     /// \brief Region for constructs that do not require function outlining,
45     /// like 'for', 'sections', 'atomic' etc. directives.
46     InlinedRegion,
47     /// \brief Region with outlined function for standalone 'target' directive.
48     TargetRegion,
49   };
50
51   CGOpenMPRegionInfo(const CapturedStmt &CS,
52                      const CGOpenMPRegionKind RegionKind,
53                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
54                      bool HasCancel)
55       : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
56         CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
57
58   CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
59                      const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
60                      bool HasCancel)
61       : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
62         Kind(Kind), HasCancel(HasCancel) {}
63
64   /// \brief Get a variable or parameter for storing global thread id
65   /// inside OpenMP construct.
66   virtual const VarDecl *getThreadIDVariable() const = 0;
67
68   /// \brief Emit the captured statement body.
69   void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
70
71   /// \brief Get an LValue for the current ThreadID variable.
72   /// \return LValue for thread id variable. This LValue always has type int32*.
73   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
74
75   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
76
77   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
78
79   bool hasCancel() const { return HasCancel; }
80
81   static bool classof(const CGCapturedStmtInfo *Info) {
82     return Info->getKind() == CR_OpenMP;
83   }
84
85 protected:
86   CGOpenMPRegionKind RegionKind;
87   RegionCodeGenTy CodeGen;
88   OpenMPDirectiveKind Kind;
89   bool HasCancel;
90 };
91
92 /// \brief API for captured statement code generation in OpenMP constructs.
93 class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
94 public:
95   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
96                              const RegionCodeGenTy &CodeGen,
97                              OpenMPDirectiveKind Kind, bool HasCancel)
98       : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
99                            HasCancel),
100         ThreadIDVar(ThreadIDVar) {
101     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
102   }
103   /// \brief Get a variable or parameter for storing global thread id
104   /// inside OpenMP construct.
105   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
106
107   /// \brief Get the name of the capture helper.
108   StringRef getHelperName() const override { return ".omp_outlined."; }
109
110   static bool classof(const CGCapturedStmtInfo *Info) {
111     return CGOpenMPRegionInfo::classof(Info) &&
112            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
113                ParallelOutlinedRegion;
114   }
115
116 private:
117   /// \brief A variable or parameter storing global thread id for OpenMP
118   /// constructs.
119   const VarDecl *ThreadIDVar;
120 };
121
122 /// \brief API for captured statement code generation in OpenMP constructs.
123 class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
124 public:
125   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
126                                  const VarDecl *ThreadIDVar,
127                                  const RegionCodeGenTy &CodeGen,
128                                  OpenMPDirectiveKind Kind, bool HasCancel)
129       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
130         ThreadIDVar(ThreadIDVar) {
131     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
132   }
133   /// \brief Get a variable or parameter for storing global thread id
134   /// inside OpenMP construct.
135   const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
136
137   /// \brief Get an LValue for the current ThreadID variable.
138   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
139
140   /// \brief Get the name of the capture helper.
141   StringRef getHelperName() const override { return ".omp_outlined."; }
142
143   static bool classof(const CGCapturedStmtInfo *Info) {
144     return CGOpenMPRegionInfo::classof(Info) &&
145            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
146                TaskOutlinedRegion;
147   }
148
149 private:
150   /// \brief A variable or parameter storing global thread id for OpenMP
151   /// constructs.
152   const VarDecl *ThreadIDVar;
153 };
154
155 /// \brief API for inlined captured statement code generation in OpenMP
156 /// constructs.
157 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
158 public:
159   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
160                             const RegionCodeGenTy &CodeGen,
161                             OpenMPDirectiveKind Kind, bool HasCancel)
162       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
163         OldCSI(OldCSI),
164         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
165   // \brief Retrieve the value of the context parameter.
166   llvm::Value *getContextValue() const override {
167     if (OuterRegionInfo)
168       return OuterRegionInfo->getContextValue();
169     llvm_unreachable("No context value for inlined OpenMP region");
170   }
171   void setContextValue(llvm::Value *V) override {
172     if (OuterRegionInfo) {
173       OuterRegionInfo->setContextValue(V);
174       return;
175     }
176     llvm_unreachable("No context value for inlined OpenMP region");
177   }
178   /// \brief Lookup the captured field decl for a variable.
179   const FieldDecl *lookup(const VarDecl *VD) const override {
180     if (OuterRegionInfo)
181       return OuterRegionInfo->lookup(VD);
182     // If there is no outer outlined region,no need to lookup in a list of
183     // captured variables, we can use the original one.
184     return nullptr;
185   }
186   FieldDecl *getThisFieldDecl() const override {
187     if (OuterRegionInfo)
188       return OuterRegionInfo->getThisFieldDecl();
189     return nullptr;
190   }
191   /// \brief Get a variable or parameter for storing global thread id
192   /// inside OpenMP construct.
193   const VarDecl *getThreadIDVariable() const override {
194     if (OuterRegionInfo)
195       return OuterRegionInfo->getThreadIDVariable();
196     return nullptr;
197   }
198
199   /// \brief Get the name of the capture helper.
200   StringRef getHelperName() const override {
201     if (auto *OuterRegionInfo = getOldCSI())
202       return OuterRegionInfo->getHelperName();
203     llvm_unreachable("No helper name for inlined OpenMP construct");
204   }
205
206   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
207
208   static bool classof(const CGCapturedStmtInfo *Info) {
209     return CGOpenMPRegionInfo::classof(Info) &&
210            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
211   }
212
213 private:
214   /// \brief CodeGen info about outer OpenMP region.
215   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
216   CGOpenMPRegionInfo *OuterRegionInfo;
217 };
218
219 /// \brief API for captured statement code generation in OpenMP target
220 /// constructs. For this captures, implicit parameters are used instead of the
221 /// captured fields. The name of the target region has to be unique in a given
222 /// application so it is provided by the client, because only the client has
223 /// the information to generate that.
224 class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
225 public:
226   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
227                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
228       : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
229                            /*HasCancel=*/false),
230         HelperName(HelperName) {}
231
232   /// \brief This is unused for target regions because each starts executing
233   /// with a single thread.
234   const VarDecl *getThreadIDVariable() const override { return nullptr; }
235
236   /// \brief Get the name of the capture helper.
237   StringRef getHelperName() const override { return HelperName; }
238
239   static bool classof(const CGCapturedStmtInfo *Info) {
240     return CGOpenMPRegionInfo::classof(Info) &&
241            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
242   }
243
244 private:
245   StringRef HelperName;
246 };
247
248 /// \brief RAII for emitting code of OpenMP constructs.
249 class InlinedOpenMPRegionRAII {
250   CodeGenFunction &CGF;
251
252 public:
253   /// \brief Constructs region for combined constructs.
254   /// \param CodeGen Code generation sequence for combined directives. Includes
255   /// a list of functions used for code generation of implicitly inlined
256   /// regions.
257   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
258                           OpenMPDirectiveKind Kind, bool HasCancel)
259       : CGF(CGF) {
260     // Start emission for the construct.
261     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
262         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
263   }
264   ~InlinedOpenMPRegionRAII() {
265     // Restore original CapturedStmtInfo only if we're done with code emission.
266     auto *OldCSI =
267         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
268     delete CGF.CapturedStmtInfo;
269     CGF.CapturedStmtInfo = OldCSI;
270   }
271 };
272
273 } // anonymous namespace
274
275 static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr,
276                                       QualType Ty) {
277   AlignmentSource Source;
278   CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source);
279   return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align),
280                             Ty->getPointeeType(), Source);
281 }
282
283 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
284   return emitLoadOfPointerLValue(CGF,
285                                  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
286                                  getThreadIDVariable()->getType());
287 }
288
289 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
290   if (!CGF.HaveInsertPoint())
291     return;
292   // 1.2.2 OpenMP Language Terminology
293   // Structured block - An executable statement with a single entry at the
294   // top and a single exit at the bottom.
295   // The point of exit cannot be a branch out of the structured block.
296   // longjmp() and throw() must not violate the entry/exit criteria.
297   CGF.EHStack.pushTerminate();
298   {
299     CodeGenFunction::RunCleanupsScope Scope(CGF);
300     CodeGen(CGF);
301   }
302   CGF.EHStack.popTerminate();
303 }
304
305 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
306     CodeGenFunction &CGF) {
307   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
308                             getThreadIDVariable()->getType(),
309                             AlignmentSource::Decl);
310 }
311
312 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
313     : CGM(CGM), DefaultOpenMPPSource(nullptr), KmpRoutineEntryPtrTy(nullptr),
314       OffloadEntriesInfoManager(CGM) {
315   IdentTy = llvm::StructType::create(
316       "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
317       CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
318       CGM.Int8PtrTy /* psource */, nullptr);
319   // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
320   llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
321                                llvm::PointerType::getUnqual(CGM.Int32Ty)};
322   Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
323   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
324
325   loadOffloadInfoMetadata();
326 }
327
328 void CGOpenMPRuntime::clear() {
329   InternalVars.clear();
330 }
331
332 // Layout information for ident_t.
333 static CharUnits getIdentAlign(CodeGenModule &CGM) {
334   return CGM.getPointerAlign();
335 }
336 static CharUnits getIdentSize(CodeGenModule &CGM) {
337   assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
338   return CharUnits::fromQuantity(16) + CGM.getPointerSize();
339 }
340 static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) {
341   // All the fields except the last are i32, so this works beautifully.
342   return unsigned(Field) * CharUnits::fromQuantity(4);
343 }
344 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
345                                    CGOpenMPRuntime::IdentFieldIndex Field,
346                                    const llvm::Twine &Name = "") {
347   auto Offset = getOffsetOfIdentField(Field);
348   return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
349 }
350
351 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
352     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
353     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
354   assert(ThreadIDVar->getType()->isPointerType() &&
355          "thread id variable must be of type kmp_int32 *");
356   const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
357   CodeGenFunction CGF(CGM, true);
358   bool HasCancel = false;
359   if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
360     HasCancel = OPD->hasCancel();
361   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
362     HasCancel = OPSD->hasCancel();
363   else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
364     HasCancel = OPFD->hasCancel();
365   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
366                                     HasCancel);
367   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
368   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
369 }
370
371 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
372     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
373     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
374   assert(!ThreadIDVar->getType()->isPointerType() &&
375          "thread id variable must be of type kmp_int32 for tasks");
376   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
377   CodeGenFunction CGF(CGM, true);
378   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
379                                         InnermostKind,
380                                         cast<OMPTaskDirective>(D).hasCancel());
381   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
382   return CGF.GenerateCapturedStmtFunction(*CS);
383 }
384
385 Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
386   CharUnits Align = getIdentAlign(CGM);
387   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
388   if (!Entry) {
389     if (!DefaultOpenMPPSource) {
390       // Initialize default location for psource field of ident_t structure of
391       // all ident_t objects. Format is ";file;function;line;column;;".
392       // Taken from
393       // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
394       DefaultOpenMPPSource =
395           CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
396       DefaultOpenMPPSource =
397           llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
398     }
399     auto DefaultOpenMPLocation = new llvm::GlobalVariable(
400         CGM.getModule(), IdentTy, /*isConstant*/ true,
401         llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
402     DefaultOpenMPLocation->setUnnamedAddr(true);
403     DefaultOpenMPLocation->setAlignment(Align.getQuantity());
404
405     llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
406     llvm::Constant *Values[] = {Zero,
407                                 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
408                                 Zero, Zero, DefaultOpenMPPSource};
409     llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
410     DefaultOpenMPLocation->setInitializer(Init);
411     OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
412   }
413   return Address(Entry, Align);
414 }
415
416 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
417                                                  SourceLocation Loc,
418                                                  OpenMPLocationFlags Flags) {
419   // If no debug info is generated - return global default location.
420   if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
421       Loc.isInvalid())
422     return getOrCreateDefaultLocation(Flags).getPointer();
423
424   assert(CGF.CurFn && "No function in current CodeGenFunction.");
425
426   Address LocValue = Address::invalid();
427   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
428   if (I != OpenMPLocThreadIDMap.end())
429     LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
430
431   // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
432   // GetOpenMPThreadID was called before this routine.
433   if (!LocValue.isValid()) {
434     // Generate "ident_t .kmpc_loc.addr;"
435     Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
436                                       ".kmpc_loc.addr");
437     auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
438     Elem.second.DebugLoc = AI.getPointer();
439     LocValue = AI;
440
441     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
442     CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
443     CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
444                              CGM.getSize(getIdentSize(CGF.CGM)));
445   }
446
447   // char **psource = &.kmpc_loc_<flags>.addr.psource;
448   Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
449
450   auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
451   if (OMPDebugLoc == nullptr) {
452     SmallString<128> Buffer2;
453     llvm::raw_svector_ostream OS2(Buffer2);
454     // Build debug location
455     PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
456     OS2 << ";" << PLoc.getFilename() << ";";
457     if (const FunctionDecl *FD =
458             dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
459       OS2 << FD->getQualifiedNameAsString();
460     }
461     OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
462     OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
463     OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
464   }
465   // *psource = ";<File>;<Function>;<Line>;<Column>;;";
466   CGF.Builder.CreateStore(OMPDebugLoc, PSource);
467
468   // Our callers always pass this to a runtime function, so for
469   // convenience, go ahead and return a naked pointer.
470   return LocValue.getPointer();
471 }
472
473 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
474                                           SourceLocation Loc) {
475   assert(CGF.CurFn && "No function in current CodeGenFunction.");
476
477   llvm::Value *ThreadID = nullptr;
478   // Check whether we've already cached a load of the thread id in this
479   // function.
480   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
481   if (I != OpenMPLocThreadIDMap.end()) {
482     ThreadID = I->second.ThreadID;
483     if (ThreadID != nullptr)
484       return ThreadID;
485   }
486   if (auto *OMPRegionInfo =
487           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
488     if (OMPRegionInfo->getThreadIDVariable()) {
489       // Check if this an outlined function with thread id passed as argument.
490       auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
491       ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
492       // If value loaded in entry block, cache it and use it everywhere in
493       // function.
494       if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
495         auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
496         Elem.second.ThreadID = ThreadID;
497       }
498       return ThreadID;
499     }
500   }
501
502   // This is not an outlined function region - need to call __kmpc_int32
503   // kmpc_global_thread_num(ident_t *loc).
504   // Generate thread id value and cache this value for use across the
505   // function.
506   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
507   CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
508   ThreadID =
509       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
510                           emitUpdateLocation(CGF, Loc));
511   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
512   Elem.second.ThreadID = ThreadID;
513   return ThreadID;
514 }
515
516 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
517   assert(CGF.CurFn && "No function in current CodeGenFunction.");
518   if (OpenMPLocThreadIDMap.count(CGF.CurFn))
519     OpenMPLocThreadIDMap.erase(CGF.CurFn);
520 }
521
522 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
523   return llvm::PointerType::getUnqual(IdentTy);
524 }
525
526 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
527   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
528 }
529
530 llvm::Constant *
531 CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
532   llvm::Constant *RTLFn = nullptr;
533   switch (Function) {
534   case OMPRTL__kmpc_fork_call: {
535     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
536     // microtask, ...);
537     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
538                                 getKmpc_MicroPointerTy()};
539     llvm::FunctionType *FnTy =
540         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
541     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
542     break;
543   }
544   case OMPRTL__kmpc_global_thread_num: {
545     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
546     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
547     llvm::FunctionType *FnTy =
548         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
549     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
550     break;
551   }
552   case OMPRTL__kmpc_threadprivate_cached: {
553     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
554     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
555     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
556                                 CGM.VoidPtrTy, CGM.SizeTy,
557                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
558     llvm::FunctionType *FnTy =
559         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
560     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
561     break;
562   }
563   case OMPRTL__kmpc_critical: {
564     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
565     // kmp_critical_name *crit);
566     llvm::Type *TypeParams[] = {
567         getIdentTyPointerTy(), CGM.Int32Ty,
568         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
569     llvm::FunctionType *FnTy =
570         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
571     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
572     break;
573   }
574   case OMPRTL__kmpc_critical_with_hint: {
575     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
576     // kmp_critical_name *crit, uintptr_t hint);
577     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
578                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
579                                 CGM.IntPtrTy};
580     llvm::FunctionType *FnTy =
581         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
582     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
583     break;
584   }
585   case OMPRTL__kmpc_threadprivate_register: {
586     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
587     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
588     // typedef void *(*kmpc_ctor)(void *);
589     auto KmpcCtorTy =
590         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
591                                 /*isVarArg*/ false)->getPointerTo();
592     // typedef void *(*kmpc_cctor)(void *, void *);
593     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
594     auto KmpcCopyCtorTy =
595         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
596                                 /*isVarArg*/ false)->getPointerTo();
597     // typedef void (*kmpc_dtor)(void *);
598     auto KmpcDtorTy =
599         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
600             ->getPointerTo();
601     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
602                               KmpcCopyCtorTy, KmpcDtorTy};
603     auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
604                                         /*isVarArg*/ false);
605     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
606     break;
607   }
608   case OMPRTL__kmpc_end_critical: {
609     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
610     // kmp_critical_name *crit);
611     llvm::Type *TypeParams[] = {
612         getIdentTyPointerTy(), CGM.Int32Ty,
613         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
614     llvm::FunctionType *FnTy =
615         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
616     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
617     break;
618   }
619   case OMPRTL__kmpc_cancel_barrier: {
620     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
621     // global_tid);
622     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
623     llvm::FunctionType *FnTy =
624         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
625     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
626     break;
627   }
628   case OMPRTL__kmpc_barrier: {
629     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
630     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
631     llvm::FunctionType *FnTy =
632         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
633     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
634     break;
635   }
636   case OMPRTL__kmpc_for_static_fini: {
637     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
638     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
639     llvm::FunctionType *FnTy =
640         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
641     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
642     break;
643   }
644   case OMPRTL__kmpc_push_num_threads: {
645     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
646     // kmp_int32 num_threads)
647     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
648                                 CGM.Int32Ty};
649     llvm::FunctionType *FnTy =
650         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
651     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
652     break;
653   }
654   case OMPRTL__kmpc_serialized_parallel: {
655     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
656     // global_tid);
657     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
658     llvm::FunctionType *FnTy =
659         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
660     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
661     break;
662   }
663   case OMPRTL__kmpc_end_serialized_parallel: {
664     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
665     // global_tid);
666     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
667     llvm::FunctionType *FnTy =
668         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
669     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
670     break;
671   }
672   case OMPRTL__kmpc_flush: {
673     // Build void __kmpc_flush(ident_t *loc);
674     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
675     llvm::FunctionType *FnTy =
676         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
677     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
678     break;
679   }
680   case OMPRTL__kmpc_master: {
681     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
682     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
683     llvm::FunctionType *FnTy =
684         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
685     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
686     break;
687   }
688   case OMPRTL__kmpc_end_master: {
689     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
690     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
691     llvm::FunctionType *FnTy =
692         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
693     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
694     break;
695   }
696   case OMPRTL__kmpc_omp_taskyield: {
697     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
698     // int end_part);
699     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
700     llvm::FunctionType *FnTy =
701         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
702     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
703     break;
704   }
705   case OMPRTL__kmpc_single: {
706     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
707     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
708     llvm::FunctionType *FnTy =
709         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
710     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
711     break;
712   }
713   case OMPRTL__kmpc_end_single: {
714     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
715     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
716     llvm::FunctionType *FnTy =
717         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
718     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
719     break;
720   }
721   case OMPRTL__kmpc_omp_task_alloc: {
722     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
723     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
724     // kmp_routine_entry_t *task_entry);
725     assert(KmpRoutineEntryPtrTy != nullptr &&
726            "Type kmp_routine_entry_t must be created.");
727     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
728                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
729     // Return void * and then cast to particular kmp_task_t type.
730     llvm::FunctionType *FnTy =
731         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
732     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
733     break;
734   }
735   case OMPRTL__kmpc_omp_task: {
736     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
737     // *new_task);
738     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
739                                 CGM.VoidPtrTy};
740     llvm::FunctionType *FnTy =
741         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
742     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
743     break;
744   }
745   case OMPRTL__kmpc_copyprivate: {
746     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
747     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
748     // kmp_int32 didit);
749     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
750     auto *CpyFnTy =
751         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
752     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
753                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
754                                 CGM.Int32Ty};
755     llvm::FunctionType *FnTy =
756         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
757     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
758     break;
759   }
760   case OMPRTL__kmpc_reduce: {
761     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
762     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
763     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
764     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
765     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
766                                                /*isVarArg=*/false);
767     llvm::Type *TypeParams[] = {
768         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
769         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
770         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
771     llvm::FunctionType *FnTy =
772         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
773     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
774     break;
775   }
776   case OMPRTL__kmpc_reduce_nowait: {
777     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
778     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
779     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
780     // *lck);
781     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
782     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
783                                                /*isVarArg=*/false);
784     llvm::Type *TypeParams[] = {
785         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
786         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
787         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
788     llvm::FunctionType *FnTy =
789         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
790     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
791     break;
792   }
793   case OMPRTL__kmpc_end_reduce: {
794     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
795     // kmp_critical_name *lck);
796     llvm::Type *TypeParams[] = {
797         getIdentTyPointerTy(), CGM.Int32Ty,
798         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
799     llvm::FunctionType *FnTy =
800         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
801     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
802     break;
803   }
804   case OMPRTL__kmpc_end_reduce_nowait: {
805     // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
806     // kmp_critical_name *lck);
807     llvm::Type *TypeParams[] = {
808         getIdentTyPointerTy(), CGM.Int32Ty,
809         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
810     llvm::FunctionType *FnTy =
811         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
812     RTLFn =
813         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
814     break;
815   }
816   case OMPRTL__kmpc_omp_task_begin_if0: {
817     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
818     // kmp_task_t *new_task);
819     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
820                                 CGM.VoidPtrTy};
821     llvm::FunctionType *FnTy =
822         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
823     RTLFn =
824         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
825     break;
826   }
827   case OMPRTL__kmpc_omp_task_complete_if0: {
828     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
829     // kmp_task_t *new_task);
830     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
831                                 CGM.VoidPtrTy};
832     llvm::FunctionType *FnTy =
833         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
834     RTLFn = CGM.CreateRuntimeFunction(FnTy,
835                                       /*Name=*/"__kmpc_omp_task_complete_if0");
836     break;
837   }
838   case OMPRTL__kmpc_ordered: {
839     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
840     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
841     llvm::FunctionType *FnTy =
842         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
843     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
844     break;
845   }
846   case OMPRTL__kmpc_end_ordered: {
847     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
848     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
849     llvm::FunctionType *FnTy =
850         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
851     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
852     break;
853   }
854   case OMPRTL__kmpc_omp_taskwait: {
855     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
856     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
857     llvm::FunctionType *FnTy =
858         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
859     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
860     break;
861   }
862   case OMPRTL__kmpc_taskgroup: {
863     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
864     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
865     llvm::FunctionType *FnTy =
866         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
867     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
868     break;
869   }
870   case OMPRTL__kmpc_end_taskgroup: {
871     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
872     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
873     llvm::FunctionType *FnTy =
874         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
875     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
876     break;
877   }
878   case OMPRTL__kmpc_push_proc_bind: {
879     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
880     // int proc_bind)
881     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
882     llvm::FunctionType *FnTy =
883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
884     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
885     break;
886   }
887   case OMPRTL__kmpc_omp_task_with_deps: {
888     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
889     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
890     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
891     llvm::Type *TypeParams[] = {
892         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
893         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
894     llvm::FunctionType *FnTy =
895         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
896     RTLFn =
897         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
898     break;
899   }
900   case OMPRTL__kmpc_omp_wait_deps: {
901     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
902     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
903     // kmp_depend_info_t *noalias_dep_list);
904     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
905                                 CGM.Int32Ty,           CGM.VoidPtrTy,
906                                 CGM.Int32Ty,           CGM.VoidPtrTy};
907     llvm::FunctionType *FnTy =
908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
909     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
910     break;
911   }
912   case OMPRTL__kmpc_cancellationpoint: {
913     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
914     // global_tid, kmp_int32 cncl_kind)
915     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
916     llvm::FunctionType *FnTy =
917         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
918     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
919     break;
920   }
921   case OMPRTL__kmpc_cancel: {
922     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
923     // kmp_int32 cncl_kind)
924     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
925     llvm::FunctionType *FnTy =
926         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
927     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
928     break;
929   }
930   case OMPRTL__tgt_target: {
931     // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
932     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
933     // *arg_types);
934     llvm::Type *TypeParams[] = {CGM.Int32Ty,
935                                 CGM.VoidPtrTy,
936                                 CGM.Int32Ty,
937                                 CGM.VoidPtrPtrTy,
938                                 CGM.VoidPtrPtrTy,
939                                 CGM.SizeTy->getPointerTo(),
940                                 CGM.Int32Ty->getPointerTo()};
941     llvm::FunctionType *FnTy =
942         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
943     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
944     break;
945   }
946   case OMPRTL__tgt_register_lib: {
947     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
948     QualType ParamTy =
949         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
950     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
951     llvm::FunctionType *FnTy =
952         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
953     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
954     break;
955   }
956   case OMPRTL__tgt_unregister_lib: {
957     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
958     QualType ParamTy =
959         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
960     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
961     llvm::FunctionType *FnTy =
962         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
963     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
964     break;
965   }
966   }
967   return RTLFn;
968 }
969
970 static llvm::Value *getTypeSize(CodeGenFunction &CGF, QualType Ty) {
971   auto &C = CGF.getContext();
972   llvm::Value *Size = nullptr;
973   auto SizeInChars = C.getTypeSizeInChars(Ty);
974   if (SizeInChars.isZero()) {
975     // getTypeSizeInChars() returns 0 for a VLA.
976     while (auto *VAT = C.getAsVariableArrayType(Ty)) {
977       llvm::Value *ArraySize;
978       std::tie(ArraySize, Ty) = CGF.getVLASize(VAT);
979       Size = Size ? CGF.Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
980     }
981     SizeInChars = C.getTypeSizeInChars(Ty);
982     assert(!SizeInChars.isZero());
983     Size = CGF.Builder.CreateNUWMul(
984         Size, llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity()));
985   } else
986     Size = llvm::ConstantInt::get(CGF.SizeTy, SizeInChars.getQuantity());
987   return Size;
988 }
989
990 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
991                                                              bool IVSigned) {
992   assert((IVSize == 32 || IVSize == 64) &&
993          "IV size is not compatible with the omp runtime");
994   auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
995                                        : "__kmpc_for_static_init_4u")
996                            : (IVSigned ? "__kmpc_for_static_init_8"
997                                        : "__kmpc_for_static_init_8u");
998   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
999   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1000   llvm::Type *TypeParams[] = {
1001     getIdentTyPointerTy(),                     // loc
1002     CGM.Int32Ty,                               // tid
1003     CGM.Int32Ty,                               // schedtype
1004     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1005     PtrTy,                                     // p_lower
1006     PtrTy,                                     // p_upper
1007     PtrTy,                                     // p_stride
1008     ITy,                                       // incr
1009     ITy                                        // chunk
1010   };
1011   llvm::FunctionType *FnTy =
1012       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1013   return CGM.CreateRuntimeFunction(FnTy, Name);
1014 }
1015
1016 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
1017                                                             bool IVSigned) {
1018   assert((IVSize == 32 || IVSize == 64) &&
1019          "IV size is not compatible with the omp runtime");
1020   auto Name =
1021       IVSize == 32
1022           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1023           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1024   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1025   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1026                                CGM.Int32Ty,           // tid
1027                                CGM.Int32Ty,           // schedtype
1028                                ITy,                   // lower
1029                                ITy,                   // upper
1030                                ITy,                   // stride
1031                                ITy                    // chunk
1032   };
1033   llvm::FunctionType *FnTy =
1034       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1035   return CGM.CreateRuntimeFunction(FnTy, Name);
1036 }
1037
1038 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
1039                                                             bool IVSigned) {
1040   assert((IVSize == 32 || IVSize == 64) &&
1041          "IV size is not compatible with the omp runtime");
1042   auto Name =
1043       IVSize == 32
1044           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1045           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1046   llvm::Type *TypeParams[] = {
1047       getIdentTyPointerTy(), // loc
1048       CGM.Int32Ty,           // tid
1049   };
1050   llvm::FunctionType *FnTy =
1051       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1052   return CGM.CreateRuntimeFunction(FnTy, Name);
1053 }
1054
1055 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
1056                                                             bool IVSigned) {
1057   assert((IVSize == 32 || IVSize == 64) &&
1058          "IV size is not compatible with the omp runtime");
1059   auto Name =
1060       IVSize == 32
1061           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1062           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1063   auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1064   auto PtrTy = llvm::PointerType::getUnqual(ITy);
1065   llvm::Type *TypeParams[] = {
1066     getIdentTyPointerTy(),                     // loc
1067     CGM.Int32Ty,                               // tid
1068     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1069     PtrTy,                                     // p_lower
1070     PtrTy,                                     // p_upper
1071     PtrTy                                      // p_stride
1072   };
1073   llvm::FunctionType *FnTy =
1074       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1075   return CGM.CreateRuntimeFunction(FnTy, Name);
1076 }
1077
1078 llvm::Constant *
1079 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1080   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1081          !CGM.getContext().getTargetInfo().isTLSSupported());
1082   // Lookup the entry, lazily creating it if necessary.
1083   return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
1084                                      Twine(CGM.getMangledName(VD)) + ".cache.");
1085 }
1086
1087 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1088                                                 const VarDecl *VD,
1089                                                 Address VDAddr,
1090                                                 SourceLocation Loc) {
1091   if (CGM.getLangOpts().OpenMPUseTLS &&
1092       CGM.getContext().getTargetInfo().isTLSSupported())
1093     return VDAddr;
1094
1095   auto VarTy = VDAddr.getElementType();
1096   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1097                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1098                                                        CGM.Int8PtrTy),
1099                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1100                          getOrCreateThreadPrivateCache(VD)};
1101   return Address(CGF.EmitRuntimeCall(
1102       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
1103                  VDAddr.getAlignment());
1104 }
1105
1106 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1107     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1108     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1109   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1110   // library.
1111   auto OMPLoc = emitUpdateLocation(CGF, Loc);
1112   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1113                       OMPLoc);
1114   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1115   // to register constructor/destructor for variable.
1116   llvm::Value *Args[] = {OMPLoc,
1117                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1118                                                        CGM.VoidPtrTy),
1119                          Ctor, CopyCtor, Dtor};
1120   CGF.EmitRuntimeCall(
1121       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
1122 }
1123
1124 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1125     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1126     bool PerformInit, CodeGenFunction *CGF) {
1127   if (CGM.getLangOpts().OpenMPUseTLS &&
1128       CGM.getContext().getTargetInfo().isTLSSupported())
1129     return nullptr;
1130
1131   VD = VD->getDefinition(CGM.getContext());
1132   if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
1133     ThreadPrivateWithDefinition.insert(VD);
1134     QualType ASTTy = VD->getType();
1135
1136     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1137     auto Init = VD->getAnyInitializer();
1138     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1139       // Generate function that re-emits the declaration's initializer into the
1140       // threadprivate copy of the variable VD
1141       CodeGenFunction CtorCGF(CGM);
1142       FunctionArgList Args;
1143       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1144                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1145       Args.push_back(&Dst);
1146
1147       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1148           CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(),
1149           /*isVariadic=*/false);
1150       auto FTy = CGM.getTypes().GetFunctionType(FI);
1151       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1152           FTy, ".__kmpc_global_ctor_.", FI, Loc);
1153       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1154                             Args, SourceLocation());
1155       auto ArgVal = CtorCGF.EmitLoadOfScalar(
1156           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1157           CGM.getContext().VoidPtrTy, Dst.getLocation());
1158       Address Arg = Address(ArgVal, VDAddr.getAlignment());
1159       Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
1160                                              CtorCGF.ConvertTypeForMem(ASTTy));
1161       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1162                                /*IsInitializer=*/true);
1163       ArgVal = CtorCGF.EmitLoadOfScalar(
1164           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1165           CGM.getContext().VoidPtrTy, Dst.getLocation());
1166       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1167       CtorCGF.FinishFunction();
1168       Ctor = Fn;
1169     }
1170     if (VD->getType().isDestructedType() != QualType::DK_none) {
1171       // Generate function that emits destructor call for the threadprivate copy
1172       // of the variable VD
1173       CodeGenFunction DtorCGF(CGM);
1174       FunctionArgList Args;
1175       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
1176                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
1177       Args.push_back(&Dst);
1178
1179       auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1180           CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(),
1181           /*isVariadic=*/false);
1182       auto FTy = CGM.getTypes().GetFunctionType(FI);
1183       auto Fn = CGM.CreateGlobalInitOrDestructFunction(
1184           FTy, ".__kmpc_global_dtor_.", FI, Loc);
1185       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1186                             SourceLocation());
1187       auto ArgVal = DtorCGF.EmitLoadOfScalar(
1188           DtorCGF.GetAddrOfLocalVar(&Dst),
1189           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1190       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1191                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1192                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1193       DtorCGF.FinishFunction();
1194       Dtor = Fn;
1195     }
1196     // Do not emit init function if it is not required.
1197     if (!Ctor && !Dtor)
1198       return nullptr;
1199
1200     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1201     auto CopyCtorTy =
1202         llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1203                                 /*isVarArg=*/false)->getPointerTo();
1204     // Copying constructor for the threadprivate variable.
1205     // Must be NULL - reserved by runtime, but currently it requires that this
1206     // parameter is always NULL. Otherwise it fires assertion.
1207     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1208     if (Ctor == nullptr) {
1209       auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1210                                             /*isVarArg=*/false)->getPointerTo();
1211       Ctor = llvm::Constant::getNullValue(CtorTy);
1212     }
1213     if (Dtor == nullptr) {
1214       auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1215                                             /*isVarArg=*/false)->getPointerTo();
1216       Dtor = llvm::Constant::getNullValue(DtorTy);
1217     }
1218     if (!CGF) {
1219       auto InitFunctionTy =
1220           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1221       auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
1222           InitFunctionTy, ".__omp_threadprivate_init_.",
1223           CGM.getTypes().arrangeNullaryFunction());
1224       CodeGenFunction InitCGF(CGM);
1225       FunctionArgList ArgList;
1226       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1227                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1228                             Loc);
1229       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1230       InitCGF.FinishFunction();
1231       return InitFunction;
1232     }
1233     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1234   }
1235   return nullptr;
1236 }
1237
1238 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
1239 /// function. Here is the logic:
1240 /// if (Cond) {
1241 ///   ThenGen();
1242 /// } else {
1243 ///   ElseGen();
1244 /// }
1245 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
1246                             const RegionCodeGenTy &ThenGen,
1247                             const RegionCodeGenTy &ElseGen) {
1248   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1249
1250   // If the condition constant folds and can be elided, try to avoid emitting
1251   // the condition and the dead arm of the if/else.
1252   bool CondConstant;
1253   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1254     CodeGenFunction::RunCleanupsScope Scope(CGF);
1255     if (CondConstant) {
1256       ThenGen(CGF);
1257     } else {
1258       ElseGen(CGF);
1259     }
1260     return;
1261   }
1262
1263   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1264   // emit the conditional branch.
1265   auto ThenBlock = CGF.createBasicBlock("omp_if.then");
1266   auto ElseBlock = CGF.createBasicBlock("omp_if.else");
1267   auto ContBlock = CGF.createBasicBlock("omp_if.end");
1268   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1269
1270   // Emit the 'then' code.
1271   CGF.EmitBlock(ThenBlock);
1272   {
1273     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1274     ThenGen(CGF);
1275   }
1276   CGF.EmitBranch(ContBlock);
1277   // Emit the 'else' code if present.
1278   {
1279     // There is no need to emit line number for unconditional branch.
1280     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1281     CGF.EmitBlock(ElseBlock);
1282   }
1283   {
1284     CodeGenFunction::RunCleanupsScope ThenScope(CGF);
1285     ElseGen(CGF);
1286   }
1287   {
1288     // There is no need to emit line number for unconditional branch.
1289     auto NL = ApplyDebugLocation::CreateEmpty(CGF);
1290     CGF.EmitBranch(ContBlock);
1291   }
1292   // Emit the continuation block for code after the if.
1293   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1294 }
1295
/// \brief Emits the runtime call sequence for a 'parallel' region.
///
/// Without \a IfCond, a call to __kmpc_fork_call is emitted with the location,
/// the number of captured variables, \a OutlinedFn cast to the microtask
/// pointer type, and then \a CapturedVars. With \a IfCond, that generator and
/// a serialized one (a direct call of \a OutlinedFn bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel) are both
/// handed to emitOMPIfClause. Nothing is emitted when \a CGF has no insertion
/// point.
1296 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1297                                        llvm::Value *OutlinedFn,
1298                                        ArrayRef<llvm::Value *> CapturedVars,
1299                                        const Expr *IfCond) {
1300   if (!CGF.HaveInsertPoint())
1301     return;
       // The location value is computed once here and captured by both lambdas.
1302   auto *RTLoc = emitUpdateLocation(CGF, Loc);
       // Generator for the parallel path.
1303   auto &&ThenGen = [this, OutlinedFn, CapturedVars,
1304                     RTLoc](CodeGenFunction &CGF) {
1305     // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1306     llvm::Value *Args[] = {
1307         RTLoc,
1308         CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1309         CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
         // The three fixed arguments come first, followed by the captured
         // variables.
1310     llvm::SmallVector<llvm::Value *, 16> RealArgs;
1311     RealArgs.append(std::begin(Args), std::end(Args));
1312     RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1313
1314     auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
1315     CGF.EmitRuntimeCall(RTLFn, RealArgs);
1316   };
       // Generator for the serialized path.
1317   auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc,
1318                     Loc](CodeGenFunction &CGF) {
1319     auto ThreadID = getThreadID(CGF, Loc);
1320     // Build calls:
1321     // __kmpc_serialized_parallel(&Loc, GTid);
1322     llvm::Value *Args[] = {RTLoc, ThreadID};
1323     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel),
1324                         Args);
1325
1326     // OutlinedFn(&GTid, &zero, CapturedStruct);
1327     auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
         // The second argument is the address of a 4-byte-aligned i32 temporary
         // initialized to 0.
1328     Address ZeroAddr =
1329       CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
1330                            /*Name*/ ".zero.addr");
1331     CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
1332     llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1333     OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
1334     OutlinedFnArgs.push_back(ZeroAddr.getPointer());
1335     OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1336     CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
1337
1338     // __kmpc_end_serialized_parallel(&Loc, GTid);
         // Unlike the begin call, the location is emitted afresh here rather than
         // reusing RTLoc.
1339     llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID};
1340     CGF.EmitRuntimeCall(
1341         createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs);
1342   };
1343   if (IfCond) {
1344     emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
1345   } else {
         // No 'if' clause: emit only the fork call, inside its own cleanups scope.
1346     CodeGenFunction::RunCleanupsScope Scope(CGF);
1347     ThenGen(CGF);
1348   }
1349 }
1350
1351 // If we're inside an (outlined) parallel region, use the region info's
1352 // thread-ID variable (it is passed in a first argument of the outlined function
1353 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1354 // regular serial code region, get thread ID by calling kmp_int32
1355 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1356 // return the address of that temp.
1357 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1358                                              SourceLocation Loc) {
       // Fast path: the current captured-statement info is an OpenMP region that
       // has a thread-ID variable; return that variable's address.
1359   if (auto *OMPRegionInfo =
1360           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1361     if (OMPRegionInfo->getThreadIDVariable())
1362       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1363
       // Fallback: store the value returned by getThreadID into a new signed
       // 32-bit memory temporary named ".threadid_temp." and return its address.
1364   auto ThreadID = getThreadID(CGF, Loc);
1365   auto Int32Ty =
1366       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1367   auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1368   CGF.EmitStoreOfScalar(ThreadID,
1369                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1370
1371   return ThreadIDTemp;
1372 }
1373
/// \brief Returns the internal variable named \a Name, creating it on first
/// use.
///
/// \a Name is rendered into a string and used as the key into InternalVars.
/// When the entry already holds a variable, it is returned after asserting
/// that its pointee type equals \a Ty. Otherwise a new non-constant global of
/// type \a Ty with common linkage and a null initializer is created in the
/// module, stored in the entry and returned.
1374 llvm::Constant *
1375 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
1376                                              const llvm::Twine &Name) {
       // Materialize the Twine into a local buffer to obtain a string key.
1377   SmallString<256> Buffer;
1378   llvm::raw_svector_ostream Out(Buffer);
1379   Out << Name;
1380   auto RuntimeName = Out.str();
       // Inserts a null placeholder for the name; a non-null mapped value below
       // means the variable was created by an earlier call.
1381   auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
1382   if (Elem.second) {
1383     assert(Elem.second->getType()->getPointerElementType() == Ty &&
1384            "OMP internal variable has different type than requested");
1385     return &*Elem.second;
1386   }
1387
       // The global is named with the key stored in the map entry (Elem.first()),
       // not with the local buffer.
1388   return Elem.second = new llvm::GlobalVariable(
1389              CGM.getModule(), Ty, /*IsConstant*/ false,
1390              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
1391              Elem.first());
1392 }
1393
/// \brief Returns the lock variable used for the critical region named
/// \a CriticalName.
///
/// The variable has type KmpCriticalNameTy and the name
/// ".gomp_critical_user_" + CriticalName + ".var"; it is obtained through
/// getOrCreateInternalVariable, so the same name yields the same variable.
1394 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1395   llvm::Twine Name(".gomp_critical_user_", CriticalName);
1396   return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
1397 }
1398
1399 namespace {
/// \brief Cleanup that emits a runtime call to a stored callee with exactly
/// \a N stored arguments.
///
/// Used in this file to emit the "end" runtime call that pairs with a
/// previously emitted "begin" call (e.g. __kmpc_end_critical).
1400 template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup {
       /// Function to call when the cleanup is emitted.
1401   llvm::Value *Callee;
       /// Copy of the call arguments, owned by the cleanup object.
1402   llvm::Value *Args[N];
1403
1404 public:
       /// \param Callee Function to call.
       /// \param CleanupArgs Arguments for the call; must contain exactly N
       /// values (asserted). They are copied into the cleanup.
1405   CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs)
1406       : Callee(Callee) {
1407     assert(CleanupArgs.size() == N);
1408     std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args));
1409   }
       /// Emits the stored runtime call; does nothing when \a CGF has no
       /// insertion point. The cleanup flags are ignored.
1410   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
1411     if (!CGF.HaveInsertPoint())
1412       return;
1413     CGF.EmitRuntimeCall(Callee, Args);
1414   }
1415 };
1416 } // anonymous namespace
1417
/// \brief Emits code for a 'critical' region.
///
/// Emits a call to __kmpc_critical, or to __kmpc_critical_with_hint when
/// \a Hint is non-null, then pushes a cleanup that calls __kmpc_end_critical
/// with the same three arguments (location, thread id, lock), and finally
/// emits \a CriticalOpGen as an inlined OMPD_critical directive. Nothing is
/// emitted when \a CGF has no insertion point.
1418 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1419                                          StringRef CriticalName,
1420                                          const RegionCodeGenTy &CriticalOpGen,
1421                                          SourceLocation Loc, const Expr *Hint) {
1422   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1423   // CriticalOpGen();
1424   // __kmpc_end_critical(ident_t *, gtid, Lock);
1425   // Prepare arguments and build a call to __kmpc_critical
1426   if (!CGF.HaveInsertPoint())
1427     return;
1428   CodeGenFunction::RunCleanupsScope Scope(CGF);
1429   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1430                          getCriticalRegionLock(CriticalName)};
1431   if (Hint) {
         // The hint value is cast to IntPtrTy as unsigned and appended as a
         // fourth argument; the end call below still uses only the first three.
1432     llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args),
1433                                                      std::end(Args));
1434     auto *HintVal = CGF.EmitScalarExpr(Hint);
1435     ArgsWithHint.push_back(
1436         CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false));
1437     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint),
1438                         ArgsWithHint);
1439   } else
1440     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
1441   // Build a call to __kmpc_end_critical
       // The cleanup's argument count is derived from the extent of Args.
1442   CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1443       NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
1444       llvm::makeArrayRef(Args));
1445   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
1446 }
1447
/// \brief Emits "if (IfCond) { body }", where the body is \a BodyOpGen emitted
/// as an inlined directive of kind \a Kind.
///
/// \a IfCond is converted from a signed 32-bit integer to bool. The body is
/// emitted into block "omp_if.then"; both paths continue in "omp_if.end".
1448 static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond,
1449                        OpenMPDirectiveKind Kind, SourceLocation Loc,
1450                        const RegionCodeGenTy &BodyOpGen) {
1451   llvm::Value *CallBool = CGF.EmitScalarConversion(
1452       IfCond,
1453       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true),
1454       CGF.getContext().BoolTy, Loc);
1455
1456   auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1457   auto *ContBlock = CGF.createBasicBlock("omp_if.end");
1458   // Generate the branch (If-stmt)
       // There is no else block: a false condition branches straight to ContBlock.
1459   CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1460   CGF.EmitBlock(ThenBlock);
1461   CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen);
1462   // Emit the rest of bblocks/branches
1463   CGF.EmitBranch(ContBlock);
1464   CGF.EmitBlock(ContBlock, true);
1465 }
1466
/// \brief Emits code for a 'master' region.
///
/// The result of __kmpc_master is used as the condition for emitIfStmt. Inside
/// the guarded block a cleanup calling __kmpc_end_master with the same
/// arguments is pushed before \a MasterOpGen is emitted. Nothing is emitted
/// when \a CGF has no insertion point.
1467 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
1468                                        const RegionCodeGenTy &MasterOpGen,
1469                                        SourceLocation Loc) {
1470   if (!CGF.HaveInsertPoint())
1471     return;
1472   // if(__kmpc_master(ident_t *, gtid)) {
1473   //   MasterOpGen();
1474   //   __kmpc_end_master(ident_t *, gtid);
1475   // }
1476   // Prepare arguments and build a call to __kmpc_master
1477   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1478   auto *IsMaster =
1479       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
       // Cleanup type sized to the number of entries in Args.
1480   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1481       MasterCallEndCleanup;
       // The lambda captures Args by reference; the cleanup copies the values.
1482   emitIfStmt(
1483       CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void {
1484         CodeGenFunction::RunCleanupsScope Scope(CGF);
1485         CGF.EHStack.pushCleanup<MasterCallEndCleanup>(
1486             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
1487             llvm::makeArrayRef(Args));
1488         MasterOpGen(CGF);
1489       });
1490 }
1491
/// \brief Emits a call to __kmpc_omp_taskyield with the location, the thread
/// id and a constant 0 of type IntTy as the third argument. Nothing is emitted
/// when \a CGF has no insertion point.
1492 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
1493                                         SourceLocation Loc) {
1494   if (!CGF.HaveInsertPoint())
1495     return;
1496   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1497   llvm::Value *Args[] = {
1498       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1499       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
1500   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
1501 }
1502
/// \brief Emits code for a 'taskgroup' region.
///
/// Emits a call to __kmpc_taskgroup, pushes a cleanup calling
/// __kmpc_end_taskgroup with the same arguments, and emits \a TaskgroupOpGen
/// as an inlined OMPD_taskgroup directive, all inside one cleanups scope.
/// Nothing is emitted when \a CGF has no insertion point.
1503 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
1504                                           const RegionCodeGenTy &TaskgroupOpGen,
1505                                           SourceLocation Loc) {
1506   if (!CGF.HaveInsertPoint())
1507     return;
1508   // __kmpc_taskgroup(ident_t *, gtid);
1509   // TaskgroupOpGen();
1510   // __kmpc_end_taskgroup(ident_t *, gtid);
1511   // Prepare arguments and build a call to __kmpc_taskgroup
1512   {
1513     CodeGenFunction::RunCleanupsScope Scope(CGF);
1514     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1515     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args);
1516     // Build a call to __kmpc_end_taskgroup
1517     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1518         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
1519         llvm::makeArrayRef(Args));
1520     emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
1521   }
1522 }
1523
1524 /// Given an array of pointers to variables, project the address of a
1525 /// given variable.
///
/// \param Array Address of the array of pointers.
/// \param Index Position of the wanted pointer within \a Array.
/// \param Var Declaration supplying the alignment and the in-memory type of
/// the returned address.
/// \returns the pointer loaded from slot \a Index, as an Address with the
/// declared alignment of \a Var and element type converted from Var's type.
1526 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
1527                                       unsigned Index, const VarDecl *Var) {
1528   // Pull out the pointer to the variable.
1529   Address PtrAddr =
1530       CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
1531   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
1532
       // Attach Var's declared alignment, then re-type the pointer to Var's
       // memory representation.
1533   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
1534   Addr = CGF.Builder.CreateElementBitCast(
1535       Addr, CGF.ConvertTypeForMem(Var->getType()));
1536   return Addr;
1537 }
1538
/// \brief Builds the internal-linkage function ".omp.copyprivate.copy_func"
/// with signature void(void *LHSArg, void *RHSArg).
///
/// Both arguments are cast to \a ArgsType and treated as arrays of pointers.
/// For every index I below AssignmentOps.size(), the destination address is
/// read from slot I of the LHS array (typed after the variable referenced by
/// DestExprs[I]) and the source address from slot I of the RHS array (typed
/// after the variable referenced by SrcExprs[I]); EmitOMPCopy is then emitted
/// with the type of the declaration referenced by CopyprivateVars[I] and
/// AssignmentOps[I].
///
/// All entries of \a CopyprivateVars, \a DestExprs and \a SrcExprs must be
/// DeclRefExprs (enforced by cast<>); the latter two must refer to VarDecls.
/// \returns the created function.
1539 static llvm::Value *emitCopyprivateCopyFunction(
1540     CodeGenModule &CGM, llvm::Type *ArgsType,
1541     ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
1542     ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
1543   auto &C = CGM.getContext();
1544   // void copy_func(void *LHSArg, void *RHSArg);
1545   FunctionArgList Args;
1546   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1547                            C.VoidPtrTy);
1548   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
1549                            C.VoidPtrTy);
1550   Args.push_back(&LHSArg);
1551   Args.push_back(&RHSArg);
1552   FunctionType::ExtInfo EI;
1553   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
1554       C.VoidTy, Args, EI, /*isVariadic=*/false);
1555   auto *Fn = llvm::Function::Create(
1556       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
1557       ".omp.copyprivate.copy_func", &CGM.getModule());
1558   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
       // The body is generated with a fresh CodeGenFunction, independent of the
       // function that contains the 'single' region.
1559   CodeGenFunction CGF(CGM);
1560   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
1561   // Dest = (void*[n])(LHSArg);
1562   // Src = (void*[n])(RHSArg);
1563   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1564       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
1565       ArgsType), CGF.getPointerAlign());
1566   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1567       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
1568       ArgsType), CGF.getPointerAlign());
1569   // *(Type0*)Dst[0] = *(Type0*)Src[0];
1570   // *(Type1*)Dst[1] = *(Type1*)Src[1];
1571   // ...
1572   // *(Typen*)Dst[n] = *(Typen*)Src[n];
       // The trip count comes from AssignmentOps; the other arrays are indexed
       // with the same I.
1573   for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
1574     auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
1575     Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
1576
1577     auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
1578     Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
1579
         // The copied type is taken from the original copyprivate variable.
1580     auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
1581     QualType Type = VD->getType();
1582     CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
1583   }
1584   CGF.FinishFunction();
1585   return Fn;
1586 }
1587
/// \brief Emits code for a 'single' region, including 'copyprivate' handling.
///
/// The result of __kmpc_single guards a block in which a cleanup calling
/// __kmpc_end_single is pushed and \a SingleOpGen is emitted. When
/// \a CopyprivateVars is non-empty, an i32 flag ".omp.copyprivate.did_it" is
/// initialized to 0 before the guard and set to 1 inside the guarded block
/// after \a SingleOpGen; after the guarded block, an array holding the
/// addresses of the copyprivate variables is filled in and passed to
/// __kmpc_copyprivate together with a generated copy function and the loaded
/// flag value.
///
/// \a CopyprivateVars, \a SrcExprs, \a DstExprs and \a AssignmentOps must all
/// have the same size (asserted). Nothing is emitted when \a CGF has no
/// insertion point.
1588 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
1589                                        const RegionCodeGenTy &SingleOpGen,
1590                                        SourceLocation Loc,
1591                                        ArrayRef<const Expr *> CopyprivateVars,
1592                                        ArrayRef<const Expr *> SrcExprs,
1593                                        ArrayRef<const Expr *> DstExprs,
1594                                        ArrayRef<const Expr *> AssignmentOps) {
1595   if (!CGF.HaveInsertPoint())
1596     return;
1597   assert(CopyprivateVars.size() == SrcExprs.size() &&
1598          CopyprivateVars.size() == DstExprs.size() &&
1599          CopyprivateVars.size() == AssignmentOps.size());
1600   auto &C = CGM.getContext();
1601   // int32 did_it = 0;
1602   // if(__kmpc_single(ident_t *, gtid)) {
1603   //   SingleOpGen();
1604   //   __kmpc_end_single(ident_t *, gtid);
1605   //   did_it = 1;
1606   // }
1607   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1608   // <copy_func>, did_it);
1609
       // DidIt stays invalid when there are no copyprivate variables; its
       // validity is what enables the copyprivate code below.
1610   Address DidIt = Address::invalid();
1611   if (!CopyprivateVars.empty()) {
1612     // int32 did_it = 0;
1613     auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1614     DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
1615     CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
1616   }
1617   // Prepare arguments and build a call to __kmpc_single
1618   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1619   auto *IsSingle =
1620       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
1621   typedef CallEndCleanup<std::extent<decltype(Args)>::value>
1622       SingleCallEndCleanup;
1623   emitIfStmt(
1624       CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void {
1625         CodeGenFunction::RunCleanupsScope Scope(CGF);
1626         CGF.EHStack.pushCleanup<SingleCallEndCleanup>(
1627             NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
1628             llvm::makeArrayRef(Args));
1629         SingleOpGen(CGF);
1630         if (DidIt.isValid()) {
1631           // did_it = 1;
1632           CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
1633         }
1634       });
1635   // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
1636   // <copy_func>, did_it);
1637   if (DidIt.isValid()) {
         // Array type: void *[CopyprivateVars.size()].
1638     llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
1639     auto CopyprivateArrayTy =
1640         C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
1641                                /*IndexTypeQuals=*/0);
1642     // Create a list of all private variables for copyprivate.
1643     Address CopyprivateList =
1644         CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
         // Slot I receives the address of CopyprivateVars[I] cast to void *.
1645     for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
1646       Address Elem = CGF.Builder.CreateConstArrayGEP(
1647           CopyprivateList, I, CGF.getPointerSize());
1648       CGF.Builder.CreateStore(
1649           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1650               CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
1651           Elem);
1652     }
1653     // Build function that copies private values from single region to all other
1654     // threads in the corresponding parallel region.
         // NOTE(review): SrcExprs and DstExprs are passed here in the positions
         // that emitCopyprivateCopyFunction names DestExprs and SrcExprs
         // respectively — confirm against the callers that this ordering is
         // intended.
1655     auto *CpyFn = emitCopyprivateCopyFunction(
1656         CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
1657         CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
1658     auto *BufSize = getTypeSize(CGF, CopyprivateArrayTy);
1659     Address CL =
1660       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
1661                                                       CGF.VoidPtrTy);
1662     auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
         // This Args shadows the __kmpc_single argument array declared above.
1663     llvm::Value *Args[] = {
1664         emitUpdateLocation(CGF, Loc), // ident_t *<loc>
1665         getThreadID(CGF, Loc),        // i32 <gtid>
1666         BufSize,                      // size_t <buf_size>
1667         CL.getPointer(),              // void *<copyprivate list>
1668         CpyFn,                        // void (*) (void *, void *) <copy_func>
1669         DidItVal                      // i32 did_it
1670     };
1671     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
1672   }
1673 }
1674
/// \brief Emits code for an 'ordered' region.
///
/// When \a IsThreads is true, a call to __kmpc_ordered is emitted and a
/// cleanup calling __kmpc_end_ordered with the same arguments is pushed. In
/// either case \a OrderedOpGen is then emitted as an inlined OMPD_ordered
/// directive inside the same cleanups scope. Nothing is emitted when \a CGF
/// has no insertion point.
1675 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
1676                                         const RegionCodeGenTy &OrderedOpGen,
1677                                         SourceLocation Loc, bool IsThreads) {
1678   if (!CGF.HaveInsertPoint())
1679     return;
1680   // __kmpc_ordered(ident_t *, gtid);
1681   // OrderedOpGen();
1682   // __kmpc_end_ordered(ident_t *, gtid);
1683   // Prepare arguments and build a call to __kmpc_ordered
1684   CodeGenFunction::RunCleanupsScope Scope(CGF);
1685   if (IsThreads) {
1686     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
1687     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args);
1688     // Build a call to __kmpc_end_ordered
1689     CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>(
1690         NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered),
1691         llvm::makeArrayRef(Args));
1692   }
       // Without IsThreads no runtime calls surround the region body.
1693   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
1694 }
1695
/// \brief Emits a barrier call.
///
/// The location flags are OMP_IDENT_KMPC combined with a barrier flag chosen
/// from \a Kind (for, sections, single, explicit barrier, or the generic
/// implicit flag for anything else). When the current captured-statement info
/// is an OpenMP region that has a cancel and \a ForceSimpleCall is false,
/// __kmpc_cancel_barrier is emitted; with \a EmitChecks a non-zero result
/// branches through cleanups to the region's cancel destination. In every
/// other case __kmpc_barrier is emitted. Nothing is emitted when \a CGF has no
/// insertion point.
1696 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
1697                                       OpenMPDirectiveKind Kind, bool EmitChecks,
1698                                       bool ForceSimpleCall) {
1699   if (!CGF.HaveInsertPoint())
1700     return;
1701   // Build call __kmpc_cancel_barrier(loc, thread_id);
1702   // Build call __kmpc_barrier(loc, thread_id);
1703   OpenMPLocationFlags Flags = OMP_IDENT_KMPC;
       // Select the barrier flag that matches the directive kind.
1704   if (Kind == OMPD_for) {
1705     Flags =
1706         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_FOR);
1707   } else if (Kind == OMPD_sections) {
1708     Flags = static_cast<OpenMPLocationFlags>(Flags |
1709                                              OMP_IDENT_BARRIER_IMPL_SECTIONS);
1710   } else if (Kind == OMPD_single) {
1711     Flags =
1712         static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL_SINGLE);
1713   } else if (Kind == OMPD_barrier) {
1714     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_EXPL);
1715   } else {
1716     Flags = static_cast<OpenMPLocationFlags>(Flags | OMP_IDENT_BARRIER_IMPL);
1717   }
1718   // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
1719   // thread_id);
1720   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
1721                          getThreadID(CGF, Loc)};
1722   if (auto *OMPRegionInfo =
1723           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1724     if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
1725       auto *Result = CGF.EmitRuntimeCall(
1726           createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
1727       if (EmitChecks) {
1728         // if (__kmpc_cancel_barrier()) {
1729         //   exit from construct;
1730         // }
1731         auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
1732         auto *ContBB = CGF.createBasicBlock(".cancel.continue");
1733         auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
1734         CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
1735         CGF.EmitBlock(ExitBB);
1736         //   exit from construct;
1737         auto CancelDestination =
1738             CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
1739         CGF.EmitBranchThroughCleanup(CancelDestination);
1740         CGF.EmitBlock(ContBB, /*IsFinished=*/true);
1741       }
           // The cancel barrier replaces the plain barrier, so skip the call below.
1742       return;
1743     }
1744   }
1745   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
1746 }
1747
1748 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
1749 /// the enum sched_type in kmp.h).
///
/// In this list every 'ordered' enumerator equals its unordered counterpart
/// plus 32 (e.g. OMP_ord_static == OMP_sch_static + 32).
1750 enum OpenMPSchedType {
1751   /// \brief Lower bound for default (unordered) versions.
1752   OMP_sch_lower = 32,
1753   OMP_sch_static_chunked = 33,
1754   OMP_sch_static = 34,
1755   OMP_sch_dynamic_chunked = 35,
1756   OMP_sch_guided_chunked = 36,
1757   OMP_sch_runtime = 37,
1758   OMP_sch_auto = 38,
1759   /// \brief Lower bound for 'ordered' versions.
1760   OMP_ord_lower = 64,
1761   OMP_ord_static_chunked = 65,
1762   OMP_ord_static = 66,
1763   OMP_ord_dynamic_chunked = 67,
1764   OMP_ord_guided_chunked = 68,
1765   OMP_ord_runtime = 69,
1766   OMP_ord_auto = 70,
       /// \brief Default schedule; an alias of OMP_sch_static.
1767   OMP_sch_default = OMP_sch_static,
1768 };
1769
1770 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
///
/// \param ScheduleKind Kind given in the schedule clause.
/// \param Chunked True if a chunk size was specified; only affects the static
/// kind, and must be false for the unknown kind (asserted).
/// \param Ordered Selects the OMP_ord_* enumerator instead of OMP_sch_*.
1771 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
1772                                           bool Chunked, bool Ordered) {
1773   switch (ScheduleKind) {
1774   case OMPC_SCHEDULE_static:
1775     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
1776                    : (Ordered ? OMP_ord_static : OMP_sch_static);
       // Dynamic and guided always map to the *_chunked enumerators, whatever
       // the value of Chunked.
1777   case OMPC_SCHEDULE_dynamic:
1778     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
1779   case OMPC_SCHEDULE_guided:
1780     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
1781   case OMPC_SCHEDULE_runtime:
1782     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
1783   case OMPC_SCHEDULE_auto:
1784     return Ordered ? OMP_ord_auto : OMP_sch_auto;
       // An unknown kind falls back to the non-chunked static schedule.
1785   case OMPC_SCHEDULE_unknown:
1786     assert(!Chunked && "chunk was specified but schedule kind not known");
1787     return Ordered ? OMP_ord_static : OMP_sch_static;
1788   }
1789   llvm_unreachable("Unexpected runtime schedule");
1790 }
1791
/// \brief Returns true if \a ScheduleKind with the given \a Chunked flag maps
/// to OMP_sch_static, i.e. the unordered static schedule without a chunk.
1792 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
1793                                          bool Chunked) const {
       // The query always uses the unordered mapping.
1794   auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
1795   return Schedule == OMP_sch_static;
1796 }
1797
/// \brief Returns true if \a ScheduleKind, mapped as non-chunked and
/// unordered, yields any runtime schedule other than OMP_sch_static.
1798 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
1799   auto Schedule =
1800       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
       // Chunked is passed as false above, so the chunked static schedule cannot
       // be the result.
1801   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
1802   return Schedule != OMP_sch_static;
1803 }
1804
/// \brief Emits the dispatch-init runtime call for a worksharing loop.
///
/// The schedule is derived from \a ScheduleKind, the presence of \a Chunk and
/// \a Ordered. Unless \a Ordered is set, the schedule must not be one of the
/// static schedules (asserted). The call receives a lower bound of 0, \a UB
/// as the upper bound, a stride of 1 and \a Chunk, which defaults to 1 when
/// null; constants are \a IVSize bits wide. The runtime function is selected
/// by \a IVSize and \a IVSigned. Nothing is emitted when \a CGF has no
/// insertion point.
1805 void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
1806                                           SourceLocation Loc,
1807                                           OpenMPScheduleClauseKind ScheduleKind,
1808                                           unsigned IVSize, bool IVSigned,
1809                                           bool Ordered, llvm::Value *UB,
1810                                           llvm::Value *Chunk) {
1811   if (!CGF.HaveInsertPoint())
1812     return;
1813   OpenMPSchedType Schedule =
1814       getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1815   assert(Ordered ||
1816          (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
1817           Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
1818   // Call __kmpc_dispatch_init(
1819   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
1820   //          kmp_int[32|64] lower, kmp_int[32|64] upper,
1821   //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
1822
1823   // If the Chunk was not specified in the clause - use default value 1.
1824   if (Chunk == nullptr)
1825     Chunk = CGF.Builder.getIntN(IVSize, 1);
1826   llvm::Value *Args[] = {
1827     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1828     getThreadID(CGF, Loc),
1829     CGF.Builder.getInt32(Schedule), // Schedule type
1830     CGF.Builder.getIntN(IVSize, 0), // Lower
1831     UB,                             // Upper
1832     CGF.Builder.getIntN(IVSize, 1), // Stride
1833     Chunk                           // Chunk
1834   };
1835   CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
1836 }
1837
/// \brief Emits the static-init runtime call for a worksharing loop.
///
/// \a Ordered must be false and the derived schedule must be one of the
/// static schedules (both asserted). The call receives the pointers held by
/// \a IL, \a LB, \a UB and \a ST, an increment of 1 and \a Chunk, which
/// defaults to 1 when null; constants are \a IVSize bits wide. The runtime
/// function is selected by \a IVSize and \a IVSigned. Nothing is emitted when
/// \a CGF has no insertion point.
1838 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
1839                                         SourceLocation Loc,
1840                                         OpenMPScheduleClauseKind ScheduleKind,
1841                                         unsigned IVSize, bool IVSigned,
1842                                         bool Ordered, Address IL, Address LB,
1843                                         Address UB, Address ST,
1844                                         llvm::Value *Chunk) {
1845   if (!CGF.HaveInsertPoint())
1846     return;
1847   OpenMPSchedType Schedule =
1848     getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
1849   assert(!Ordered);
1850   assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
1851          Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
1852
1853   // Call __kmpc_for_static_init(
1854   //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
1855   //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
1856   //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
1857   //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
       // The presence of a chunk must agree with the schedule computed above.
1858   if (Chunk == nullptr) {
1859     assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
1860            "expected static non-chunked schedule");
1861     // If the Chunk was not specified in the clause - use default value 1.
1862       Chunk = CGF.Builder.getIntN(IVSize, 1);
1863   } else {
1864     assert((Schedule == OMP_sch_static_chunked ||
1865             Schedule == OMP_ord_static_chunked) &&
1866            "expected static chunked schedule");
1867   }
1868   llvm::Value *Args[] = {
1869     emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1870     getThreadID(CGF, Loc),
1871     CGF.Builder.getInt32(Schedule), // Schedule type
1872     IL.getPointer(),                // &isLastIter
1873     LB.getPointer(),                // &LB
1874     UB.getPointer(),                // &UB
1875     ST.getPointer(),                // &Stride
1876     CGF.Builder.getIntN(IVSize, 1), // Incr
1877     Chunk                           // Chunk
1878   };
1879   CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
1880 }
1881
/// \brief Emits a call to __kmpc_for_static_fini with a location flagged
/// OMP_IDENT_KMPC and the thread id. Nothing is emitted when \a CGF has no
/// insertion point.
1882 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
1883                                           SourceLocation Loc) {
1884   if (!CGF.HaveInsertPoint())
1885     return;
1886   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
1887   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1888                          getThreadID(CGF, Loc)};
1889   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
1890                       Args);
1891 }
1892
/// \brief Emits the dispatch-fini runtime call that ends an ordered loop
/// iteration.
///
/// The runtime function is selected by \a IVSize and \a IVSigned and is
/// called with a location flagged OMP_IDENT_KMPC and the thread id. Nothing
/// is emitted when \a CGF has no insertion point.
1893 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
1894                                                  SourceLocation Loc,
1895                                                  unsigned IVSize,
1896                                                  bool IVSigned) {
1897   if (!CGF.HaveInsertPoint())
1898     return;
1899   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
1900   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
1901                          getThreadID(CGF, Loc)};
1902   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
1903 }
1904
/// \brief Emits the dispatch-next runtime call for a worksharing loop.
///
/// The runtime function is selected by \a IVSize and \a IVSigned and receives
/// the pointers held by \a IL, \a LB, \a UB and \a ST. Unlike the neighbouring
/// emit functions, this one does not check for an insertion point.
/// \returns the call result converted from a signed 32-bit integer to bool.
1905 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
1906                                           SourceLocation Loc, unsigned IVSize,
1907                                           bool IVSigned, Address IL,
1908                                           Address LB, Address UB,
1909                                           Address ST) {
1910   // Call __kmpc_dispatch_next(
1911   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
1912   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
1913   //          kmp_int[32|64] *p_stride);
1914   llvm::Value *Args[] = {
1915       emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
1916       IL.getPointer(), // &isLastIter
1917       LB.getPointer(), // &Lower
1918       UB.getPointer(), // &Upper
1919       ST.getPointer()  // &Stride
1920   };
1921   llvm::Value *Call =
1922       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
1923   return CGF.EmitScalarConversion(
1924       Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
1925       CGF.getContext().BoolTy, Loc);
1926 }
1927
/// \brief Emits a call to __kmpc_push_num_threads for a 'num_threads' clause.
///
/// \a NumThreads is cast to Int32Ty as a signed value before being passed.
/// Nothing is emitted when \a CGF has no insertion point.
1928 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
1929                                            llvm::Value *NumThreads,
1930                                            SourceLocation Loc) {
1931   if (!CGF.HaveInsertPoint())
1932     return;
1933   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
1934   llvm::Value *Args[] = {
1935       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1936       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
1937   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
1938                       Args);
1939 }
1940
/// \brief Emits a call to __kmpc_push_proc_bind for a 'proc_bind' clause.
///
/// \a ProcBind is translated to the local ProcBindTy constant for master,
/// close or spread and passed as a constant of type IntTy;
/// OMPC_PROC_BIND_unknown is treated as unreachable. Nothing is emitted when
/// \a CGF has no insertion point.
1941 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
1942                                          OpenMPProcBindClauseKind ProcBind,
1943                                          SourceLocation Loc) {
1944   if (!CGF.HaveInsertPoint())
1945     return;
1946   // Constants for proc bind value accepted by the runtime.
       // Enumerators are numbered consecutively from ProcBindFalse = 0; only
       // Master, Close and Spread are assigned by the switch below.
1947   enum ProcBindTy {
1948     ProcBindFalse = 0,
1949     ProcBindTrue,
1950     ProcBindMaster,
1951     ProcBindClose,
1952     ProcBindSpread,
1953     ProcBindIntel,
1954     ProcBindDefault
1955   } RuntimeProcBind;
1956   switch (ProcBind) {
1957   case OMPC_PROC_BIND_master:
1958     RuntimeProcBind = ProcBindMaster;
1959     break;
1960   case OMPC_PROC_BIND_close:
1961     RuntimeProcBind = ProcBindClose;
1962     break;
1963   case OMPC_PROC_BIND_spread:
1964     RuntimeProcBind = ProcBindSpread;
1965     break;
1966   case OMPC_PROC_BIND_unknown:
1967     llvm_unreachable("Unsupported proc_bind value.");
1968   }
1969   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
1970   llvm::Value *Args[] = {
1971       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1972       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
1973   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
1974 }
1975
/// \brief Emits a call to __kmpc_flush with the location as its only argument.
///
/// The list of expressions (the unnamed second parameter) is not used.
/// Nothing is emitted when \a CGF has no insertion point.
1976 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
1977                                 SourceLocation Loc) {
1978   if (!CGF.HaveInsertPoint())
1979     return;
1980   // Build call void __kmpc_flush(ident_t *loc)
1981   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
1982                       emitUpdateLocation(CGF, Loc));
1983 }
1984
1985 namespace {
1986 /// \brief Indexes of fields for type kmp_task_t.
///
/// The enumerators have no explicit initializers, so they take consecutive
/// values starting at 0 in declaration order.
1987 enum KmpTaskTFields {
1988   /// \brief List of shared variables.
1989   KmpTaskTShareds,
1990   /// \brief Task routine.
1991   KmpTaskTRoutine,
1992   /// \brief Partition id for the untied tasks.
1993   KmpTaskTPartId,
1994   /// \brief Function with call of destructors for private variables.
1995   KmpTaskTDestructors,
1996 };
1997 } // anonymous namespace
1998
/// \brief Returns true if no offload entries are recorded. Only target region
/// entries are considered at present.
1999 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2000   // FIXME: Add other entries type when they become supported.
2001   return OffloadEntriesTargetRegion.empty();
2002 }
2003
2004 /// \brief Initialize target region entry.
///
/// Stores an entry carrying \a Order with null address and ID under the key
/// path (DeviceID, FileID, ParentName, LineNum, ColNum) and increments
/// OffloadingEntriesNum. Asserts that code is being generated for the device
/// (LangOpts.OpenMPIsDevice).
2005 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2006     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2007                                     StringRef ParentName, unsigned LineNum,
2008                                     unsigned ColNum, unsigned Order) {
2009   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2010                                              "only required for the device "
2011                                              "code generation.");
2012   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
2013       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr);
2014   ++OffloadingEntriesNum;
2015 }
2016
2017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2018     registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2019                                   StringRef ParentName, unsigned LineNum,
2020                                   unsigned ColNum, llvm::Constant *Addr,
2021                                   llvm::Constant *ID) {
2022   // If we are emitting code for a target, the entry is already initialized,
2023   // only has to be registered.
2024   if (CGM.getLangOpts().OpenMPIsDevice) {
2025     assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2026                                     ColNum) &&
2027            "Entry must exist.");
2028     auto &Entry = OffloadEntriesTargetRegion[DeviceID][FileID][ParentName]
2029                                             [LineNum][ColNum];
2030     assert(Entry.isValid() && "Entry not initialized!");
2031     Entry.setAddress(Addr);
2032     Entry.setID(ID);
2033     return;
2034   } else {
2035     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID);
2036     OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum][ColNum] =
2037         Entry;
2038   }
2039 }
2040
2041 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
2042     unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
2043     unsigned ColNum) const {
2044   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
2045   if (PerDevice == OffloadEntriesTargetRegion.end())
2046     return false;
2047   auto PerFile = PerDevice->second.find(FileID);
2048   if (PerFile == PerDevice->second.end())
2049     return false;
2050   auto PerParentName = PerFile->second.find(ParentName);
2051   if (PerParentName == PerFile->second.end())
2052     return false;
2053   auto PerLine = PerParentName->second.find(LineNum);
2054   if (PerLine == PerParentName->second.end())
2055     return false;
2056   auto PerColumn = PerLine->second.find(ColNum);
2057   if (PerColumn == PerLine->second.end())
2058     return false;
2059   // Fail if this entry is already registered.
2060   if (PerColumn->second.getAddress() || PerColumn->second.getID())
2061     return false;
2062   return true;
2063 }
2064
2065 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
2066     const OffloadTargetRegionEntryInfoActTy &Action) {
2067   // Scan all target region entries and perform the provided action.
2068   for (auto &D : OffloadEntriesTargetRegion)
2069     for (auto &F : D.second)
2070       for (auto &P : F.second)
2071         for (auto &L : P.second)
2072           for (auto &C : L.second)
2073             Action(D.first, F.first, P.first(), L.first, C.first, C.second);
2074 }
2075
2076 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
2077 /// \a Codegen. This is used to emit the two functions that register and
2078 /// unregister the descriptor of the current compilation unit.
2079 static llvm::Function *
2080 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
2081                                          const RegionCodeGenTy &Codegen) {
2082   auto &C = CGM.getContext();
2083   FunctionArgList Args;
2084   ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
2085                              /*Id=*/nullptr, C.VoidPtrTy);
2086   Args.push_back(&DummyPtr);
2087
2088   CodeGenFunction CGF(CGM);
2089   GlobalDecl();
2090   auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration(
2091       C.VoidTy, Args, FunctionType::ExtInfo(),
2092       /*isVariadic=*/false);
2093   auto FTy = CGM.getTypes().GetFunctionType(FI);
2094   auto *Fn =
2095       CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
2096   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
2097   Codegen(CGF);
2098   CGF.FinishFunction();
2099   return Fn;
2100 }
2101
2102 llvm::Function *
2103 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
2104
2105   // If we don't have entries or if we are emitting code for the device, we
2106   // don't need to do anything.
2107   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
2108     return nullptr;
2109
2110   auto &M = CGM.getModule();
2111   auto &C = CGM.getContext();
2112
2113   // Get list of devices we care about
2114   auto &Devices = CGM.getLangOpts().OMPTargetTriples;
2115
2116   // We should be creating an offloading descriptor only if there are devices
2117   // specified.
2118   assert(!Devices.empty() && "No OpenMP offloading devices??");
2119
2120   // Create the external variables that will point to the begin and end of the
2121   // host entries section. These will be defined by the linker.
2122   auto *OffloadEntryTy =
2123       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
2124   llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
2125       M, OffloadEntryTy, /*isConstant=*/true,
2126       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
2127       ".omp_offloading.entries_begin");
2128   llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
2129       M, OffloadEntryTy, /*isConstant=*/true,
2130       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/0,
2131       ".omp_offloading.entries_end");
2132
2133   // Create all device images
2134   llvm::SmallVector<llvm::Constant *, 4> DeviceImagesEntires;
2135   auto *DeviceImageTy = cast<llvm::StructType>(
2136       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
2137
2138   for (unsigned i = 0; i < Devices.size(); ++i) {
2139     StringRef T = Devices[i].getTriple();
2140     auto *ImgBegin = new llvm::GlobalVariable(
2141         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2142         /*Initializer=*/0, Twine(".omp_offloading.img_start.") + Twine(T));
2143     auto *ImgEnd = new llvm::GlobalVariable(
2144         M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
2145         /*Initializer=*/0, Twine(".omp_offloading.img_end.") + Twine(T));
2146
2147     llvm::Constant *Dev =
2148         llvm::ConstantStruct::get(DeviceImageTy, ImgBegin, ImgEnd,
2149                                   HostEntriesBegin, HostEntriesEnd, nullptr);
2150     DeviceImagesEntires.push_back(Dev);
2151   }
2152
2153   // Create device images global array.
2154   llvm::ArrayType *DeviceImagesInitTy =
2155       llvm::ArrayType::get(DeviceImageTy, DeviceImagesEntires.size());
2156   llvm::Constant *DeviceImagesInit =
2157       llvm::ConstantArray::get(DeviceImagesInitTy, DeviceImagesEntires);
2158
2159   llvm::GlobalVariable *DeviceImages = new llvm::GlobalVariable(
2160       M, DeviceImagesInitTy, /*isConstant=*/true,
2161       llvm::GlobalValue::InternalLinkage, DeviceImagesInit,
2162       ".omp_offloading.device_images");
2163   DeviceImages->setUnnamedAddr(true);
2164
2165   // This is a Zero array to be used in the creation of the constant expressions
2166   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
2167                              llvm::Constant::getNullValue(CGM.Int32Ty)};
2168
2169   // Create the target region descriptor.
2170   auto *BinaryDescriptorTy = cast<llvm::StructType>(
2171       CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
2172   llvm::Constant *TargetRegionsDescriptorInit = llvm::ConstantStruct::get(
2173       BinaryDescriptorTy, llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
2174       llvm::ConstantExpr::getGetElementPtr(DeviceImagesInitTy, DeviceImages,
2175                                            Index),
2176       HostEntriesBegin, HostEntriesEnd, nullptr);
2177
2178   auto *Desc = new llvm::GlobalVariable(
2179       M, BinaryDescriptorTy, /*isConstant=*/true,
2180       llvm::GlobalValue::InternalLinkage, TargetRegionsDescriptorInit,
2181       ".omp_offloading.descriptor");
2182
2183   // Emit code to register or unregister the descriptor at execution
2184   // startup or closing, respectively.
2185
2186   // Create a variable to drive the registration and unregistration of the
2187   // descriptor, so we can reuse the logic that emits Ctors and Dtors.
2188   auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
2189   ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
2190                                 IdentInfo, C.CharTy);
2191
2192   auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
2193       CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) {
2194         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
2195                              Desc);
2196       });
2197   auto *RegFn = createOffloadingBinaryDescriptorFunction(
2198       CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) {
2199         CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
2200                              Desc);
2201         CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
2202       });
2203   return RegFn;
2204 }
2205
2206 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *Addr, StringRef Name,
2207                                          uint64_t Size) {
2208   auto *TgtOffloadEntryType = cast<llvm::StructType>(
2209       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
2210   llvm::LLVMContext &C = CGM.getModule().getContext();
2211   llvm::Module &M = CGM.getModule();
2212
2213   // Make sure the address has the right type.
2214   llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(Addr, CGM.VoidPtrTy);
2215
2216   // Create constant string with the name.
2217   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
2218
2219   llvm::GlobalVariable *Str =
2220       new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
2221                                llvm::GlobalValue::InternalLinkage, StrPtrInit,
2222                                ".omp_offloading.entry_name");
2223   Str->setUnnamedAddr(true);
2224   llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
2225
2226   // Create the entry struct.
2227   llvm::Constant *EntryInit = llvm::ConstantStruct::get(
2228       TgtOffloadEntryType, AddrPtr, StrPtr,
2229       llvm::ConstantInt::get(CGM.SizeTy, Size), nullptr);
2230   llvm::GlobalVariable *Entry = new llvm::GlobalVariable(
2231       M, TgtOffloadEntryType, true, llvm::GlobalValue::ExternalLinkage,
2232       EntryInit, ".omp_offloading.entry");
2233
2234   // The entry has to be created in the section the linker expects it to be.
2235   Entry->setSection(".omp_offloading.entries");
2236   // We can't have any padding between symbols, so we need to have 1-byte
2237   // alignment.
2238   Entry->setAlignment(1);
2239   return;
2240 }
2241
2242 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2243   // Emit the offloading entries and metadata so that the device codegen side
2244   // can
2245   // easily figure out what to emit. The produced metadata looks like this:
2246   //
2247   // !omp_offload.info = !{!1, ...}
2248   //
2249   // Right now we only generate metadata for function that contain target
2250   // regions.
2251
2252   // If we do not have entries, we dont need to do anything.
2253   if (OffloadEntriesInfoManager.empty())
2254     return;
2255
2256   llvm::Module &M = CGM.getModule();
2257   llvm::LLVMContext &C = M.getContext();
2258   SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
2259       OrderedEntries(OffloadEntriesInfoManager.size());
2260
2261   // Create the offloading info metadata node.
2262   llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
2263
2264   // Auxiliar methods to create metadata values and strings.
2265   auto getMDInt = [&](unsigned v) {
2266     return llvm::ConstantAsMetadata::get(
2267         llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
2268   };
2269
2270   auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
2271
2272   // Create function that emits metadata for each target region entry;
2273   auto &&TargetRegionMetadataEmitter = [&](
2274       unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
2275       unsigned Column,
2276       OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
2277     llvm::SmallVector<llvm::Metadata *, 32> Ops;
2278     // Generate metadata for target regions. Each entry of this metadata
2279     // contains:
2280     // - Entry 0 -> Kind of this type of metadata (0).
2281     // - Entry 1 -> Device ID of the file where the entry was identified.
2282     // - Entry 2 -> File ID of the file where the entry was identified.
2283     // - Entry 3 -> Mangled name of the function where the entry was identified.
2284     // - Entry 4 -> Line in the file where the entry was identified.
2285     // - Entry 5 -> Column in the file where the entry was identified.
2286     // - Entry 6 -> Order the entry was created.
2287     // The first element of the metadata node is the kind.
2288     Ops.push_back(getMDInt(E.getKind()));
2289     Ops.push_back(getMDInt(DeviceID));
2290     Ops.push_back(getMDInt(FileID));
2291     Ops.push_back(getMDString(ParentName));
2292     Ops.push_back(getMDInt(Line));
2293     Ops.push_back(getMDInt(Column));
2294     Ops.push_back(getMDInt(E.getOrder()));
2295
2296     // Save this entry in the right position of the ordered entries array.
2297     OrderedEntries[E.getOrder()] = &E;
2298
2299     // Add metadata to the named metadata node.
2300     MD->addOperand(llvm::MDNode::get(C, Ops));
2301   };
2302
2303   OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
2304       TargetRegionMetadataEmitter);
2305
2306   for (auto *E : OrderedEntries) {
2307     assert(E && "All ordered entries must exist!");
2308     if (auto *CE =
2309             dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
2310                 E)) {
2311       assert(CE->getID() && CE->getAddress() &&
2312              "Entry ID and Addr are invalid!");
2313       createOffloadEntry(CE->getID(), CE->getAddress()->getName(), /*Size=*/0);
2314     } else
2315       llvm_unreachable("Unsupported entry kind.");
2316   }
2317 }
2318
2319 /// \brief Loads all the offload entries information from the host IR
2320 /// metadata.
2321 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
2322   // If we are in target mode, load the metadata from the host IR. This code has
2323   // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
2324
2325   if (!CGM.getLangOpts().OpenMPIsDevice)
2326     return;
2327
2328   if (CGM.getLangOpts().OMPHostIRFile.empty())
2329     return;
2330
2331   auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
2332   if (Buf.getError())
2333     return;
2334
2335   llvm::LLVMContext C;
2336   auto ME = llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C);
2337
2338   if (ME.getError())
2339     return;
2340
2341   llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
2342   if (!MD)
2343     return;
2344
2345   for (auto I : MD->operands()) {
2346     llvm::MDNode *MN = cast<llvm::MDNode>(I);
2347
2348     auto getMDInt = [&](unsigned Idx) {
2349       llvm::ConstantAsMetadata *V =
2350           cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
2351       return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
2352     };
2353
2354     auto getMDString = [&](unsigned Idx) {
2355       llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
2356       return V->getString();
2357     };
2358
2359     switch (getMDInt(0)) {
2360     default:
2361       llvm_unreachable("Unexpected metadata!");
2362       break;
2363     case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
2364         OFFLOAD_ENTRY_INFO_TARGET_REGION:
2365       OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
2366           /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
2367           /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
2368           /*Column=*/getMDInt(5), /*Order=*/getMDInt(6));
2369       break;
2370     }
2371   }
2372 }
2373
2374 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2375   if (!KmpRoutineEntryPtrTy) {
2376     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2377     auto &C = CGM.getContext();
2378     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2379     FunctionProtoType::ExtProtoInfo EPI;
2380     KmpRoutineEntryPtrQTy = C.getPointerType(
2381         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2382     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2383   }
2384 }
2385
2386 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
2387                                        QualType FieldTy) {
2388   auto *Field = FieldDecl::Create(
2389       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
2390       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
2391       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
2392   Field->setAccess(AS_public);
2393   DC->addDecl(Field);
2394   return Field;
2395 }
2396
2397 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
2398
2399   // Make sure the type of the entry is already created. This is the type we
2400   // have to create:
2401   // struct __tgt_offload_entry{
2402   //   void      *addr;       // Pointer to the offload entry info.
2403   //                          // (function or global)
2404   //   char      *name;       // Name of the function or global.
2405   //   size_t     size;       // Size of the entry info (0 if it a function).
2406   // };
2407   if (TgtOffloadEntryQTy.isNull()) {
2408     ASTContext &C = CGM.getContext();
2409     auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
2410     RD->startDefinition();
2411     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2412     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
2413     addFieldToRecordDecl(C, RD, C.getSizeType());
2414     RD->completeDefinition();
2415     TgtOffloadEntryQTy = C.getRecordType(RD);
2416   }
2417   return TgtOffloadEntryQTy;
2418 }
2419
2420 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
2421   // These are the types we need to build:
2422   // struct __tgt_device_image{
2423   // void   *ImageStart;       // Pointer to the target code start.
2424   // void   *ImageEnd;         // Pointer to the target code end.
2425   // // We also add the host entries to the device image, as it may be useful
2426   // // for the target runtime to have access to that information.
2427   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
2428   //                                       // the entries.
2429   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
2430   //                                       // entries (non inclusive).
2431   // };
2432   if (TgtDeviceImageQTy.isNull()) {
2433     ASTContext &C = CGM.getContext();
2434     auto *RD = C.buildImplicitRecord("__tgt_device_image");
2435     RD->startDefinition();
2436     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2437     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2438     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2439     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2440     RD->completeDefinition();
2441     TgtDeviceImageQTy = C.getRecordType(RD);
2442   }
2443   return TgtDeviceImageQTy;
2444 }
2445
2446 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
2447   // struct __tgt_bin_desc{
2448   //   int32_t              NumDevices;      // Number of devices supported.
2449   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
2450   //                                         // (one per device).
2451   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
2452   //                                         // entries.
2453   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
2454   //                                         // entries (non inclusive).
2455   // };
2456   if (TgtBinaryDescriptorQTy.isNull()) {
2457     ASTContext &C = CGM.getContext();
2458     auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
2459     RD->startDefinition();
2460     addFieldToRecordDecl(
2461         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
2462     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
2463     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2464     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
2465     RD->completeDefinition();
2466     TgtBinaryDescriptorQTy = C.getRecordType(RD);
2467   }
2468   return TgtBinaryDescriptorQTy;
2469 }
2470
2471 namespace {
2472 struct PrivateHelpersTy {
2473   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
2474                    const VarDecl *PrivateElemInit)
2475       : Original(Original), PrivateCopy(PrivateCopy),
2476         PrivateElemInit(PrivateElemInit) {}
2477   const VarDecl *Original;
2478   const VarDecl *PrivateCopy;
2479   const VarDecl *PrivateElemInit;
2480 };
2481 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2482 } // anonymous namespace
2483
2484 static RecordDecl *
2485 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2486   if (!Privates.empty()) {
2487     auto &C = CGM.getContext();
2488     // Build struct .kmp_privates_t. {
2489     //         /*  private vars  */
2490     //       };
2491     auto *RD = C.buildImplicitRecord(".kmp_privates.t");
2492     RD->startDefinition();
2493     for (auto &&Pair : Privates) {
2494       auto *VD = Pair.second.Original;
2495       auto Type = VD->getType();
2496       Type = Type.getNonReferenceType();
2497       auto *FD = addFieldToRecordDecl(C, RD, Type);
2498       if (VD->hasAttrs()) {
2499         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2500              E(VD->getAttrs().end());
2501              I != E; ++I)
2502           FD->addAttr(*I);
2503       }
2504     }
2505     RD->completeDefinition();
2506     return RD;
2507   }
2508   return nullptr;
2509 }
2510
2511 static RecordDecl *
2512 createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty,
2513                          QualType KmpRoutineEntryPointerQTy) {
2514   auto &C = CGM.getContext();
2515   // Build struct kmp_task_t {
2516   //         void *              shareds;
2517   //         kmp_routine_entry_t routine;
2518   //         kmp_int32           part_id;
2519   //         kmp_routine_entry_t destructors;
2520   //       };
2521   auto *RD = C.buildImplicitRecord("kmp_task_t");
2522   RD->startDefinition();
2523   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2524   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2525   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2526   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2527   RD->completeDefinition();
2528   return RD;
2529 }
2530
2531 static RecordDecl *
2532 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2533                                      ArrayRef<PrivateDataTy> Privates) {
2534   auto &C = CGM.getContext();
2535   // Build struct kmp_task_t_with_privates {
2536   //         kmp_task_t task_data;
2537   //         .kmp_privates_t. privates;
2538   //       };
2539   auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2540   RD->startDefinition();
2541   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2542   if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
2543     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2544   }
2545   RD->completeDefinition();
2546   return RD;
2547 }
2548
2549 /// \brief Emit a proxy function which accepts kmp_task_t as the second
2550 /// argument.
2551 /// \code
2552 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2553 ///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
2554 ///   tt->shareds);
2555 ///   return 0;
2556 /// }
2557 /// \endcode
2558 static llvm::Value *
2559 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2560                       QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy,
2561                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2562                       QualType SharedsPtrTy, llvm::Value *TaskFunction,
2563                       llvm::Value *TaskPrivatesMap) {
2564   auto &C = CGM.getContext();
2565   FunctionArgList Args;
2566   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2567   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2568                                 /*Id=*/nullptr,
2569                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2570   Args.push_back(&GtidArg);
2571   Args.push_back(&TaskTypeArg);
2572   FunctionType::ExtInfo Info;
2573   auto &TaskEntryFnInfo =
2574       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2575                                                     /*isVariadic=*/false);
2576   auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
2577   auto *TaskEntry =
2578       llvm::Function::Create(TaskEntryTy, llvm::GlobalValue::InternalLinkage,
2579                              ".omp_task_entry.", &CGM.getModule());
2580   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
2581   CodeGenFunction CGF(CGM);
2582   CGF.disableDebugInfo();
2583   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
2584
2585   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2586   // tt->task_data.shareds);
2587   auto *GtidParam = CGF.EmitLoadOfScalar(
2588       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
2589   LValue TDBase = emitLoadOfPointerLValue(
2590       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2591   auto *KmpTaskTWithPrivatesQTyRD =
2592       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2593   LValue Base =
2594       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
2595   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2596   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
2597   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
2598   auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
2599
2600   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
2601   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
2602   auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2603       CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
2604       CGF.ConvertTypeForMem(SharedsPtrTy));
2605
2606   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
2607   llvm::Value *PrivatesParam;
2608   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
2609     auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
2610     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2611         PrivatesLVal.getPointer(), CGF.VoidPtrTy);
2612   } else {
2613     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2614   }
2615
2616   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
2617                              TaskPrivatesMap, SharedsParam};
2618   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
2619   CGF.EmitStoreThroughLValue(
2620       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
2621       CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
2622   CGF.FinishFunction();
2623   return TaskEntry;
2624 }
2625
2626 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
2627                                             SourceLocation Loc,
2628                                             QualType KmpInt32Ty,
2629                                             QualType KmpTaskTWithPrivatesPtrQTy,
2630                                             QualType KmpTaskTWithPrivatesQTy) {
2631   auto &C = CGM.getContext();
2632   FunctionArgList Args;
2633   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
2634   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
2635                                 /*Id=*/nullptr,
2636                                 KmpTaskTWithPrivatesPtrQTy.withRestrict());
2637   Args.push_back(&GtidArg);
2638   Args.push_back(&TaskTypeArg);
2639   FunctionType::ExtInfo Info;
2640   auto &DestructorFnInfo =
2641       CGM.getTypes().arrangeFreeFunctionDeclaration(KmpInt32Ty, Args, Info,
2642                                                     /*isVariadic=*/false);
2643   auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
2644   auto *DestructorFn =
2645       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
2646                              ".omp_task_destructor.", &CGM.getModule());
2647   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
2648                                     DestructorFnInfo);
2649   CodeGenFunction CGF(CGM);
2650   CGF.disableDebugInfo();
2651   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
2652                     Args);
2653
2654   LValue Base = emitLoadOfPointerLValue(
2655       CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy);
2656   auto *KmpTaskTWithPrivatesQTyRD =
2657       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
2658   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2659   Base = CGF.EmitLValueForField(Base, *FI);
2660   for (auto *Field :
2661        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
2662     if (auto DtorKind = Field->getType().isDestructedType()) {
2663       auto FieldLValue = CGF.EmitLValueForField(Base, Field);
2664       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
2665     }
2666   }
2667   CGF.FinishFunction();
2668   return DestructorFn;
2669 }
2670
2671 /// \brief Emit a privates mapping function for correct handling of private and
2672 /// firstprivate variables.
2673 /// \code
2674 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
2675 /// **noalias priv1,...,  <tyn> **noalias privn) {
2676 ///   *priv1 = &.privates.priv1;
2677 ///   ...;
2678 ///   *privn = &.privates.privn;
2679 /// }
2680 /// \endcode
2681 static llvm::Value *
2682 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
2683                                ArrayRef<const Expr *> PrivateVars,
2684                                ArrayRef<const Expr *> FirstprivateVars,
2685                                QualType PrivatesQTy,
2686                                ArrayRef<PrivateDataTy> Privates) {
2687   auto &C = CGM.getContext();
2688   FunctionArgList Args;
2689   ImplicitParamDecl TaskPrivatesArg(
2690       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2691       C.getPointerType(PrivatesQTy).withConst().withRestrict());
2692   Args.push_back(&TaskPrivatesArg);
2693   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
2694   unsigned Counter = 1;
2695   for (auto *E: PrivateVars) {
2696     Args.push_back(ImplicitParamDecl::Create(
2697         C, /*DC=*/nullptr, Loc,
2698         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2699                             .withConst()
2700                             .withRestrict()));
2701     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2702     PrivateVarsPos[VD] = Counter;
2703     ++Counter;
2704   }
2705   for (auto *E : FirstprivateVars) {
2706     Args.push_back(ImplicitParamDecl::Create(
2707         C, /*DC=*/nullptr, Loc,
2708         /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
2709                             .withConst()
2710                             .withRestrict()));
2711     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2712     PrivateVarsPos[VD] = Counter;
2713     ++Counter;
2714   }
2715   FunctionType::ExtInfo Info;
2716   auto &TaskPrivatesMapFnInfo =
2717       CGM.getTypes().arrangeFreeFunctionDeclaration(C.VoidTy, Args, Info,
2718                                                     /*isVariadic=*/false);
2719   auto *TaskPrivatesMapTy =
2720       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
2721   auto *TaskPrivatesMap = llvm::Function::Create(
2722       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
2723       ".omp_task_privates_map.", &CGM.getModule());
2724   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
2725                                     TaskPrivatesMapFnInfo);
2726   TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
2727   CodeGenFunction CGF(CGM);
2728   CGF.disableDebugInfo();
2729   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
2730                     TaskPrivatesMapFnInfo, Args);
2731
2732   // *privi = &.privates.privi;
2733   LValue Base = emitLoadOfPointerLValue(
2734       CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType());
2735   auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
2736   Counter = 0;
2737   for (auto *Field : PrivatesQTyRD->fields()) {
2738     auto FieldLVal = CGF.EmitLValueForField(Base, Field);
2739     auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
2740     auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
2741     auto RefLoadLVal =
2742         emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType());
2743     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
2744     ++Counter;
2745   }
2746   CGF.FinishFunction();
2747   return TaskPrivatesMap;
2748 }
2749
2750 static int array_pod_sort_comparator(const PrivateDataTy *P1,
2751                                      const PrivateDataTy *P2) {
2752   return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
2753 }
2754
/// \brief Emit code that allocates, fills in and launches an OpenMP task.
///
/// Emission order:
///  1. Collect the private and firstprivate copies and sort them with
///     array_pod_sort_comparator.
///  2. Build the kmp_task_t-with-privates record type and, when there are
///     privates, the privates mapping function.
///  3. Build the proxy task entry and call __kmpc_omp_task_alloc.
///  4. Copy the shareds, initialize the private copies and store the pointer
///     to the destructors function (or null).
///  5. Fill a kmp_depend_info array from \p Dependences.
///  6. Emit __kmpc_omp_task / __kmpc_omp_task_with_deps (ThenCodeGen) or the
///     __kmpc_omp_wait_deps / __kmpc_omp_task_begin_if0 / task entry /
///     __kmpc_omp_task_complete_if0 sequence (ElseCodeGen).
///
/// \param CGF Function being emitted; nothing is generated when it has no
/// insert point.
/// \param Loc Location used for the runtime calls and the helper functions.
/// \param D Directive whose associated statement (a CapturedStmt) is used to
/// look up the captured fields of firstprivate originals.
/// \param Tied Sets the tied flag (0x1) in the task flags when true.
/// \param Final Either a runtime condition (pointer part) or a compile-time
/// value (int part) that selects the final flag (0x2).
/// \param TaskFunction Outlined task function (must be an llvm::Function);
/// the type of its fourth argument is used for the privates mapping function.
/// \param SharedsTy Record type of the captured shareds.
/// \param Shareds Address of the captured shareds; copied into the task when
/// \p SharedsTy has at least one field.
/// \param IfCond Optional condition; when null, ThenCodeGen is emitted
/// unconditionally, otherwise emitOMPIfClause receives both generators.
/// \param PrivateVars References to private variables, parallel to
/// \p PrivateCopies.
/// \param FirstprivateVars References to firstprivate variables, parallel to
/// \p FirstprivateCopies and \p FirstprivateInits.
/// \param Dependences (kind, expression) pairs; 'in' maps to 0x1, 'out' and
/// 'inout' map to 0x3, any other kind is treated as unreachable.
2755 void CGOpenMPRuntime::emitTaskCall(
2756     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
2757     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
2758     llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
2759     const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
2760     ArrayRef<const Expr *> PrivateCopies,
2761     ArrayRef<const Expr *> FirstprivateVars,
2762     ArrayRef<const Expr *> FirstprivateCopies,
2763     ArrayRef<const Expr *> FirstprivateInits,
2764     ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) {
2765   if (!CGF.HaveInsertPoint())
2766     return;
2767   auto &C = CGM.getContext();
2768   llvm::SmallVector<PrivateDataTy, 8> Privates;
2769   // Aggregate privates and sort them by the alignment.
       // PrivateVars and PrivateCopies are walked in lock step; plain privates
       // carry no per-element initializer variable.
2770   auto I = PrivateCopies.begin();
2771   for (auto *E : PrivateVars) {
2772     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2773     Privates.push_back(std::make_pair(
2774         C.getDeclAlign(VD),
2775         PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2776                          /*PrivateElemInit=*/nullptr)));
2777     ++I;
2778   }
       // Firstprivates additionally record the variable from FirstprivateInits.
2779   I = FirstprivateCopies.begin();
2780   auto IElemInitRef = FirstprivateInits.begin();
2781   for (auto *E : FirstprivateVars) {
2782     auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2783     Privates.push_back(std::make_pair(
2784         C.getDeclAlign(VD),
2785         PrivateHelpersTy(
2786             VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
2787             cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
2788     ++I, ++IElemInitRef;
2789   }
2790   llvm::array_pod_sort(Privates.begin(), Privates.end(),
2791                        array_pod_sort_comparator);
2792   auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2793   // Build type kmp_routine_entry_t (if not built yet).
2794   emitKmpRoutineEntryT(KmpInt32Ty);
2795   // Build type kmp_task_t (if not built yet).
2796   if (KmpTaskTQTy.isNull()) {
2797     KmpTaskTQTy = C.getRecordType(
2798         createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy));
2799   }
2800   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
2801   // Build particular struct kmp_task_t for the given task.
2802   auto *KmpTaskTWithPrivatesQTyRD =
2803       createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
2804   auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
2805   QualType KmpTaskTWithPrivatesPtrQTy =
2806       C.getPointerType(KmpTaskTWithPrivatesQTy);
2807   auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
2808   auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
2809   auto *KmpTaskTWithPrivatesTySize = getTypeSize(CGF, KmpTaskTWithPrivatesQTy);
2810   QualType SharedsPtrTy = C.getPointerType(SharedsTy);
2811
2812   // Emit the privates mapping function (if there are any privates).
2813   llvm::Value *TaskPrivatesMap = nullptr;
       // The type of the fourth argument of the outlined task function is the
       // type the mapping function pointer is cast to.
2814   auto *TaskPrivatesMapTy =
2815       std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
2816                 3)
2817           ->getType();
2818   if (!Privates.empty()) {
         // The second field of the task-with-privates record is the privates
         // record.
2819     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2820     TaskPrivatesMap = emitTaskPrivateMappingFunction(
2821         CGM, Loc, PrivateVars, FirstprivateVars, FI->getType(), Privates);
2822     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2823         TaskPrivatesMap, TaskPrivatesMapTy);
2824   } else {
         // No privates: pass a null mapping function of the expected type.
2825     TaskPrivatesMap = llvm::ConstantPointerNull::get(
2826         cast<llvm::PointerType>(TaskPrivatesMapTy));
2827   }
2828   // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
2829   // kmp_task_t *tt);
2830   auto *TaskEntry = emitProxyTaskFunction(
2831       CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy,
2832       KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap);
2833
2834   // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2835   // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2836   // kmp_routine_entry_t *task_entry);
2837   // Task flags. Format is taken from
2838   // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
2839   // description of kmp_tasking_flags struct.
2840   const unsigned TiedFlag = 0x1;
2841   const unsigned FinalFlag = 0x2;
2842   unsigned Flags = Tied ? TiedFlag : 0;
       // The final flag is selected at run time when a condition value is
       // available, otherwise it is folded from the compile-time boolean.
2843   auto *TaskFlags =
2844       Final.getPointer()
2845           ? CGF.Builder.CreateSelect(Final.getPointer(),
2846                                      CGF.Builder.getInt32(FinalFlag),
2847                                      CGF.Builder.getInt32(/*C=*/0))
2848           : CGF.Builder.getInt32(Final.getInt() ? FinalFlag : 0);
2849   TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
2850   auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
2851   llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
2852                               getThreadID(CGF, Loc), TaskFlags,
2853                               KmpTaskTWithPrivatesTySize, SharedsSize,
2854                               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2855                                   TaskEntry, KmpRoutineEntryPtrTy)};
2856   auto *NewTask = CGF.EmitRuntimeCall(
2857       createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
       // View the returned task as the task-with-privates record; its first
       // field (TDBase) is the plain kmp_task_t part.
2858   auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2859       NewTask, KmpTaskTWithPrivatesPtrTy);
2860   LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
2861                                                KmpTaskTWithPrivatesQTy);
2862   LValue TDBase =
2863       CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
2864   // Fill the data in the resulting kmp_task_t record.
2865   // Copy shareds if there are any.
2866   Address KmpTaskSharedsPtr = Address::invalid();
2867   if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
2868     KmpTaskSharedsPtr =
2869         Address(CGF.EmitLoadOfScalar(
2870                     CGF.EmitLValueForField(
2871                         TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
2872                                            KmpTaskTShareds)),
2873                     Loc),
2874                 CGF.getNaturalTypeAlignment(SharedsTy));
2875     CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
2876   }
2877   // Emit initial values for private copies (if any).
2878   bool NeedsCleanup = false;
2879   if (!Privates.empty()) {
2880     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
2881     auto PrivatesBase = CGF.EmitLValueForField(Base, *FI);
         // From here on FI iterates over the fields of the privates record, in
         // step with the sorted Privates list.
2882     FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
2883     LValue SharedsBase;
2884     if (!FirstprivateVars.empty()) {
2885       SharedsBase = CGF.MakeAddrLValue(
2886           CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2887               KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
2888           SharedsTy);
2889     }
2890     CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
2891         cast<CapturedStmt>(*D.getAssociatedStmt()));
2892     for (auto &&Pair : Privates) {
2893       auto *VD = Pair.second.PrivateCopy;
2894       auto *Init = VD->getAnyInitializer();
2895       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
2896       if (Init) {
             // A non-null PrivateElemInit marks a firstprivate: its initializer
             // reads the original value through the task's copy of the shareds.
2897         if (auto *Elem = Pair.second.PrivateElemInit) {
2898           auto *OriginalVD = Pair.second.Original;
2899           auto *SharedField = CapturesInfo.lookup(OriginalVD);
2900           auto SharedRefLValue =
2901               CGF.EmitLValueForField(SharedsBase, SharedField);
2902           SharedRefLValue = CGF.MakeAddrLValue(
2903               Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
2904               SharedRefLValue.getType(), AlignmentSource::Decl);
2905           QualType Type = OriginalVD->getType();
2906           if (Type->isArrayType()) {
2907             // Initialize firstprivate array.
2908             if (!isa<CXXConstructExpr>(Init) ||
2909                 CGF.isTrivialInitializer(Init)) {
2910               // Perform simple memcpy.
2911               CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
2912                                       SharedRefLValue.getAddress(), Type);
2913             } else {
2914               // Initialize firstprivate array using element-by-element
2915               // initialization.
2916               CGF.EmitOMPAggregateAssign(
2917                   PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
2918                   Type, [&CGF, Elem, Init, &CapturesInfo](
2919                             Address DestElement, Address SrcElement) {
2920                     // Clean up any temporaries needed by the initialization.
2921                     CodeGenFunction::OMPPrivateScope InitScope(CGF);
2922                     InitScope.addPrivate(Elem, [SrcElement]() -> Address {
2923                       return SrcElement;
2924                     });
2925                     (void)InitScope.Privatize();
2926                     // Emit initialization for single element.
2927                     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
2928                         CGF, &CapturesInfo);
2929                     CGF.EmitAnyExprToMem(Init, DestElement,
2930                                          Init->getType().getQualifiers(),
2931                                          /*IsInitializer=*/false);
2932                   });
2933             }
2934           } else {
                 // Non-array firstprivate: map the init helper variable onto the
                 // shared original and emit the initializer into the private
                 // field.
2935             CodeGenFunction::OMPPrivateScope InitScope(CGF);
2936             InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
2937               return SharedRefLValue.getAddress();
2938             });
2939             (void)InitScope.Privatize();
2940             CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
2941             CGF.EmitExprAsInit(Init, VD, PrivateLValue,
2942                                /*capturedByInit=*/false);
2943           }
2944         } else {
               // Plain private with an initializer.
2945           CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
2946         }
2947       }
           // A destructors function is needed as soon as one private field has
           // a destructed type.
2948       NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType();
2949       ++FI;
2950     }
2951   }
2952   // Provide pointer to function with destructors for privates.
2953   llvm::Value *DestructorFn =
2954       NeedsCleanup ? emitDestructorsFunction(CGM, Loc, KmpInt32Ty,
2955                                              KmpTaskTWithPrivatesPtrQTy,
2956                                              KmpTaskTWithPrivatesQTy)
2957                    : llvm::ConstantPointerNull::get(
2958                          cast<llvm::PointerType>(KmpRoutineEntryPtrTy));
2959   LValue Destructor = CGF.EmitLValueForField(
2960       TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTDestructors));
2961   CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2962                             DestructorFn, KmpRoutineEntryPtrTy),
2963                         Destructor);
2964
2965   // Process list of dependences.
2966   Address DependenciesArray = Address::invalid();
2967   unsigned NumDependencies = Dependences.size();
2968   if (NumDependencies) {
2969     // Dependence kind for RTL.
2970     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
         // Field indices of kmp_depend_info, in the order the fields are added
         // below. Note that 'Flags' hides the outer 'Flags' variable in this
         // scope.
2971     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
2972     RecordDecl *KmpDependInfoRD;
2973     QualType FlagsTy =
2974         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
2975     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
         // Build type kmp_depend_info (if not built yet).
2976     if (KmpDependInfoTy.isNull()) {
2977       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
2978       KmpDependInfoRD->startDefinition();
2979       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
2980       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
2981       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
2982       KmpDependInfoRD->completeDefinition();
2983       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
2984     } else {
2985       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
2986     }
2987     CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
2988     // Define type kmp_depend_info[<Dependences.size()>];
2989     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
2990         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
2991         ArrayType::Normal, /*IndexTypeQuals=*/0);
2992     // kmp_depend_info[<Dependences.size()>] deps;
2993     DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
2994     for (unsigned i = 0; i < NumDependencies; ++i) {
2995       const Expr *E = Dependences[i].second;
2996       auto Addr = CGF.EmitLValue(E);
2997       llvm::Value *Size;
2998       QualType Ty = E->getType();
           // For an array section the length is the distance between the
           // address of the dependence expression and the address one element
           // past the lvalue emitted with LowerBound=false; otherwise it is the
           // size of the expression's type.
2999       if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3000         LValue UpAddrLVal =
3001             CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
3002         llvm::Value *UpAddr =
3003             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
3004         llvm::Value *LowIntPtr =
3005             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
3006         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
3007         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3008       } else
3009         Size = getTypeSize(CGF, Ty);
3010       auto Base = CGF.MakeAddrLValue(
3011           CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
3012           KmpDependInfoTy);
3013       // deps[i].base_addr = &<Dependences[i].second>;
3014       auto BaseAddrLVal = CGF.EmitLValueForField(
3015           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
3016       CGF.EmitStoreOfScalar(
3017           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
3018           BaseAddrLVal);
3019       // deps[i].len = sizeof(<Dependences[i].second>);
3020       auto LenLVal = CGF.EmitLValueForField(
3021           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
3022       CGF.EmitStoreOfScalar(Size, LenLVal);
3023       // deps[i].flags = <Dependences[i].first>;
3024       RTLDependenceKindTy DepKind;
3025       switch (Dependences[i].first) {
3026       case OMPC_DEPEND_in:
3027         DepKind = DepIn;
3028         break;
3029       // Out and InOut dependencies must use the same code.
3030       case OMPC_DEPEND_out:
3031       case OMPC_DEPEND_inout:
3032         DepKind = DepInOut;
3033         break;
3034       case OMPC_DEPEND_source:
3035       case OMPC_DEPEND_sink:
3036       case OMPC_DEPEND_unknown:
3037         llvm_unreachable("Unknown task dependence type");
3038       }
3039       auto FlagsLVal = CGF.EmitLValueForField(
3040           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
3041       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
3042                             FlagsLVal);
3043     }
         // Pass the array to the runtime as a void pointer to its first
         // element.
3044     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3045         CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
3046         CGF.VoidPtrTy);
3047   }
3048
3049   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
3050   // libcall.
3051   // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
3052   // *new_task);
3053   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
3054   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
3055   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
3056   // list is not empty
3057   auto *ThreadID = getThreadID(CGF, Loc);
3058   auto *UpLoc = emitUpdateLocation(CGF, Loc);
3059   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
       // Only filled in (and only read by ThenCodeGen) when there are
       // dependences.
3060   llvm::Value *DepTaskArgs[7];
3061   if (NumDependencies) {
3062     DepTaskArgs[0] = UpLoc;
3063     DepTaskArgs[1] = ThreadID;
3064     DepTaskArgs[2] = NewTask;
3065     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
3066     DepTaskArgs[4] = DependenciesArray.getPointer();
3067     DepTaskArgs[5] = CGF.Builder.getInt32(0);
3068     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3069   }
       // Generator for the regular path: hand the task over to the runtime.
3070   auto &&ThenCodeGen = [this, NumDependencies,
3071                         &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
3072     // TODO: add check for untied tasks.
3073     if (NumDependencies) {
3074       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
3075                           DepTaskArgs);
3076     } else {
3077       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
3078                           TaskArgs);
3079     }
3080   };
       // Cleanup type sized after TaskArgs; used below to emit the
       // __kmpc_omp_task_complete_if0 call.
3081   typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
3082       IfCallEndCleanup;
3083
       // Only filled in (and only read by ElseCodeGen) when there are
       // dependences.
3084   llvm::Value *DepWaitTaskArgs[6];
3085   if (NumDependencies) {
3086     DepWaitTaskArgs[0] = UpLoc;
3087     DepWaitTaskArgs[1] = ThreadID;
3088     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
3089     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
3090     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
3091     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3092   }
       // Generator for the alternative path: wait for the dependences and call
       // the task entry directly, bracketed by the *_if0 runtime calls.
3093   auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
3094                         NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
3095     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
3096     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
3097     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
3098     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
3099     // is specified.
3100     if (NumDependencies)
3101       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
3102                           DepWaitTaskArgs);
3103     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
3104     // kmp_task_t *new_task);
3105     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0),
3106                         TaskArgs);
3107     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
3108     // kmp_task_t *new_task);
         // Registered as a normal-and-EH cleanup inside LocalScope rather than
         // emitted as a direct call.
3109     CGF.EHStack.pushCleanup<IfCallEndCleanup>(
3110         NormalAndEHCleanup,
3111         createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0),
3112         llvm::makeArrayRef(TaskArgs));
3113
3114     // Call proxy_task_entry(gtid, new_task);
3115     llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
3116     CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
3117   };
3118
       // With an 'if' condition let emitOMPIfClause pick between the two
       // generators; without one always emit the regular path.
3119   if (IfCond) {
3120     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
3121   } else {
3122     CodeGenFunction::RunCleanupsScope Scope(CGF);
3123     ThenCodeGen(CGF);
3124   }
3125 }
3126
3127 /// \brief Emit reduction operation for each element of array (required for
3128 /// array sections) LHS op = RHS.
3129 /// \param Type Type of array.
3130 /// \param LHSVar Variable on the left side of the reduction operation
3131 /// (references element of array in original variable).
3132 /// \param RHSVar Variable on the right side of the reduction operation
3133 /// (references element of array in original variable).
3134 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
3135 /// RHSVar.
3136 static void EmitOMPAggregateReduction(
3137     CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
3138     const VarDecl *RHSVar,
3139     const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
3140                                   const Expr *, const Expr *)> &RedOpGen,
3141     const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
3142     const Expr *UpExpr = nullptr) {
3143   // Perform element-by-element initialization.
3144   QualType ElementTy;
3145   Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
3146   Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
3147
3148   // Drill down to the base element type on both arrays.
3149   auto ArrayTy = Type->getAsArrayTypeUnsafe();
3150   auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
3151
3152   auto RHSBegin = RHSAddr.getPointer();
3153   auto LHSBegin = LHSAddr.getPointer();
3154   // Cast from pointer to array type to pointer to single element.
3155   auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
3156   // The basic structure here is a while-do loop.
3157   auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
3158   auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
3159   auto IsEmpty =
3160       CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
3161   CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
3162
3163   // Enter the loop body, making that address the current address.
3164   auto EntryBB = CGF.Builder.GetInsertBlock();
3165   CGF.EmitBlock(BodyBB);
3166
3167   CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
3168
3169   llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
3170       RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
3171   RHSElementPHI->addIncoming(RHSBegin, EntryBB);
3172   Address RHSElementCurrent =
3173       Address(RHSElementPHI,
3174               RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
3175
3176   llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
3177       LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
3178   LHSElementPHI->addIncoming(LHSBegin, EntryBB);
3179   Address LHSElementCurrent =
3180       Address(LHSElementPHI,
3181               LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
3182
3183   // Emit copy.
3184   CodeGenFunction::OMPPrivateScope Scope(CGF);
3185   Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
3186   Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
3187   Scope.Privatize();
3188   RedOpGen(CGF, XExpr, EExpr, UpExpr);
3189   Scope.ForceCleanup();
3190
3191   // Shift the address forward by one element.
3192   auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
3193       LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
3194   auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
3195       RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
3196   // Check whether we've reached the end.
3197   auto Done =
3198       CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
3199   CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
3200   LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
3201   RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
3202
3203   // Done.
3204   CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
3205 }
3206
3207 static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
3208                                           llvm::Type *ArgsType,
3209                                           ArrayRef<const Expr *> Privates,
3210                                           ArrayRef<const Expr *> LHSExprs,
3211                                           ArrayRef<const Expr *> RHSExprs,
3212                                           ArrayRef<const Expr *> ReductionOps) {
3213   auto &C = CGM.getContext();
3214
3215   // void reduction_func(void *LHSArg, void *RHSArg);
3216   FunctionArgList Args;
3217   ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
3218                            C.VoidPtrTy);
3219   ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
3220                            C.VoidPtrTy);
3221   Args.push_back(&LHSArg);
3222   Args.push_back(&RHSArg);
3223   FunctionType::ExtInfo EI;
3224   auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
3225       C.VoidTy, Args, EI, /*isVariadic=*/false);
3226   auto *Fn = llvm::Function::Create(
3227       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
3228       ".omp.reduction.reduction_func", &CGM.getModule());
3229   CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
3230   CodeGenFunction CGF(CGM);
3231   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
3232
3233   // Dst = (void*[n])(LHSArg);
3234   // Src = (void*[n])(RHSArg);
3235   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3236       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3237       ArgsType), CGF.getPointerAlign());
3238   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3239       CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3240       ArgsType), CGF.getPointerAlign());
3241
3242   //  ...
3243   //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
3244   //  ...
3245   CodeGenFunction::OMPPrivateScope Scope(CGF);
3246   auto IPriv = Privates.begin();
3247   unsigned Idx = 0;
3248   for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
3249     auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
3250     Scope.addPrivate(RHSVar, [&]() -> Address {
3251       return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
3252     });
3253     auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
3254     Scope.addPrivate(LHSVar, [&]() -> Address {
3255       return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
3256     });
3257     QualType PrivTy = (*IPriv)->getType();
3258     if (PrivTy->isArrayType()) {
3259       // Get array size and emit VLA type.
3260       ++Idx;
3261       Address Elem =
3262           CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
3263       llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
3264       CodeGenFunction::OpaqueValueMapping OpaqueMap(
3265           CGF,
3266           cast<OpaqueValueExpr>(
3267               CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr()),
3268           RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
3269       CGF.EmitVariablyModifiedType(PrivTy);
3270     }
3271   }
3272   Scope.Privatize();
3273   IPriv = Privates.begin();
3274   auto ILHS = LHSExprs.begin();
3275   auto IRHS = RHSExprs.begin();
3276   for (auto *E : ReductionOps) {
3277     if ((*IPriv)->getType()->isArrayType()) {
3278       // Emit reduction for array section.
3279       auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3280       auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3281       EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3282                                 [=](CodeGenFunction &CGF, const Expr *,
3283                                     const Expr *,
3284                                     const Expr *) { CGF.EmitIgnoredExpr(E); });
3285     } else
3286       // Emit reduction for array subscript or single variable.
3287       CGF.EmitIgnoredExpr(E);
3288     ++IPriv, ++ILHS, ++IRHS;
3289   }
3290   Scope.ForceCleanup();
3291   CGF.FinishFunction();
3292   return Fn;
3293 }
3294
3295 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
3296                                     ArrayRef<const Expr *> Privates,
3297                                     ArrayRef<const Expr *> LHSExprs,
3298                                     ArrayRef<const Expr *> RHSExprs,
3299                                     ArrayRef<const Expr *> ReductionOps,
3300                                     bool WithNowait, bool SimpleReduction) {
3301   if (!CGF.HaveInsertPoint())
3302     return;
       // Combines every private reduction copy with its original variable.
       // Privates, LHSExprs, RHSExprs and ReductionOps are parallel lists with one
       // entry per reduction item. WithNowait picks the *_nowait runtime entry
       // points; SimpleReduction bypasses the runtime call altogether.
       //
3303   // The following code is emitted for a reduction:
3304   //
3305   // static kmp_critical_name lock = { 0 };
3306   //
3307   // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
3308   //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
3309   //  ...
3310   //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
3311   //  *(Type<n>-1*)rhs[<n>-1]);
3312   // }
3313   //
3314   // ...
3315   // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
3316   // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
3317   // RedList, reduce_func, &<lock>)) {
3318   // case 1:
3319   //  ...
3320   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3321   //  ...
3322   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3323   // break;
3324   // case 2:
3325   //  ...
3326   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3327   //  ...
3328   // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
3329   // break;
3330   // default:;
3331   // }
3332   //
3333   // If SimpleReduction is true, only the following code is generated:
3334   //  ...
3335   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3336   //  ...
3337
3338   auto &C = CGM.getContext();
3339
3340   if (SimpleReduction) {
3341     CodeGenFunction::RunCleanupsScope Scope(CGF);
3342     auto IPriv = Privates.begin();
3343     auto ILHS = LHSExprs.begin();
3344     auto IRHS = RHSExprs.begin();
3345     for (auto *E : ReductionOps) {
3346       if ((*IPriv)->getType()->isArrayType()) {
               // Array-typed item: the reduction op is emitted through
               // EmitOMPAggregateReduction instead of once for the whole item.
3347         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3348         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3349         EmitOMPAggregateReduction(
3350             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3351             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
3352                 const Expr *) { CGF.EmitIgnoredExpr(E); });
3353       } else
3354         CGF.EmitIgnoredExpr(E);
3355       ++IPriv, ++ILHS, ++IRHS;
3356     }
3357     return;
3358   }
3359
3360   // 1. Build a list of reduction variables.
3361   // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
3362   auto Size = RHSExprs.size();
3363   for (auto *E : Privates) {
3364     if (E->getType()->isArrayType())
3365       // Reserve place for array size.
3366       ++Size;
3367   }
3368   llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
3369   QualType ReductionArrayTy =
3370       C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3371                              /*IndexTypeQuals=*/0);
3372   Address ReductionList =
3373       CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
3374   auto IPriv = Privates.begin();
       // Idx is the RedList slot being written; it advances one extra step past I
       // for each array-typed item, whose size takes the slot after its pointer.
3375   unsigned Idx = 0;
3376   for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
3377     Address Elem =
3378       CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
3379     CGF.Builder.CreateStore(
3380         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3381             CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
3382         Elem);
3383     if ((*IPriv)->getType()->isArrayType()) {
3384       // Store array size.
             // The size value is cast to SizeTy and stored as a void* via inttoptr.
3385       ++Idx;
3386       Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
3387                                              CGF.getPointerSize());
3388       CGF.Builder.CreateStore(
3389           CGF.Builder.CreateIntToPtr(
3390               CGF.Builder.CreateIntCast(
3391                   CGF.getVLASize(CGF.getContext().getAsVariableArrayType(
3392                                      (*IPriv)->getType()))
3393                       .first,
3394                   CGF.SizeTy, /*isSigned=*/false),
3395               CGF.VoidPtrTy),
3396           Elem);
3397     }
3398   }
3399
3400   // 2. Emit reduce_func().
3401   auto *ReductionFn = emitReductionFunction(
3402       CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
3403       LHSExprs, RHSExprs, ReductionOps);
3404
3405   // 3. Create static kmp_critical_name lock = { 0 };
3406   auto *Lock = getCriticalRegionLock(".reduction");
3407
3408   // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
3409   // RedList, reduce_func, &<lock>);
3410   auto *IdentTLoc = emitUpdateLocation(
3411       CGF, Loc,
3412       static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
3413   auto *ThreadId = getThreadID(CGF, Loc);
3414   auto *ReductionArrayTySize = getTypeSize(CGF, ReductionArrayTy);
3415   auto *RL =
3416     CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
3417                                                     CGF.VoidPtrTy);
3418   llvm::Value *Args[] = {
3419       IdentTLoc,                             // ident_t *<loc>
3420       ThreadId,                              // i32 <gtid>
           // <n> is the number of reduction items, not the number of RedList slots
           // (slots holding array sizes are not counted).
3421       CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
3422       ReductionArrayTySize,                  // size_type sizeof(RedList)
3423       RL,                                    // void *RedList
3424       ReductionFn, // void (*) (void *, void *) <reduce_func>
3425       Lock         // kmp_critical_name *&<lock>
3426   };
3427   auto Res = CGF.EmitRuntimeCall(
3428       createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
3429                                        : OMPRTL__kmpc_reduce),
3430       Args);
3431
3432   // 5. Build switch(res)
3433   auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
3434   auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
3435
3436   // 6. Build case 1:
3437   //  ...
3438   //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
3439   //  ...
3440   // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3441   // break;
3442   auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
3443   SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
3444   CGF.EmitBlock(Case1BB);
3445
3446   {
3447     CodeGenFunction::RunCleanupsScope Scope(CGF);
3448     // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
3449     llvm::Value *EndArgs[] = {
3450         IdentTLoc, // ident_t *<loc>
3451         ThreadId,  // i32 <gtid>
3452         Lock       // kmp_critical_name *&<lock>
3453     };
         // Pushed as a cleanup inside Scope rather than emitted directly --
         // presumably so the end call follows the reduction code emitted below
         // (cf. the pseudo-code at the top); confirm against CallEndCleanup.
3454     CGF.EHStack
3455         .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
3456             NormalAndEHCleanup,
3457             createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
3458                                              : OMPRTL__kmpc_end_reduce),
3459             llvm::makeArrayRef(EndArgs));
3460     auto IPriv = Privates.begin();
3461     auto ILHS = LHSExprs.begin();
3462     auto IRHS = RHSExprs.begin();
3463     for (auto *E : ReductionOps) {
3464       if ((*IPriv)->getType()->isArrayType()) {
3465         // Emit reduction for array section.
3466         auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3467         auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3468         EmitOMPAggregateReduction(
3469             CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3470             [=](CodeGenFunction &CGF, const Expr *, const Expr *,
3471                 const Expr *) { CGF.EmitIgnoredExpr(E); });
3472       } else
3473         // Emit reduction for array subscript or single variable.
3474         CGF.EmitIgnoredExpr(E);
3475       ++IPriv, ++ILHS, ++IRHS;
3476     }
3477   }
3478
3479   CGF.EmitBranch(DefaultBB);
3480
3481   // 7. Build case 2:
3482   //  ...
3483   //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
3484   //  ...
3485   // break;
3486   auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
3487   SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
3488   CGF.EmitBlock(Case2BB);
3489
3490   {
3491     CodeGenFunction::RunCleanupsScope Scope(CGF);
3492     if (!WithNowait) {
3493       // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
             // In this case no end call is registered when WithNowait is set.
3494       llvm::Value *EndArgs[] = {
3495           IdentTLoc, // ident_t *<loc>
3496           ThreadId,  // i32 <gtid>
3497           Lock       // kmp_critical_name *&<lock>
3498       };
3499       CGF.EHStack
3500           .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>(
3501               NormalAndEHCleanup,
3502               createRuntimeFunction(OMPRTL__kmpc_end_reduce),
3503               llvm::makeArrayRef(EndArgs));
3504     }
3505     auto ILHS = LHSExprs.begin();
3506     auto IRHS = RHSExprs.begin();
3507     auto IPriv = Privates.begin();
3508     for (auto *E : ReductionOps) {
             // Decompose the reduction op "X = <update>" into the updated lvalue
             // (XExpr), the whole right-hand side (UpExpr) and, when the update
             // is a binary operation, its right operand (EExpr) and opcode (BO).
3509         const Expr *XExpr = nullptr;
3510         const Expr *EExpr = nullptr;
3511         const Expr *UpExpr = nullptr;
             // BO stays BO_Comma unless a binary operator is found on the RHS below.
3512         BinaryOperatorKind BO = BO_Comma;
             // NOTE: this BinaryOperator pointer shadows the BinaryOperatorKind BO
             // declared just above for the extent of the if statement.
3513         if (auto *BO = dyn_cast<BinaryOperator>(E)) {
3514           if (BO->getOpcode() == BO_Assign) {
3515             XExpr = BO->getLHS();
3516             UpExpr = BO->getRHS();
3517           }
3518         }
3519         // Try to emit update expression as a simple atomic.
3520         auto *RHSExpr = UpExpr;
3521         if (RHSExpr) {
3522           // Analyze RHS part of the whole expression.
3523           if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
3524                   RHSExpr->IgnoreParenImpCasts())) {
3525             // If this is a conditional operator, analyze its condition for
3526             // min/max reduction operator.
3527             RHSExpr = ACO->getCond();
3528           }
3529           if (auto *BORHS =
3530                   dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
3531             EExpr = BORHS->getRHS();
3532             BO = BORHS->getOpcode();
3533           }
3534         }
             // XExpr is set only when the reduction op is a plain assignment; any
             // other form of op takes the critical-region path in the else branch.
3535         if (XExpr) {
3536           auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3537           auto &&AtomicRedGen = [this, BO, VD, IPriv,
3538                                  Loc](CodeGenFunction &CGF, const Expr *XExpr,
3539                                       const Expr *EExpr, const Expr *UpExpr) {
3540             LValue X = CGF.EmitLValue(XExpr);
3541             RValue E;
3542             if (EExpr)
3543               E = CGF.EmitAnyExpr(EExpr);
                 // The callback passed last re-evaluates UpExpr with VD remapped
                 // to a temporary that holds the value handed in as XRValue.
3544             CGF.EmitOMPAtomicSimpleUpdateExpr(
3545                 X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
3546                 [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
3547                   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3548                   PrivateScope.addPrivate(
3549                       VD, [&CGF, VD, XRValue, Loc]() -> Address {
3550                         Address LHSTemp = CGF.CreateMemTemp(VD->getType());
3551                         CGF.emitOMPSimpleStore(
3552                             CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
3553                             VD->getType().getNonReferenceType(), Loc);
3554                         return LHSTemp;
3555                       });
3556                   (void)PrivateScope.Privatize();
3557                   return CGF.EmitAnyExpr(UpExpr);
3558                 });
3559           };
3560           if ((*IPriv)->getType()->isArrayType()) {
3561             // Emit atomic reduction for array section.
3562             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3563             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
3564                                       AtomicRedGen, XExpr, EExpr, UpExpr);
3565           } else
3566             // Emit atomic reduction for array subscript or single variable.
3567             AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
3568         } else {
3569           // Emit as a critical region.
3570           auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *,
3571                                              const Expr *, const Expr *) {
3572             emitCriticalRegion(
3573                 CGF, ".atomic_reduction",
3574                 [E](CodeGenFunction &CGF) { CGF.EmitIgnoredExpr(E); }, Loc);
3575           };
3576           if ((*IPriv)->getType()->isArrayType()) {
3577             auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3578             auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3579             EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
3580                                       CritRedGen);
3581           } else
3582             CritRedGen(CGF, nullptr, nullptr, nullptr);
3583         }
3584       ++ILHS, ++IRHS, ++IPriv;
3585     }
3586   }
3587
       // Both cases fall through to the default block, which ends the switch.
3588   CGF.EmitBranch(DefaultBB);
3589   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
3590 }
3591
3592 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
3593                                        SourceLocation Loc) {
3594   if (!CGF.HaveInsertPoint())
3595     return;
3596   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
3597   // global_tid);
3598   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3599   // Ignore return result until untied tasks are supported.
3600   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
3601 }
3602
3603 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
3604                                            OpenMPDirectiveKind InnerKind,
3605                                            const RegionCodeGenTy &CodeGen,
3606                                            bool HasCancel) {
3607   if (!CGF.HaveInsertPoint())
3608     return;
3609   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
3610   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
3611 }
3612
namespace {
/// \brief Cancellation kinds handed to the runtime as the cncl_kind argument
/// of the cancel / cancellation point calls (see getCancellationKind()).
enum RTCancelKind {
  /// Initial value only; getCancellationKind() always replaces it.
  CancelNoreq = 0,
  /// Chosen for OMPD_parallel.
  CancelParallel = 1,
  /// Chosen for OMPD_for.
  CancelLoop = 2,
  /// Chosen for OMPD_sections.
  CancelSections = 3,
  /// Chosen for OMPD_taskgroup.
  CancelTaskgroup = 4
};
} // end anonymous namespace
3622
3623 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
3624   RTCancelKind CancelKind = CancelNoreq;
3625   if (CancelRegion == OMPD_parallel)
3626     CancelKind = CancelParallel;
3627   else if (CancelRegion == OMPD_for)
3628     CancelKind = CancelLoop;
3629   else if (CancelRegion == OMPD_sections)
3630     CancelKind = CancelSections;
3631   else {
3632     assert(CancelRegion == OMPD_taskgroup);
3633     CancelKind = CancelTaskgroup;
3634   }
3635   return CancelKind;
3636 }
3637
3638 void CGOpenMPRuntime::emitCancellationPointCall(
3639     CodeGenFunction &CGF, SourceLocation Loc,
3640     OpenMPDirectiveKind CancelRegion) {
3641   if (!CGF.HaveInsertPoint())
3642     return;
3643   // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
3644   // global_tid, kmp_int32 cncl_kind);
3645   if (auto *OMPRegionInfo =
3646           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3647     if (OMPRegionInfo->hasCancel()) {
3648       llvm::Value *Args[] = {
3649           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3650           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3651       // Ignore return result until untied tasks are supported.
3652       auto *Result = CGF.EmitRuntimeCall(
3653           createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
3654       // if (__kmpc_cancellationpoint()) {
3655       //  __kmpc_cancel_barrier();
3656       //   exit from construct;
3657       // }
3658       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3659       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3660       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3661       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3662       CGF.EmitBlock(ExitBB);
3663       // __kmpc_cancel_barrier();
3664       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3665       // exit from construct;
3666       auto CancelDest =
3667           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3668       CGF.EmitBranchThroughCleanup(CancelDest);
3669       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3670     }
3671   }
3672 }
3673
3674 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
3675                                      const Expr *IfCond,
3676                                      OpenMPDirectiveKind CancelRegion) {
3677   if (!CGF.HaveInsertPoint())
3678     return;
3679   // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
3680   // kmp_int32 cncl_kind);
3681   if (auto *OMPRegionInfo =
3682           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3683     auto &&ThenGen = [this, Loc, CancelRegion,
3684                       OMPRegionInfo](CodeGenFunction &CGF) {
3685       llvm::Value *Args[] = {
3686           emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3687           CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
3688       // Ignore return result until untied tasks are supported.
3689       auto *Result =
3690           CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
3691       // if (__kmpc_cancel()) {
3692       //  __kmpc_cancel_barrier();
3693       //   exit from construct;
3694       // }
3695       auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
3696       auto *ContBB = CGF.createBasicBlock(".cancel.continue");
3697       auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
3698       CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3699       CGF.EmitBlock(ExitBB);
3700       // __kmpc_cancel_barrier();
3701       emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
3702       // exit from construct;
3703       auto CancelDest =
3704           CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3705       CGF.EmitBranchThroughCleanup(CancelDest);
3706       CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3707     };
3708     if (IfCond)
3709       emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {});
3710     else
3711       ThenGen(CGF);
3712   }
3713 }
3714
3715 /// \brief Obtain information that uniquely identifies a target entry. This
3716 /// consists of the file and device IDs as well as line and column numbers
3717 /// associated with the relevant entry source location.
3718 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
3719                                      unsigned &DeviceID, unsigned &FileID,
3720                                      unsigned &LineNum, unsigned &ColumnNum) {
3721
3722   auto &SM = C.getSourceManager();
3723
3724   // The loc should be always valid and have a file ID (the user cannot use
3725   // #pragma directives in macros)
3726
3727   assert(Loc.isValid() && "Source location is expected to be always valid.");
3728   assert(Loc.isFileID() && "Source location is expected to refer to a file.");
3729
3730   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
3731   assert(PLoc.isValid() && "Source location is expected to be always valid.");
3732
3733   llvm::sys::fs::UniqueID ID;
3734   if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
3735     llvm_unreachable("Source file with target region no longer exists!");
3736
3737   DeviceID = ID.getDevice();
3738   FileID = ID.getFile();
3739   LineNum = PLoc.getLine();
3740   ColumnNum = PLoc.getColumn();
3741   return;
3742 }
3743
3744 void CGOpenMPRuntime::emitTargetOutlinedFunction(
3745     const OMPExecutableDirective &D, StringRef ParentName,
3746     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
3747     bool IsOffloadEntry) {
3748
3749   assert(!ParentName.empty() && "Invalid target region parent name!");
3750
3751   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3752
3753   // Emit target region as a standalone region.
3754   auto &&CodeGen = [&CS](CodeGenFunction &CGF) {
3755     CGF.EmitStmt(CS.getCapturedStmt());
3756   };
3757
3758   // Create a unique name for the proxy/entry function that using the source
3759   // location information of the current target region. The name will be
3760   // something like:
3761   //
3762   // .omp_offloading.DD_FFFF.PP.lBB.cCC
3763   //
3764   // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
3765   // mangled name of the function that encloses the target region, BB is the
3766   // line number of the target region, and CC is the column number of the target
3767   // region.
3768
3769   unsigned DeviceID;
3770   unsigned FileID;
3771   unsigned Line;
3772   unsigned Column;
3773   getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
3774                            Line, Column);
3775   SmallString<64> EntryFnName;
3776   {
3777     llvm::raw_svector_ostream OS(EntryFnName);
3778     OS << ".omp_offloading" << llvm::format(".%x", DeviceID)
3779        << llvm::format(".%x.", FileID) << ParentName << ".l" << Line << ".c"
3780        << Column;
3781   }
3782
3783   CodeGenFunction CGF(CGM, true);
3784   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
3785   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
3786
3787   OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
3788
3789   // If this target outline function is not an offload entry, we don't need to
3790   // register it.
3791   if (!IsOffloadEntry)
3792     return;
3793
3794   // The target region ID is used by the runtime library to identify the current
3795   // target region, so it only has to be unique and not necessarily point to
3796   // anything. It could be the pointer to the outlined function that implements
3797   // the target region, but we aren't using that so that the compiler doesn't
3798   // need to keep that, and could therefore inline the host function if proven
3799   // worthwhile during optimization. In the other hand, if emitting code for the
3800   // device, the ID has to be the function address so that it can retrieved from
3801   // the offloading entry and launched by the runtime library. We also mark the
3802   // outlined function to have external linkage in case we are emitting code for
3803   // the device, because these functions will be entry points to the device.
3804
3805   if (CGM.getLangOpts().OpenMPIsDevice) {
3806     OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
3807     OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
3808   } else
3809     OutlinedFnID = new llvm::GlobalVariable(
3810         CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3811         llvm::GlobalValue::PrivateLinkage,
3812         llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
3813
3814   // Register the information for the entry associated with this target region.
3815   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3816       DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
3817   return;
3818 }
3819
3820 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
3821                                      const OMPExecutableDirective &D,
3822                                      llvm::Value *OutlinedFn,
3823                                      llvm::Value *OutlinedFnID,
3824                                      const Expr *IfCond, const Expr *Device,
3825                                      ArrayRef<llvm::Value *> CapturedVars) {
3826   if (!CGF.HaveInsertPoint())
3827     return;
3828   /// \brief Values for bit flags used to specify the mapping type for
3829   /// offloading.
3830   enum OpenMPOffloadMappingFlags {
3831     /// \brief Allocate memory on the device and move data from host to device.
3832     OMP_MAP_TO = 0x01,
3833     /// \brief Allocate memory on the device and move data from device to host.
3834     OMP_MAP_FROM = 0x02,
3835     /// \brief The element passed to the device is a pointer.
3836     OMP_MAP_PTR = 0x20,
3837     /// \brief Pass the element to the device by value.
3838     OMP_MAP_BYCOPY = 0x80,
3839   };
3840
3841   enum OpenMPOffloadingReservedDeviceIDs {
3842     /// \brief Device ID if the device was not defined, runtime should get it
3843     /// from environment variables in the spec.
3844     OMP_DEVICEID_UNDEF = -1,
3845   };
3846
3847   assert(OutlinedFn && "Invalid outlined function!");
3848
3849   auto &Ctx = CGF.getContext();
3850
3851   // Fill up the arrays with the all the captured variables.
3852   SmallVector<llvm::Value *, 16> BasePointers;
3853   SmallVector<llvm::Value *, 16> Pointers;
3854   SmallVector<llvm::Value *, 16> Sizes;
3855   SmallVector<unsigned, 16> MapTypes;
3856
3857   bool hasVLACaptures = false;
3858
3859   const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
3860   auto RI = CS.getCapturedRecordDecl()->field_begin();
3861   // auto II = CS.capture_init_begin();
3862   auto CV = CapturedVars.begin();
3863   for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
3864                                             CE = CS.capture_end();
3865        CI != CE; ++CI, ++RI, ++CV) {
3866     StringRef Name;
3867     QualType Ty;
3868     llvm::Value *BasePointer;
3869     llvm::Value *Pointer;
3870     llvm::Value *Size;
3871     unsigned MapType;
3872
3873     // VLA sizes are passed to the outlined region by copy.
3874     if (CI->capturesVariableArrayType()) {
3875       BasePointer = Pointer = *CV;
3876       Size = getTypeSize(CGF, RI->getType());
3877       // Copy to the device as an argument. No need to retrieve it.
3878       MapType = OMP_MAP_BYCOPY;
3879       hasVLACaptures = true;
3880     } else if (CI->capturesThis()) {
3881       BasePointer = Pointer = *CV;
3882       const PointerType *PtrTy = cast<PointerType>(RI->getType().getTypePtr());
3883       Size = getTypeSize(CGF, PtrTy->getPointeeType());
3884       // Default map type.
3885       MapType = OMP_MAP_TO | OMP_MAP_FROM;
3886     } else if (CI->capturesVariableByCopy()) {
3887       MapType = OMP_MAP_BYCOPY;
3888       if (!RI->getType()->isAnyPointerType()) {
3889         // If the field is not a pointer, we need to save the actual value and
3890         // load it as a void pointer.
3891         auto DstAddr = CGF.CreateMemTemp(
3892             Ctx.getUIntPtrType(),
3893             Twine(CI->getCapturedVar()->getName()) + ".casted");
3894         LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
3895
3896         auto *SrcAddrVal = CGF.EmitScalarConversion(
3897             DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
3898             Ctx.getPointerType(RI->getType()), SourceLocation());
3899         LValue SrcLV =
3900             CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, RI->getType());
3901
3902         // Store the value using the source type pointer.
3903         CGF.EmitStoreThroughLValue(RValue::get(*CV), SrcLV);
3904
3905         // Load the value using the destination type pointer.
3906         BasePointer = Pointer =
3907             CGF.EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
3908       } else {
3909         MapType |= OMP_MAP_PTR;
3910         BasePointer = Pointer = *CV;
3911       }
3912       Size = getTypeSize(CGF, RI->getType());
3913     } else {
3914       assert(CI->capturesVariable() && "Expected captured reference.");
3915       BasePointer = Pointer = *CV;
3916
3917       const ReferenceType *PtrTy =
3918           cast<ReferenceType>(RI->getType().getTypePtr());
3919       QualType ElementType = PtrTy->getPointeeType();
3920       Size = getTypeSize(CGF, ElementType);
3921       // The default map type for a scalar/complex type is 'to' because by
3922       // default the value doesn't have to be retrieved. For an aggregate type,
3923       // the default is 'tofrom'.
3924       MapType = ElementType->isAggregateType() ? (OMP_MAP_TO | OMP_MAP_FROM)
3925                                                : OMP_MAP_TO;
3926       if (ElementType->isAnyPointerType())
3927         MapType |= OMP_MAP_PTR;
3928     }
3929
3930     BasePointers.push_back(BasePointer);
3931     Pointers.push_back(Pointer);
3932     Sizes.push_back(Size);
3933     MapTypes.push_back(MapType);
3934   }
3935
3936   // Keep track on whether the host function has to be executed.
3937   auto OffloadErrorQType =
3938       Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
3939   auto OffloadError = CGF.MakeAddrLValue(
3940       CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
3941       OffloadErrorQType);
3942   CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
3943                         OffloadError);
3944
3945   // Fill up the pointer arrays and transfer execution to the device.
3946   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
3947                     hasVLACaptures, Device, OutlinedFnID, OffloadError,
3948                     OffloadErrorQType](CodeGenFunction &CGF) {
3949     unsigned PointerNumVal = BasePointers.size();
3950     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
3951     llvm::Value *BasePointersArray;
3952     llvm::Value *PointersArray;
3953     llvm::Value *SizesArray;
3954     llvm::Value *MapTypesArray;
3955
3956     if (PointerNumVal) {
3957       llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
3958       QualType PointerArrayType = Ctx.getConstantArrayType(
3959           Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
3960           /*IndexTypeQuals=*/0);
3961
3962       BasePointersArray =
3963           CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
3964       PointersArray =
3965           CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
3966
3967       // If we don't have any VLA types, we can use a constant array for the map
3968       // sizes, otherwise we need to fill up the arrays as we do for the
3969       // pointers.
3970       if (hasVLACaptures) {
3971         QualType SizeArrayType = Ctx.getConstantArrayType(
3972             Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
3973             /*IndexTypeQuals=*/0);
3974         SizesArray =
3975             CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
3976       } else {
3977         // We expect all the sizes to be constant, so we collect them to create
3978         // a constant array.
3979         SmallVector<llvm::Constant *, 16> ConstSizes;
3980         for (auto S : Sizes)
3981           ConstSizes.push_back(cast<llvm::Constant>(S));
3982
3983         auto *SizesArrayInit = llvm::ConstantArray::get(
3984             llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
3985         auto *SizesArrayGbl = new llvm::GlobalVariable(
3986             CGM.getModule(), SizesArrayInit->getType(),
3987             /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
3988             SizesArrayInit, ".offload_sizes");
3989         SizesArrayGbl->setUnnamedAddr(true);
3990         SizesArray = SizesArrayGbl;
3991       }
3992
3993       // The map types are always constant so we don't need to generate code to
3994       // fill arrays. Instead, we create an array constant.
3995       llvm::Constant *MapTypesArrayInit =
3996           llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
3997       auto *MapTypesArrayGbl = new llvm::GlobalVariable(
3998           CGM.getModule(), MapTypesArrayInit->getType(),
3999           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
4000           MapTypesArrayInit, ".offload_maptypes");
4001       MapTypesArrayGbl->setUnnamedAddr(true);
4002       MapTypesArray = MapTypesArrayGbl;
4003
4004       for (unsigned i = 0; i < PointerNumVal; ++i) {
4005
4006         llvm::Value *BPVal = BasePointers[i];
4007         if (BPVal->getType()->isPointerTy())
4008           BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
4009         else {
4010           assert(BPVal->getType()->isIntegerTy() &&
4011                  "If not a pointer, the value type must be an integer.");
4012           BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
4013         }
4014         llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
4015             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal),
4016             BasePointersArray, 0, i);
4017         Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
4018         CGF.Builder.CreateStore(BPVal, BPAddr);
4019
4020         llvm::Value *PVal = Pointers[i];
4021         if (PVal->getType()->isPointerTy())
4022           PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
4023         else {
4024           assert(PVal->getType()->isIntegerTy() &&
4025                  "If not a pointer, the value type must be an integer.");
4026           PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
4027         }
4028         llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
4029             llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
4030             0, i);
4031         Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
4032         CGF.Builder.CreateStore(PVal, PAddr);
4033
4034         if (hasVLACaptures) {
4035           llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
4036               llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
4037               /*Idx0=*/0,
4038               /*Idx1=*/i);
4039           Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
4040           CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(
4041                                       Sizes[i], CGM.SizeTy, /*isSigned=*/true),
4042                                   SAddr);
4043         }
4044       }
4045
4046       BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4047           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
4048           /*Idx0=*/0, /*Idx1=*/0);
4049       PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4050           llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray,
4051           /*Idx0=*/0,
4052           /*Idx1=*/0);
4053       SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4054           llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray,
4055           /*Idx0=*/0, /*Idx1=*/0);
4056       MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32(
4057           llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray,
4058           /*Idx0=*/0,
4059           /*Idx1=*/0);
4060
4061     } else {
4062       BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
4063       PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
4064       SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
4065       MapTypesArray =
4066           llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
4067     }
4068
4069     // On top of the arrays that were filled up, the target offloading call
4070     // takes as arguments the device id as well as the host pointer. The host
4071     // pointer is used by the runtime library to identify the current target
4072     // region, so it only has to be unique and not necessarily point to
4073     // anything. It could be the pointer to the outlined function that
4074     // implements the target region, but we aren't using that so that the
4075     // compiler doesn't need to keep that, and could therefore inline the host
4076     // function if proven worthwhile during optimization.
4077
4078     // From this point on, we need to have an ID of the target region defined.
4079     assert(OutlinedFnID && "Invalid outlined function ID!");
4080
4081     // Emit device ID if any.
4082     llvm::Value *DeviceID;
4083     if (Device)
4084       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4085                                            CGM.Int32Ty, /*isSigned=*/true);
4086     else
4087       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
4088
4089     llvm::Value *OffloadingArgs[] = {
4090         DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
4091         PointersArray, SizesArray,   MapTypesArray};
4092     auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
4093                                       OffloadingArgs);
4094
4095     CGF.EmitStoreOfScalar(Return, OffloadError);
4096   };
4097
4098   // Notify that the host version must be executed.
4099   auto &&ElseGen = [this, OffloadError,
4100                     OffloadErrorQType](CodeGenFunction &CGF) {
4101     CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u),
4102                           OffloadError);
4103   };
4104
4105   // If we have a target function ID it means that we need to support
4106   // offloading, otherwise, just execute on the host. We need to execute on host
4107   // regardless of the conditional in the if clause if, e.g., the user do not
4108   // specify target triples.
4109   if (OutlinedFnID) {
4110     if (IfCond) {
4111       emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
4112     } else {
4113       CodeGenFunction::RunCleanupsScope Scope(CGF);
4114       ThenGen(CGF);
4115     }
4116   } else {
4117     CodeGenFunction::RunCleanupsScope Scope(CGF);
4118     ElseGen(CGF);
4119   }
4120
4121   // Check the error code and execute the host version if required.
4122   auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
4123   auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
4124   auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
4125   auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
4126   CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
4127
4128   CGF.EmitBlock(OffloadFailedBlock);
4129   CGF.Builder.CreateCall(OutlinedFn, BasePointers);
4130   CGF.EmitBranch(OffloadContBlock);
4131
4132   CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
4133   return;
4134 }
4135
4136 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
4137                                                     StringRef ParentName) {
4138   if (!S)
4139     return;
4140
4141   // If we find a OMP target directive, codegen the outline function and
4142   // register the result.
4143   // FIXME: Add other directives with target when they become supported.
4144   bool isTargetDirective = isa<OMPTargetDirective>(S);
4145
4146   if (isTargetDirective) {
4147     auto *E = cast<OMPExecutableDirective>(S);
4148     unsigned DeviceID;
4149     unsigned FileID;
4150     unsigned Line;
4151     unsigned Column;
4152     getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
4153                              FileID, Line, Column);
4154
4155     // Is this a target region that should not be emitted as an entry point? If
4156     // so just signal we are done with this target region.
4157     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(
4158             DeviceID, FileID, ParentName, Line, Column))
4159       return;
4160
4161     llvm::Function *Fn;
4162     llvm::Constant *Addr;
4163     emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
4164                                /*isOffloadEntry=*/true);
4165     assert(Fn && Addr && "Target region emission failed.");
4166     return;
4167   }
4168
4169   if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
4170     if (!E->getAssociatedStmt())
4171       return;
4172
4173     scanForTargetRegionsFunctions(
4174         cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
4175         ParentName);
4176     return;
4177   }
4178
4179   // If this is a lambda function, look into its body.
4180   if (auto *L = dyn_cast<LambdaExpr>(S))
4181     S = L->getBody();
4182
4183   // Keep looking for target regions recursively.
4184   for (auto *II : S->children())
4185     scanForTargetRegionsFunctions(II, ParentName);
4186
4187   return;
4188 }
4189
4190 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
4191   auto &FD = *cast<FunctionDecl>(GD.getDecl());
4192
4193   // If emitting code for the host, we do not process FD here. Instead we do
4194   // the normal code generation.
4195   if (!CGM.getLangOpts().OpenMPIsDevice)
4196     return false;
4197
4198   // Try to detect target regions in the function.
4199   scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
4200
4201   // We should not emit any function othen that the ones created during the
4202   // scanning. Therefore, we signal that this function is completely dealt
4203   // with.
4204   return true;
4205 }
4206
4207 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
4208   if (!CGM.getLangOpts().OpenMPIsDevice)
4209     return false;
4210
4211   // Check if there are Ctors/Dtors in this declaration and look for target
4212   // regions in it. We use the complete variant to produce the kernel name
4213   // mangling.
4214   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
4215   if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
4216     for (auto *Ctor : RD->ctors()) {
4217       StringRef ParentName =
4218           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
4219       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
4220     }
4221     auto *Dtor = RD->getDestructor();
4222     if (Dtor) {
4223       StringRef ParentName =
4224           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
4225       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
4226     }
4227   }
4228
4229   // If we are in target mode we do not emit any global (declare target is not
4230   // implemented yet). Therefore we signal that GD was processed in this case.
4231   return true;
4232 }
4233
4234 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
4235   auto *VD = GD.getDecl();
4236   if (isa<FunctionDecl>(VD))
4237     return emitTargetFunctions(GD);
4238
4239   return emitTargetGlobalVariable(GD);
4240 }
4241
4242 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
4243   // If we have offloading in the current module, we need to emit the entries
4244   // now and register the offloading descriptor.
4245   createOffloadEntriesAndInfoMetadata();
4246
4247   // Create and register the offloading binary descriptors. This is the main
4248   // entity that captures all the information about offloading in the current
4249   // compilation unit.
4250   return createOffloadingBinaryDescriptorRegistration();
4251 }