//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
12
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30
31 using namespace clang;
32 using namespace CodeGen;
33
34 namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Installed as CodeGenFunction::CapturedStmtInfo while emitting the body of
/// an OpenMP construct; concrete subclasses describe how the region's body is
/// materialized (outlined into a helper function or emitted inline).
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions backed by a captured statement (outlined
  /// regions).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions with no captured statement of their own
  /// (inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the next task-switching point for 'untied' tasks. No-op here;
  /// overridden by task-outlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may be cancelled via a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
96
/// API for captured statement code generation in OpenMP constructs.
/// Used for regions outlined into a separate helper function, e.g. the body
/// of a 'parallel' directive.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined helper function.
  StringRef HelperName;
};
129
/// API for captured statement code generation in OpenMP constructs.
/// Used for regions outlined for 'task' directives; also carries the
/// machinery for resuming 'untied' tasks at their last switching point.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the dispatch scaffolding for 'untied' tasks:
  /// a switch over the task part id, with one case per task part, so that a
  /// resumed untied task jumps back to where it last yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter that holds the current part id of the untied task.
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch on the part id; cases are appended as task parts are emitted.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // Unknown part ids fall through to the done block, which returns.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 resumes at the very start of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: record the next part id in PartIDVar,
    /// run the user-provided codegen, return from the current part, and add
    /// a switch case that resumes right after this point.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one switch case per part).
    /// NOTE(review): dereferences UntiedSwitch unconditionally — presumably
    /// only called after Enter() ran for an untied task; confirm callers.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
218
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223   CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224                             const RegionCodeGenTy &CodeGen,
225                             OpenMPDirectiveKind Kind, bool HasCancel)
226       : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227         OldCSI(OldCSI),
228         OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229
230   // Retrieve the value of the context parameter.
231   llvm::Value *getContextValue() const override {
232     if (OuterRegionInfo)
233       return OuterRegionInfo->getContextValue();
234     llvm_unreachable("No context value for inlined OpenMP region");
235   }
236
237   void setContextValue(llvm::Value *V) override {
238     if (OuterRegionInfo) {
239       OuterRegionInfo->setContextValue(V);
240       return;
241     }
242     llvm_unreachable("No context value for inlined OpenMP region");
243   }
244
245   /// Lookup the captured field decl for a variable.
246   const FieldDecl *lookup(const VarDecl *VD) const override {
247     if (OuterRegionInfo)
248       return OuterRegionInfo->lookup(VD);
249     // If there is no outer outlined region,no need to lookup in a list of
250     // captured variables, we can use the original one.
251     return nullptr;
252   }
253
254   FieldDecl *getThisFieldDecl() const override {
255     if (OuterRegionInfo)
256       return OuterRegionInfo->getThisFieldDecl();
257     return nullptr;
258   }
259
260   /// Get a variable or parameter for storing global thread id
261   /// inside OpenMP construct.
262   const VarDecl *getThreadIDVariable() const override {
263     if (OuterRegionInfo)
264       return OuterRegionInfo->getThreadIDVariable();
265     return nullptr;
266   }
267
268   /// Get an LValue for the current ThreadID variable.
269   LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270     if (OuterRegionInfo)
271       return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272     llvm_unreachable("No LValue for inlined OpenMP construct");
273   }
274
275   /// Get the name of the capture helper.
276   StringRef getHelperName() const override {
277     if (auto *OuterRegionInfo = getOldCSI())
278       return OuterRegionInfo->getHelperName();
279     llvm_unreachable("No helper name for inlined OpenMP construct");
280   }
281
282   void emitUntiedSwitch(CodeGenFunction &CGF) override {
283     if (OuterRegionInfo)
284       OuterRegionInfo->emitUntiedSwitch(CGF);
285   }
286
287   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288
289   static bool classof(const CGCapturedStmtInfo *Info) {
290     return CGOpenMPRegionInfo::classof(Info) &&
291            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292   }
293
294   ~CGOpenMPInlinedRegionInfo() override = default;
295
296 private:
297   /// CodeGen info about outer OpenMP region.
298   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299   CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region helper.
  StringRef HelperName;
};
330
/// Placeholder codegen callback for regions created only to capture
/// expressions; it must never actually be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region. Only variable lookup and privatization are supported; emitting a
/// body, a thread id, or a helper name through this info is a bug.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization treatment.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      // NOTE(review): the lambda captures loop-local DRE by reference —
      // presumably addPrivate evaluates it immediately; confirm against
      // OMPPrivateScope::addPrivate.
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// Deliberately not identifiable via LLVM RTTI.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
393
/// RAII for emitting code of OpenMP constructs.
/// On construction, installs a CGOpenMPInlinedRegionInfo as the current
/// CapturedStmtInfo and stashes the lambda/block capture state; the
/// destructor restores everything. Construction and destruction order of the
/// saved fields is significant — do not reorder.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map (swapped with CGF's in ctor/dtor).
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved 'this' capture field of the enclosing lambda, if any.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info of the enclosing block literal, if any.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};
430
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same bit pattern as
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
459
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED               = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE                    = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD         = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS         = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY   = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS      = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device ids understood by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
485
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
526
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
558
/// Identifiers for the libomp/libomptarget entry points emitted by this file.
/// Each enumerator's comment gives the C prototype of the runtime call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  /// kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  /// size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  /// kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  /// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  /// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  /// Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  /// size);
  OMPRTL__kmpc_push_target_tripcount,
  /// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_nowait,
  /// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  /// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  /// Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  /// Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  /// Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  /// Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
756
757 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
758 /// region.
759 class CleanupTy final : public EHScopeStack::Cleanup {
760   PrePostActionTy *Action;
761
762 public:
763   explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
764   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
765     if (!CGF.HaveInsertPoint())
766       return;
767     Action->Exit(CGF);
768   }
769 };
770
771 } // anonymous namespace
772
773 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
774   CodeGenFunction::RunCleanupsScope Scope(CGF);
775   if (PrePostAction) {
776     CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
777     Callback(CodeGen, CGF, *PrePostAction);
778   } else {
779     PrePostActionTy Action;
780     Callback(CodeGen, CGF, Action);
781   }
782 }
783
784 /// Check if the combiner is a call to UDR combiner and if it is so return the
785 /// UDR decl used for reduction.
786 static const OMPDeclareReductionDecl *
787 getReductionInit(const Expr *ReductionOp) {
788   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
789     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
790       if (const auto *DRE =
791               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
792         if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
793           return DRD;
794   return nullptr;
795 }
796
/// Initialize \p Private using the user-defined reduction declaration \p DRD:
/// either by emitting the declared initializer call \p InitOp (with the
/// call's arguments remapped to \p Private / \p Original), or, when no
/// initializer is declared, by copying a null constant of type \p Ty into
/// \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The UDR declares an initializer: emit InitOp as a call to the generated
    // initializer function.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // InitOp's two arguments are address-of expressions; dig out the
    // referenced variables so they can be remapped below.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the first-argument variable to Private and the second to
    // Original for the duration of the call emission.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Substitute the generated initializer function for the call's opaque
    // callee, then emit the call for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No declared initializer: materialize a null constant of type Ty in a
    // private constant global and copy it into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant using the evaluation strategy matching Ty's kind.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Wrap the loaded value in an opaque expression so EmitAnyExprToMem can
    // store it into Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
848
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element through
/// the user-defined reduction initializer; otherwise emit \p Init as a plain
/// initializer expression per element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null when not a UDR.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source array only participates for user-defined reductions.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely when the array is empty (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source (UDR only) and destination elements
  // across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope cleanups produced by the element initializer to this iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says 'dest' but this advances the
    // *source* element — cosmetic copy-paste in the name only.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
937
938 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
939   return CGF.EmitOMPSharedLValue(E);
940 }
941
942 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
943                                             const Expr *E) {
944   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
945     return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
946   return LValue();
947 }
948
949 void ReductionCodeGen::emitAggregateInitialization(
950     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
951     const OMPDeclareReductionDecl *DRD) {
952   // Emit VarDecl with copy init for arrays.
953   // Get the address of the original variable captured in current
954   // captured region.
955   const auto *PrivateVD =
956       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
957   bool EmitDeclareReductionInit =
958       DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
959   EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
960                        EmitDeclareReductionInit,
961                        EmitDeclareReductionInit ? ClausesData[N].ReductionOp
962                                                 : PrivateVD->getInit(),
963                        DRD, SharedLVal.getAddress());
964 }
965
966 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
967                                    ArrayRef<const Expr *> Privates,
968                                    ArrayRef<const Expr *> ReductionOps) {
969   ClausesData.reserve(Shareds.size());
970   SharedAddresses.reserve(Shareds.size());
971   Sizes.reserve(Shareds.size());
972   BaseDecls.reserve(Shareds.size());
973   auto IPriv = Privates.begin();
974   auto IRed = ReductionOps.begin();
975   for (const Expr *Ref : Shareds) {
976     ClausesData.emplace_back(Ref, *IPriv, *IRed);
977     std::advance(IPriv, 1);
978     std::advance(IRed, 1);
979   }
980 }
981
982 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
983   assert(SharedAddresses.size() == N &&
984          "Number of generated lvalues must be exactly N.");
985   LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
986   LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
987   SharedAddresses.emplace_back(First, Second);
988 }
989
/// Compute and record the size of reduction item \p N (size in chars, plus an
/// element count for variably-modified types), and emit the private VLA type
/// with its size expression bound to the computed count.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: record its size in chars; no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: element count is (UB pointer - LB pointer) + 1, using
    // the begin/end lvalues previously recorded in SharedAddresses[N].
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variably-modified item: char size comes from the type; derive
    // the element count by exact division.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified private type can be fully emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
1027
1028 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1029                                          llvm::Value *Size) {
1030   const auto *PrivateVD =
1031       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1032   QualType PrivateType = PrivateVD->getType();
1033   if (!PrivateType->isVariablyModifiedType()) {
1034     assert(!Size && !Sizes[N].second &&
1035            "Size should be nullptr for non-variably modified reduction "
1036            "items.");
1037     return;
1038   }
1039   CodeGenFunction::OpaqueValueMapping OpaqueMap(
1040       CGF,
1041       cast<OpaqueValueExpr>(
1042           CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1043       RValue::get(Size));
1044   CGF.EmitVariablyModifiedType(PrivateType);
1045 }
1046
/// Emit initialization of the private copy for reduction item \p N, choosing
/// between aggregate (element-wise) init, the user-defined reduction
/// initializer, or the private variable's own initializer — the latter only
/// when \p DefaultInit declines to handle it.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Cast both addresses to the memory representation of their source-level
  // types before emitting any initialization.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array reductions are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // User-defined reduction with a declared initializer (or no private
    // initializer to fall back on).
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // DefaultInit gets first chance (note: calling it here is intentional —
    // short-circuit order matters); if it returns false, emit the private
    // variable's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
1077
1078 bool ReductionCodeGen::needCleanups(unsigned N) {
1079   const auto *PrivateVD =
1080       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1081   QualType PrivateType = PrivateVD->getType();
1082   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1083   return DTorKind != QualType::DK_none;
1084 }
1085
1086 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1087                                     Address PrivateAddr) {
1088   const auto *PrivateVD =
1089       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1090   QualType PrivateType = PrivateVD->getType();
1091   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1092   if (needCleanups(N)) {
1093     PrivateAddr = CGF.Builder.CreateElementBitCast(
1094         PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1095     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1096   }
1097 }
1098
/// Dereference \p BaseLV through the pointer/reference levels of \p BaseTy
/// until the type matches \p ElTy, then return the resulting address cast to
/// \p ElTy's memory type, preserving the lvalue's base and TBAA info.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Load one level of indirection, using the pointer- or the
    // reference-specific emitter as appropriate.
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
1118
/// Wrap \p Addr in a chain of freshly allocated temporaries mirroring the
/// pointer/reference levels between \p BaseTy and \p ElTy, so the result can
/// stand in for an lvalue of type \p BaseTy (with \p BaseLVAlignment). When
/// no indirection levels exist, returns \p Addr cast to \p BaseLVType.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // innermost temporary created
  Address TopTmp = Address::invalid();     // temporary from previous level
  Address MostTopTmp = Address::invalid(); // outermost temporary (the result)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per indirection level; each outer temporary stores the
    // address of the next inner one, reproducing the indirection chain.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the final address into the innermost temporary and hand back
    // the outermost one.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}
1146
1147 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1148   const VarDecl *OrigVD = nullptr;
1149   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1150     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1151     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1152       Base = TempOASE->getBase()->IgnoreParenImpCasts();
1153     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1154       Base = TempASE->getBase()->IgnoreParenImpCasts();
1155     DE = cast<DeclRefExpr>(Base);
1156     OrigVD = cast<VarDecl>(DE->getDecl());
1157   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1158     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1159     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1160       Base = TempASE->getBase()->IgnoreParenImpCasts();
1161     DE = cast<DeclRefExpr>(Base);
1162     OrigVD = cast<VarDecl>(DE->getDecl());
1163   }
1164   return OrigVD;
1165 }
1166
/// For reduction items that are array elements/sections, rebase the private
/// address: compute the signed element distance from the shared item's begin
/// address back to its base variable and apply the same offset to the private
/// copy, rebuilding any pointer indirections via castToBase. Other items are
/// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    // Dereference down to the element type of the shared item.
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Signed distance (in elements) from the shared item's begin address to
    // the base variable's begin address.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    // Apply the same offset to the private copy to obtain its "base".
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1192
1193 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1194   const OMPDeclareReductionDecl *DRD =
1195       getReductionInit(ClausesData[N].ReductionOp);
1196   return DRD && DRD->getInitializer();
1197 }
1198
1199 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1200   return CGF.EmitLoadOfPointerLValue(
1201       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1202       getThreadIDVariable()->getType()->castAs<PointerType>());
1203 }
1204
1205 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1206   if (!CGF.HaveInsertPoint())
1207     return;
1208   // 1.2.2 OpenMP Language Terminology
1209   // Structured block - An executable statement with a single entry at the
1210   // top and a single exit at the bottom.
1211   // The point of exit cannot be a branch out of the structured block.
1212   // longjmp() and throw() must not violate the entry/exit criteria.
1213   CGF.EHStack.pushTerminate();
1214   CodeGen(CGF);
1215   CGF.EHStack.popTerminate();
1216 }
1217
1218 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1219     CodeGenFunction &CGF) {
1220   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1221                             getThreadIDVariable()->getType(),
1222                             AlignmentSource::Decl);
1223 }
1224
1225 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1226                                        QualType FieldTy) {
1227   auto *Field = FieldDecl::Create(
1228       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1229       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1230       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1231   Field->setAccess(AS_public);
1232   DC->addDecl(Field);
1233   return Field;
1234 }
1235
1236 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1237                                  StringRef Separator)
1238     : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1239       OffloadEntriesInfoManager(CGM) {
1240   ASTContext &C = CGM.getContext();
1241   RecordDecl *RD = C.buildImplicitRecord("ident_t");
1242   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1243   RD->startDefinition();
1244   // reserved_1
1245   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1246   // flags
1247   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1248   // reserved_2
1249   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1250   // reserved_3
1251   addFieldToRecordDecl(C, RD, KmpInt32Ty);
1252   // psource
1253   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1254   RD->completeDefinition();
1255   IdentQTy = C.getRecordType(RD);
1256   IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1257   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1258
1259   loadOffloadInfoMetadata();
1260 }
1261
1262 void CGOpenMPRuntime::clear() {
1263   InternalVars.clear();
1264   // Clean non-target variable declarations possibly used only in debug info.
1265   for (const auto &Data : EmittedNonTargetVariables) {
1266     if (!Data.getValue().pointsToAliveValue())
1267       continue;
1268     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1269     if (!GV)
1270       continue;
1271     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1272       continue;
1273     GV->eraseFromParent();
1274   }
1275 }
1276
1277 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1278   SmallString<128> Buffer;
1279   llvm::raw_svector_ostream OS(Buffer);
1280   StringRef Sep = FirstSeparator;
1281   for (StringRef Part : Parts) {
1282     OS << Sep << Part;
1283     Sep = Separator;
1284   }
1285   return OS.str();
1286 }
1287
/// Emit an internal helper function for a user-defined reduction: either the
/// combiner or the initializer, depending on \p IsCombiner. The helper has
/// the shape 'void f(Ty *restrict out, Ty *restrict in)', with the source
/// variables \p In / \p Out remapped onto the two parameters.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny wrappers; force inlining when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, the Out variable may carry its own non-trivial
  // initializer; emit it into Out's (remapped) storage first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // Emit the combiner/initializer expression (when present) for its effects.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1344
1345 void CGOpenMPRuntime::emitUserDefinedReduction(
1346     CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1347   if (UDRMap.count(D) > 0)
1348     return;
1349   llvm::Function *Combiner = emitCombinerOrInitializer(
1350       CGM, D->getType(), D->getCombiner(),
1351       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1352       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1353       /*IsCombiner=*/true);
1354   llvm::Function *Initializer = nullptr;
1355   if (const Expr *Init = D->getInitializer()) {
1356     Initializer = emitCombinerOrInitializer(
1357         CGM, D->getType(),
1358         D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1359                                                                      : nullptr,
1360         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1361         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1362         /*IsCombiner=*/false);
1363   }
1364   UDRMap.try_emplace(D, Combiner, Initializer);
1365   if (CGF) {
1366     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1367     Decls.second.push_back(D);
1368   }
1369 }
1370
1371 std::pair<llvm::Function *, llvm::Function *>
1372 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1373   auto I = UDRMap.find(D);
1374   if (I != UDRMap.end())
1375     return I->second;
1376   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1377   return UDRMap.lookup(D);
1378 }
1379
1380 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1381     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1382     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1383     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1384   assert(ThreadIDVar->getType()->isPointerType() &&
1385          "thread id variable must be of type kmp_int32 *");
1386   CodeGenFunction CGF(CGM, true);
1387   bool HasCancel = false;
1388   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1389     HasCancel = OPD->hasCancel();
1390   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1391     HasCancel = OPSD->hasCancel();
1392   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1393     HasCancel = OPFD->hasCancel();
1394   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1395     HasCancel = OPFD->hasCancel();
1396   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1397     HasCancel = OPFD->hasCancel();
1398   else if (const auto *OPFD =
1399                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1400     HasCancel = OPFD->hasCancel();
1401   else if (const auto *OPFD =
1402                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1403     HasCancel = OPFD->hasCancel();
1404   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1405                                     HasCancel, OutlinedHelperName);
1406   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1407   return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1408 }
1409
1410 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1411     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1412     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1413   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1414   return emitParallelOrTeamsOutlinedFunction(
1415       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1416 }
1417
1418 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1419     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1420     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1421   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1422   return emitParallelOrTeamsOutlinedFunction(
1423       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1424 }
1425
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Emit the outlined function for a 'task'/'taskloop' region. For untied
  // tasks each resumable part re-enqueues the task via __kmpc_omp_task;
  // UntiedCodeGen emits that runtime call.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    // Args: ident_t *loc, kmp_int32 gtid, and the kmp_task_t* loaded from
    // the outlined function's task parameter.
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  // The action tracks the part-id variable and counts resumable parts; it
  // must be attached to CodeGen before the region is emitted.
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Pick the captured statement matching the directive kind.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only a plain 'task' directive carries a 'cancel' flag here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The part count is only meaningful for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1462
1463 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1464                              const RecordDecl *RD, const CGRecordLayout &RL,
1465                              ArrayRef<llvm::Constant *> Data) {
1466   llvm::StructType *StructTy = RL.getLLVMType();
1467   unsigned PrevIdx = 0;
1468   ConstantInitBuilder CIBuilder(CGM);
1469   auto DI = Data.begin();
1470   for (const FieldDecl *FD : RD->fields()) {
1471     unsigned Idx = RL.getLLVMFieldNo(FD);
1472     // Fill the alignment.
1473     for (unsigned I = PrevIdx; I < Idx; ++I)
1474       Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1475     PrevIdx = Idx + 1;
1476     Fields.add(*DI);
1477     ++DI;
1478   }
1479 }
1480
1481 template <class... As>
1482 static llvm::GlobalVariable *
1483 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1484                    ArrayRef<llvm::Constant *> Data, const Twine &Name,
1485                    As &&... Args) {
1486   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1487   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1488   ConstantInitBuilder CIBuilder(CGM);
1489   ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1490   buildStructValue(Fields, CGM, RD, RL, Data);
1491   return Fields.finishAndCreateGlobal(
1492       Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1493       std::forward<As>(Args)...);
1494 }
1495
1496 template <typename T>
1497 static void
1498 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1499                                          ArrayRef<llvm::Constant *> Data,
1500                                          T &Parent) {
1501   const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1502   const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1503   ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1504   buildStructValue(Fields, CGM, RD, RL, Data);
1505   Fields.finishAndAddTo(Parent);
1506 }
1507
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  // Return (creating and caching on first use) the default ident_t global
  // for the given flags. One global is kept per (Flags, Reserved2Flags) key.
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Initializers for the ident_t fields, in declaration order; the last
    // entry is the psource string pointer.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // The address is not significant, so identical globals may be merged.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}
1540
1541 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1542                                              bool AtCurrentPoint) {
1543   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1544   assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1545
1546   llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1547   if (AtCurrentPoint) {
1548     Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1549         Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1550   } else {
1551     Elem.second.ServiceInsertPt =
1552         new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1553     Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1554   }
1555 }
1556
1557 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1558   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1559   if (Elem.second.ServiceInsertPt) {
1560     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1561     Elem.second.ServiceInsertPt = nullptr;
1562     Ptr->eraseFromParent();
1563   }
1564 }
1565
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Build (or reuse) an ident_t describing \p Loc and return a pointer to it
  // for passing to __kmpc_* runtime entry points.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the default location at the service
    // insert point, so the copy dominates every later use in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Cache the generated location string per raw source-location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}
1626
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  // Return the OpenMP global thread id for the current function. The value
  // is cached per function in OpenMPLocThreadIDMap when safe to do so.
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function. (A load elsewhere would not dominate all uses, so it is
        // not cached.)
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function. The call is emitted at the service insert point so the cached
  // value dominates every use.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1677
1678 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1679   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1680   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1681     clearLocThreadIdInsertPt(CGF);
1682     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1683   }
1684   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1685     for(auto *D : FunctionUDRMap[CGF.CurFn])
1686       UDRMap.erase(D);
1687     FunctionUDRMap.erase(CGF.CurFn);
1688   }
1689 }
1690
1691 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1692   return IdentTy->getPointerTo();
1693 }
1694
1695 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1696   if (!Kmpc_MicroTy) {
1697     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1698     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1699                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1700     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1701   }
1702   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1703 }
1704
1705 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1706   llvm::FunctionCallee RTLFn = nullptr;
1707   switch (static_cast<OpenMPRTLFunction>(Function)) {
1708   case OMPRTL__kmpc_fork_call: {
1709     // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1710     // microtask, ...);
1711     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1712                                 getKmpc_MicroPointerTy()};
1713     auto *FnTy =
1714         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1715     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1716     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1717       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1718         llvm::LLVMContext &Ctx = F->getContext();
1719         llvm::MDBuilder MDB(Ctx);
1720         // Annotate the callback behavior of the __kmpc_fork_call:
1721         //  - The callback callee is argument number 2 (microtask).
1722         //  - The first two arguments of the callback callee are unknown (-1).
1723         //  - All variadic arguments to the __kmpc_fork_call are passed to the
1724         //    callback callee.
1725         F->addMetadata(
1726             llvm::LLVMContext::MD_callback,
1727             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1728                                         2, {-1, -1},
1729                                         /* VarArgsArePassed */ true)}));
1730       }
1731     }
1732     break;
1733   }
1734   case OMPRTL__kmpc_global_thread_num: {
1735     // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1736     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1737     auto *FnTy =
1738         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1739     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1740     break;
1741   }
1742   case OMPRTL__kmpc_threadprivate_cached: {
1743     // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1744     // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1745     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1746                                 CGM.VoidPtrTy, CGM.SizeTy,
1747                                 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1748     auto *FnTy =
1749         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1750     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1751     break;
1752   }
1753   case OMPRTL__kmpc_critical: {
1754     // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1755     // kmp_critical_name *crit);
1756     llvm::Type *TypeParams[] = {
1757         getIdentTyPointerTy(), CGM.Int32Ty,
1758         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1759     auto *FnTy =
1760         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1761     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1762     break;
1763   }
1764   case OMPRTL__kmpc_critical_with_hint: {
1765     // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1766     // kmp_critical_name *crit, uintptr_t hint);
1767     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1768                                 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1769                                 CGM.IntPtrTy};
1770     auto *FnTy =
1771         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1773     break;
1774   }
1775   case OMPRTL__kmpc_threadprivate_register: {
1776     // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1777     // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1778     // typedef void *(*kmpc_ctor)(void *);
1779     auto *KmpcCtorTy =
1780         llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1781                                 /*isVarArg*/ false)->getPointerTo();
1782     // typedef void *(*kmpc_cctor)(void *, void *);
1783     llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1784     auto *KmpcCopyCtorTy =
1785         llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1786                                 /*isVarArg*/ false)
1787             ->getPointerTo();
1788     // typedef void (*kmpc_dtor)(void *);
1789     auto *KmpcDtorTy =
1790         llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1791             ->getPointerTo();
1792     llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1793                               KmpcCopyCtorTy, KmpcDtorTy};
1794     auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1795                                         /*isVarArg*/ false);
1796     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1797     break;
1798   }
1799   case OMPRTL__kmpc_end_critical: {
1800     // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1801     // kmp_critical_name *crit);
1802     llvm::Type *TypeParams[] = {
1803         getIdentTyPointerTy(), CGM.Int32Ty,
1804         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1805     auto *FnTy =
1806         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1808     break;
1809   }
1810   case OMPRTL__kmpc_cancel_barrier: {
1811     // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1812     // global_tid);
1813     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814     auto *FnTy =
1815         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1816     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1817     break;
1818   }
1819   case OMPRTL__kmpc_barrier: {
1820     // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1821     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1822     auto *FnTy =
1823         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1824     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1825     break;
1826   }
1827   case OMPRTL__kmpc_for_static_fini: {
1828     // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1829     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1830     auto *FnTy =
1831         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1832     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1833     break;
1834   }
1835   case OMPRTL__kmpc_push_num_threads: {
1836     // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1837     // kmp_int32 num_threads)
1838     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1839                                 CGM.Int32Ty};
1840     auto *FnTy =
1841         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1842     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1843     break;
1844   }
1845   case OMPRTL__kmpc_serialized_parallel: {
1846     // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1847     // global_tid);
1848     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1849     auto *FnTy =
1850         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1851     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1852     break;
1853   }
1854   case OMPRTL__kmpc_end_serialized_parallel: {
1855     // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1856     // global_tid);
1857     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1858     auto *FnTy =
1859         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1860     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1861     break;
1862   }
1863   case OMPRTL__kmpc_flush: {
1864     // Build void __kmpc_flush(ident_t *loc);
1865     llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1866     auto *FnTy =
1867         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1868     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1869     break;
1870   }
1871   case OMPRTL__kmpc_master: {
1872     // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1873     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1874     auto *FnTy =
1875         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1876     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1877     break;
1878   }
1879   case OMPRTL__kmpc_end_master: {
1880     // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1881     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1882     auto *FnTy =
1883         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1884     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1885     break;
1886   }
1887   case OMPRTL__kmpc_omp_taskyield: {
1888     // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1889     // int end_part);
1890     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1891     auto *FnTy =
1892         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1893     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1894     break;
1895   }
1896   case OMPRTL__kmpc_single: {
1897     // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1898     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1899     auto *FnTy =
1900         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1901     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1902     break;
1903   }
1904   case OMPRTL__kmpc_end_single: {
1905     // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1906     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1907     auto *FnTy =
1908         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1910     break;
1911   }
1912   case OMPRTL__kmpc_omp_task_alloc: {
1913     // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1914     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1915     // kmp_routine_entry_t *task_entry);
1916     assert(KmpRoutineEntryPtrTy != nullptr &&
1917            "Type kmp_routine_entry_t must be created.");
1918     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1919                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1920     // Return void * and then cast to particular kmp_task_t type.
1921     auto *FnTy =
1922         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1923     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1924     break;
1925   }
1926   case OMPRTL__kmpc_omp_target_task_alloc: {
1927     // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1928     // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1929     // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1930     assert(KmpRoutineEntryPtrTy != nullptr &&
1931            "Type kmp_routine_entry_t must be created.");
1932     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1933                                 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1934                                 CGM.Int64Ty};
1935     // Return void * and then cast to particular kmp_task_t type.
1936     auto *FnTy =
1937         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1938     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1939     break;
1940   }
1941   case OMPRTL__kmpc_omp_task: {
1942     // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1943     // *new_task);
1944     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1945                                 CGM.VoidPtrTy};
1946     auto *FnTy =
1947         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1948     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1949     break;
1950   }
1951   case OMPRTL__kmpc_copyprivate: {
1952     // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1953     // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1954     // kmp_int32 didit);
1955     llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1956     auto *CpyFnTy =
1957         llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1958     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1959                                 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1960                                 CGM.Int32Ty};
1961     auto *FnTy =
1962         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1963     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1964     break;
1965   }
1966   case OMPRTL__kmpc_reduce: {
1967     // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1968     // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1969     // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1970     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1971     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1972                                                /*isVarArg=*/false);
1973     llvm::Type *TypeParams[] = {
1974         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1975         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1976         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1977     auto *FnTy =
1978         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1979     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1980     break;
1981   }
1982   case OMPRTL__kmpc_reduce_nowait: {
1983     // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1984     // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1985     // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1986     // *lck);
1987     llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1988     auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1989                                                /*isVarArg=*/false);
1990     llvm::Type *TypeParams[] = {
1991         getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1992         CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1993         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1994     auto *FnTy =
1995         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1996     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1997     break;
1998   }
1999   case OMPRTL__kmpc_end_reduce: {
2000     // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2001     // kmp_critical_name *lck);
2002     llvm::Type *TypeParams[] = {
2003         getIdentTyPointerTy(), CGM.Int32Ty,
2004         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005     auto *FnTy =
2006         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2008     break;
2009   }
2010   case OMPRTL__kmpc_end_reduce_nowait: {
2011     // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2012     // kmp_critical_name *lck);
2013     llvm::Type *TypeParams[] = {
2014         getIdentTyPointerTy(), CGM.Int32Ty,
2015         llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016     auto *FnTy =
2017         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018     RTLFn =
2019         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2020     break;
2021   }
2022   case OMPRTL__kmpc_omp_task_begin_if0: {
2023     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2024     // *new_task);
2025     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2026                                 CGM.VoidPtrTy};
2027     auto *FnTy =
2028         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029     RTLFn =
2030         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2031     break;
2032   }
2033   case OMPRTL__kmpc_omp_task_complete_if0: {
2034     // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2035     // *new_task);
2036     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037                                 CGM.VoidPtrTy};
2038     auto *FnTy =
2039         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040     RTLFn = CGM.CreateRuntimeFunction(FnTy,
2041                                       /*Name=*/"__kmpc_omp_task_complete_if0");
2042     break;
2043   }
2044   case OMPRTL__kmpc_ordered: {
2045     // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2046     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2047     auto *FnTy =
2048         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2049     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2050     break;
2051   }
2052   case OMPRTL__kmpc_end_ordered: {
2053     // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2054     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2055     auto *FnTy =
2056         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2057     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2058     break;
2059   }
2060   case OMPRTL__kmpc_omp_taskwait: {
2061     // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2062     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2063     auto *FnTy =
2064         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2065     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2066     break;
2067   }
2068   case OMPRTL__kmpc_taskgroup: {
2069     // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2070     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2071     auto *FnTy =
2072         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2073     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2074     break;
2075   }
2076   case OMPRTL__kmpc_end_taskgroup: {
2077     // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2078     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2079     auto *FnTy =
2080         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2081     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2082     break;
2083   }
2084   case OMPRTL__kmpc_push_proc_bind: {
2085     // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2086     // int proc_bind)
2087     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2088     auto *FnTy =
2089         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2090     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2091     break;
2092   }
2093   case OMPRTL__kmpc_omp_task_with_deps: {
2094     // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2095     // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2096     // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2097     llvm::Type *TypeParams[] = {
2098         getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2099         CGM.VoidPtrTy,         CGM.Int32Ty, CGM.VoidPtrTy};
2100     auto *FnTy =
2101         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2102     RTLFn =
2103         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2104     break;
2105   }
2106   case OMPRTL__kmpc_omp_wait_deps: {
2107     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2108     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2109     // kmp_depend_info_t *noalias_dep_list);
2110     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2111                                 CGM.Int32Ty,           CGM.VoidPtrTy,
2112                                 CGM.Int32Ty,           CGM.VoidPtrTy};
2113     auto *FnTy =
2114         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2115     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2116     break;
2117   }
2118   case OMPRTL__kmpc_cancellationpoint: {
2119     // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2120     // global_tid, kmp_int32 cncl_kind)
2121     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2122     auto *FnTy =
2123         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2124     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2125     break;
2126   }
2127   case OMPRTL__kmpc_cancel: {
2128     // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2129     // kmp_int32 cncl_kind)
2130     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2131     auto *FnTy =
2132         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2133     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2134     break;
2135   }
2136   case OMPRTL__kmpc_push_num_teams: {
2137     // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2138     // kmp_int32 num_teams, kmp_int32 num_threads)
2139     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2140         CGM.Int32Ty};
2141     auto *FnTy =
2142         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2143     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2144     break;
2145   }
2146   case OMPRTL__kmpc_fork_teams: {
2147     // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2148     // microtask, ...);
2149     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2150                                 getKmpc_MicroPointerTy()};
2151     auto *FnTy =
2152         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2153     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2154     if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2155       if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2156         llvm::LLVMContext &Ctx = F->getContext();
2157         llvm::MDBuilder MDB(Ctx);
2158         // Annotate the callback behavior of the __kmpc_fork_teams:
2159         //  - The callback callee is argument number 2 (microtask).
2160         //  - The first two arguments of the callback callee are unknown (-1).
2161         //  - All variadic arguments to the __kmpc_fork_teams are passed to the
2162         //    callback callee.
2163         F->addMetadata(
2164             llvm::LLVMContext::MD_callback,
2165             *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2166                                         2, {-1, -1},
2167                                         /* VarArgsArePassed */ true)}));
2168       }
2169     }
2170     break;
2171   }
2172   case OMPRTL__kmpc_taskloop: {
2173     // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2174     // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2175     // sched, kmp_uint64 grainsize, void *task_dup);
2176     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2177                                 CGM.IntTy,
2178                                 CGM.VoidPtrTy,
2179                                 CGM.IntTy,
2180                                 CGM.Int64Ty->getPointerTo(),
2181                                 CGM.Int64Ty->getPointerTo(),
2182                                 CGM.Int64Ty,
2183                                 CGM.IntTy,
2184                                 CGM.IntTy,
2185                                 CGM.Int64Ty,
2186                                 CGM.VoidPtrTy};
2187     auto *FnTy =
2188         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2189     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2190     break;
2191   }
2192   case OMPRTL__kmpc_doacross_init: {
2193     // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2194     // num_dims, struct kmp_dim *dims);
2195     llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2196                                 CGM.Int32Ty,
2197                                 CGM.Int32Ty,
2198                                 CGM.VoidPtrTy};
2199     auto *FnTy =
2200         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2201     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2202     break;
2203   }
2204   case OMPRTL__kmpc_doacross_fini: {
2205     // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2206     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2207     auto *FnTy =
2208         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2209     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2210     break;
2211   }
2212   case OMPRTL__kmpc_doacross_post: {
2213     // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2214     // *vec);
2215     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2216                                 CGM.Int64Ty->getPointerTo()};
2217     auto *FnTy =
2218         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2219     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2220     break;
2221   }
2222   case OMPRTL__kmpc_doacross_wait: {
2223     // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2224     // *vec);
2225     llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2226                                 CGM.Int64Ty->getPointerTo()};
2227     auto *FnTy =
2228         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2229     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2230     break;
2231   }
2232   case OMPRTL__kmpc_task_reduction_init: {
2233     // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2234     // *data);
2235     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2236     auto *FnTy =
2237         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2238     RTLFn =
2239         CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2240     break;
2241   }
2242   case OMPRTL__kmpc_task_reduction_get_th_data: {
2243     // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2244     // *d);
2245     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2246     auto *FnTy =
2247         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2248     RTLFn = CGM.CreateRuntimeFunction(
2249         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2250     break;
2251   }
2252   case OMPRTL__kmpc_alloc: {
2253     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2254     // al); omp_allocator_handle_t type is void *.
2255     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2256     auto *FnTy =
2257         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2258     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2259     break;
2260   }
2261   case OMPRTL__kmpc_free: {
2262     // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2263     // al); omp_allocator_handle_t type is void *.
2264     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2265     auto *FnTy =
2266         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2267     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2268     break;
2269   }
2270   case OMPRTL__kmpc_push_target_tripcount: {
2271     // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2272     // size);
2273     llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2274     llvm::FunctionType *FnTy =
2275         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2276     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2277     break;
2278   }
2279   case OMPRTL__tgt_target: {
2280     // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2281     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2282     // *arg_types);
2283     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284                                 CGM.VoidPtrTy,
2285                                 CGM.Int32Ty,
2286                                 CGM.VoidPtrPtrTy,
2287                                 CGM.VoidPtrPtrTy,
2288                                 CGM.Int64Ty->getPointerTo(),
2289                                 CGM.Int64Ty->getPointerTo()};
2290     auto *FnTy =
2291         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2292     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2293     break;
2294   }
2295   case OMPRTL__tgt_target_nowait: {
2296     // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2297     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2298     // int64_t *arg_types);
2299     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2300                                 CGM.VoidPtrTy,
2301                                 CGM.Int32Ty,
2302                                 CGM.VoidPtrPtrTy,
2303                                 CGM.VoidPtrPtrTy,
2304                                 CGM.Int64Ty->getPointerTo(),
2305                                 CGM.Int64Ty->getPointerTo()};
2306     auto *FnTy =
2307         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2308     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2309     break;
2310   }
2311   case OMPRTL__tgt_target_teams: {
2312     // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2313     // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2314     // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2315     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2316                                 CGM.VoidPtrTy,
2317                                 CGM.Int32Ty,
2318                                 CGM.VoidPtrPtrTy,
2319                                 CGM.VoidPtrPtrTy,
2320                                 CGM.Int64Ty->getPointerTo(),
2321                                 CGM.Int64Ty->getPointerTo(),
2322                                 CGM.Int32Ty,
2323                                 CGM.Int32Ty};
2324     auto *FnTy =
2325         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2326     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2327     break;
2328   }
2329   case OMPRTL__tgt_target_teams_nowait: {
2330     // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2331     // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2332     // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2333     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2334                                 CGM.VoidPtrTy,
2335                                 CGM.Int32Ty,
2336                                 CGM.VoidPtrPtrTy,
2337                                 CGM.VoidPtrPtrTy,
2338                                 CGM.Int64Ty->getPointerTo(),
2339                                 CGM.Int64Ty->getPointerTo(),
2340                                 CGM.Int32Ty,
2341                                 CGM.Int32Ty};
2342     auto *FnTy =
2343         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2344     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2345     break;
2346   }
2347   case OMPRTL__tgt_register_requires: {
2348     // Build void __tgt_register_requires(int64_t flags);
2349     llvm::Type *TypeParams[] = {CGM.Int64Ty};
2350     auto *FnTy =
2351         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2352     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2353     break;
2354   }
2355   case OMPRTL__tgt_register_lib: {
2356     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2357     QualType ParamTy =
2358         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2359     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2360     auto *FnTy =
2361         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2362     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2363     break;
2364   }
2365   case OMPRTL__tgt_unregister_lib: {
2366     // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2367     QualType ParamTy =
2368         CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2369     llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2370     auto *FnTy =
2371         llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2372     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2373     break;
2374   }
2375   case OMPRTL__tgt_target_data_begin: {
2376     // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2377     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2378     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2379                                 CGM.Int32Ty,
2380                                 CGM.VoidPtrPtrTy,
2381                                 CGM.VoidPtrPtrTy,
2382                                 CGM.Int64Ty->getPointerTo(),
2383                                 CGM.Int64Ty->getPointerTo()};
2384     auto *FnTy =
2385         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2387     break;
2388   }
2389   case OMPRTL__tgt_target_data_begin_nowait: {
2390     // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2391     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2392     // *arg_types);
2393     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2394                                 CGM.Int32Ty,
2395                                 CGM.VoidPtrPtrTy,
2396                                 CGM.VoidPtrPtrTy,
2397                                 CGM.Int64Ty->getPointerTo(),
2398                                 CGM.Int64Ty->getPointerTo()};
2399     auto *FnTy =
2400         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2402     break;
2403   }
2404   case OMPRTL__tgt_target_data_end: {
2405     // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2406     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2407     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2408                                 CGM.Int32Ty,
2409                                 CGM.VoidPtrPtrTy,
2410                                 CGM.VoidPtrPtrTy,
2411                                 CGM.Int64Ty->getPointerTo(),
2412                                 CGM.Int64Ty->getPointerTo()};
2413     auto *FnTy =
2414         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2415     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2416     break;
2417   }
2418   case OMPRTL__tgt_target_data_end_nowait: {
2419     // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2420     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2421     // *arg_types);
2422     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2423                                 CGM.Int32Ty,
2424                                 CGM.VoidPtrPtrTy,
2425                                 CGM.VoidPtrPtrTy,
2426                                 CGM.Int64Ty->getPointerTo(),
2427                                 CGM.Int64Ty->getPointerTo()};
2428     auto *FnTy =
2429         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2430     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2431     break;
2432   }
2433   case OMPRTL__tgt_target_data_update: {
2434     // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2435     // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2436     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2437                                 CGM.Int32Ty,
2438                                 CGM.VoidPtrPtrTy,
2439                                 CGM.VoidPtrPtrTy,
2440                                 CGM.Int64Ty->getPointerTo(),
2441                                 CGM.Int64Ty->getPointerTo()};
2442     auto *FnTy =
2443         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2444     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2445     break;
2446   }
2447   case OMPRTL__tgt_target_data_update_nowait: {
2448     // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2449     // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2450     // *arg_types);
2451     llvm::Type *TypeParams[] = {CGM.Int64Ty,
2452                                 CGM.Int32Ty,
2453                                 CGM.VoidPtrPtrTy,
2454                                 CGM.VoidPtrPtrTy,
2455                                 CGM.Int64Ty->getPointerTo(),
2456                                 CGM.Int64Ty->getPointerTo()};
2457     auto *FnTy =
2458         llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2459     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2460     break;
2461   }
2462   }
2463   assert(RTLFn && "Unable to find OpenMP runtime function");
2464   return RTLFn;
2465 }
2466
2467 llvm::FunctionCallee
2468 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2469   assert((IVSize == 32 || IVSize == 64) &&
2470          "IV size is not compatible with the omp runtime");
2471   StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2472                                             : "__kmpc_for_static_init_4u")
2473                                 : (IVSigned ? "__kmpc_for_static_init_8"
2474                                             : "__kmpc_for_static_init_8u");
2475   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2476   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2477   llvm::Type *TypeParams[] = {
2478     getIdentTyPointerTy(),                     // loc
2479     CGM.Int32Ty,                               // tid
2480     CGM.Int32Ty,                               // schedtype
2481     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2482     PtrTy,                                     // p_lower
2483     PtrTy,                                     // p_upper
2484     PtrTy,                                     // p_stride
2485     ITy,                                       // incr
2486     ITy                                        // chunk
2487   };
2488   auto *FnTy =
2489       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490   return CGM.CreateRuntimeFunction(FnTy, Name);
2491 }
2492
2493 llvm::FunctionCallee
2494 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2495   assert((IVSize == 32 || IVSize == 64) &&
2496          "IV size is not compatible with the omp runtime");
2497   StringRef Name =
2498       IVSize == 32
2499           ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2500           : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2501   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2502   llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2503                                CGM.Int32Ty,           // tid
2504                                CGM.Int32Ty,           // schedtype
2505                                ITy,                   // lower
2506                                ITy,                   // upper
2507                                ITy,                   // stride
2508                                ITy                    // chunk
2509   };
2510   auto *FnTy =
2511       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2512   return CGM.CreateRuntimeFunction(FnTy, Name);
2513 }
2514
2515 llvm::FunctionCallee
2516 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2517   assert((IVSize == 32 || IVSize == 64) &&
2518          "IV size is not compatible with the omp runtime");
2519   StringRef Name =
2520       IVSize == 32
2521           ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2522           : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2523   llvm::Type *TypeParams[] = {
2524       getIdentTyPointerTy(), // loc
2525       CGM.Int32Ty,           // tid
2526   };
2527   auto *FnTy =
2528       llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2529   return CGM.CreateRuntimeFunction(FnTy, Name);
2530 }
2531
2532 llvm::FunctionCallee
2533 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2534   assert((IVSize == 32 || IVSize == 64) &&
2535          "IV size is not compatible with the omp runtime");
2536   StringRef Name =
2537       IVSize == 32
2538           ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2539           : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2540   llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2541   auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2542   llvm::Type *TypeParams[] = {
2543     getIdentTyPointerTy(),                     // loc
2544     CGM.Int32Ty,                               // tid
2545     llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2546     PtrTy,                                     // p_lower
2547     PtrTy,                                     // p_upper
2548     PtrTy                                      // p_stride
2549   };
2550   auto *FnTy =
2551       llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2552   return CGM.CreateRuntimeFunction(FnTy, Name);
2553 }
2554
// Returns the address of the "<mangled-name>_decl_tgt_ref_ptr" indirection
// pointer for a declare-target variable, creating the pointer on first use;
// returns Address::invalid() when no such indirection applies.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // In simd-only mode no device code is generated, so no reference pointer
  // is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // The indirection pointer is used for 'link' variables, and for 'to'
  // variables when unified shared memory is required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Derive the pointer's name from the variable's mangled name.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
    }
    // Reuse an already-emitted pointer if present; otherwise create it.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);
      // On the host, the pointer is externally visible and initialized with
      // the host address of the variable itself.
      if (!CGM.getLangOpts().OpenMPIsDevice) {
        auto *GV = cast<llvm::GlobalVariable>(Ptr);
        GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      }
      // Keep the pointer alive in llvm.used and record it in the offload
      // entry table.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
2585
2586 llvm::Constant *
2587 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2588   assert(!CGM.getLangOpts().OpenMPUseTLS ||
2589          !CGM.getContext().getTargetInfo().isTLSSupported());
2590   // Lookup the entry, lazily creating it if necessary.
2591   std::string Suffix = getName({"cache", ""});
2592   return getOrCreateInternalVariable(
2593       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2594 }
2595
2596 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2597                                                 const VarDecl *VD,
2598                                                 Address VDAddr,
2599                                                 SourceLocation Loc) {
2600   if (CGM.getLangOpts().OpenMPUseTLS &&
2601       CGM.getContext().getTargetInfo().isTLSSupported())
2602     return VDAddr;
2603
2604   llvm::Type *VarTy = VDAddr.getElementType();
2605   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2606                          CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2607                                                        CGM.Int8PtrTy),
2608                          CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2609                          getOrCreateThreadPrivateCache(VD)};
2610   return Address(CGF.EmitRuntimeCall(
2611       createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2612                  VDAddr.getAlignment());
2613 }
2614
2615 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2616     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2617     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2618   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2619   // library.
2620   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2621   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2622                       OMPLoc);
2623   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2624   // to register constructor/destructor for variable.
2625   llvm::Value *Args[] = {
2626       OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2627       Ctor, CopyCtor, Dtor};
2628   CGF.EmitRuntimeCall(
2629       createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2630 }
2631
2632 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2633     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2634     bool PerformInit, CodeGenFunction *CGF) {
2635   if (CGM.getLangOpts().OpenMPUseTLS &&
2636       CGM.getContext().getTargetInfo().isTLSSupported())
2637     return nullptr;
2638
2639   VD = VD->getDefinition(CGM.getContext());
2640   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2641     QualType ASTTy = VD->getType();
2642
2643     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2644     const Expr *Init = VD->getAnyInitializer();
2645     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2646       // Generate function that re-emits the declaration's initializer into the
2647       // threadprivate copy of the variable VD
2648       CodeGenFunction CtorCGF(CGM);
2649       FunctionArgList Args;
2650       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2651                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2652                             ImplicitParamDecl::Other);
2653       Args.push_back(&Dst);
2654
2655       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2656           CGM.getContext().VoidPtrTy, Args);
2657       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2658       std::string Name = getName({"__kmpc_global_ctor_", ""});
2659       llvm::Function *Fn =
2660           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2661       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2662                             Args, Loc, Loc);
2663       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2664           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2665           CGM.getContext().VoidPtrTy, Dst.getLocation());
2666       Address Arg = Address(ArgVal, VDAddr.getAlignment());
2667       Arg = CtorCGF.Builder.CreateElementBitCast(
2668           Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2669       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2670                                /*IsInitializer=*/true);
2671       ArgVal = CtorCGF.EmitLoadOfScalar(
2672           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2673           CGM.getContext().VoidPtrTy, Dst.getLocation());
2674       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2675       CtorCGF.FinishFunction();
2676       Ctor = Fn;
2677     }
2678     if (VD->getType().isDestructedType() != QualType::DK_none) {
2679       // Generate function that emits destructor call for the threadprivate copy
2680       // of the variable VD
2681       CodeGenFunction DtorCGF(CGM);
2682       FunctionArgList Args;
2683       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2684                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2685                             ImplicitParamDecl::Other);
2686       Args.push_back(&Dst);
2687
2688       const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2689           CGM.getContext().VoidTy, Args);
2690       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2691       std::string Name = getName({"__kmpc_global_dtor_", ""});
2692       llvm::Function *Fn =
2693           CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2694       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2695       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2696                             Loc, Loc);
2697       // Create a scope with an artificial location for the body of this function.
2698       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2699       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2700           DtorCGF.GetAddrOfLocalVar(&Dst),
2701           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2702       DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2703                           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2704                           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2705       DtorCGF.FinishFunction();
2706       Dtor = Fn;
2707     }
2708     // Do not emit init function if it is not required.
2709     if (!Ctor && !Dtor)
2710       return nullptr;
2711
2712     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2713     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2714                                                /*isVarArg=*/false)
2715                            ->getPointerTo();
2716     // Copying constructor for the threadprivate variable.
2717     // Must be NULL - reserved by runtime, but currently it requires that this
2718     // parameter is always NULL. Otherwise it fires assertion.
2719     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2720     if (Ctor == nullptr) {
2721       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2722                                              /*isVarArg=*/false)
2723                          ->getPointerTo();
2724       Ctor = llvm::Constant::getNullValue(CtorTy);
2725     }
2726     if (Dtor == nullptr) {
2727       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2728                                              /*isVarArg=*/false)
2729                          ->getPointerTo();
2730       Dtor = llvm::Constant::getNullValue(DtorTy);
2731     }
2732     if (!CGF) {
2733       auto *InitFunctionTy =
2734           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2735       std::string Name = getName({"__omp_threadprivate_init_", ""});
2736       llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2737           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2738       CodeGenFunction InitCGF(CGM);
2739       FunctionArgList ArgList;
2740       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2741                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
2742                             Loc, Loc);
2743       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2744       InitCGF.FinishFunction();
2745       return InitFunction;
2746     }
2747     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2748   }
2749   return nullptr;
2750 }
2751
2752 /// Obtain information that uniquely identifies a target entry. This
2753 /// consists of the file and device IDs as well as line number associated with
2754 /// the relevant entry source location.
2755 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2756                                      unsigned &DeviceID, unsigned &FileID,
2757                                      unsigned &LineNum) {
2758   SourceManager &SM = C.getSourceManager();
2759
2760   // The loc should be always valid and have a file ID (the user cannot use
2761   // #pragma directives in macros)
2762
2763   assert(Loc.isValid() && "Source location is expected to be always valid.");
2764
2765   PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2766   assert(PLoc.isValid() && "Source location is expected to be always valid.");
2767
2768   llvm::sys::fs::UniqueID ID;
2769   if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2770     SM.getDiagnostics().Report(diag::err_cannot_open_file)
2771         << PLoc.getFilename() << EC.message();
2772
2773   DeviceID = ID.getDevice();
2774   FileID = ID.getFile();
2775   LineNum = PLoc.getLine();
2776 }
2777
// Emits device constructor/destructor offload entries for a 'declare target'
// variable definition. Returns whether this is a device compilation.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Only variables mapped with 'declare target to' (and without the
  // 'requires unified_shared_memory' restriction) get ctor/dtor offload
  // entries; for everything else just report the compilation mode.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  // Emit the entries at most once per mangled name.
  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();

  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor alive even though only the offload entry refers to it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // On the host, emit only a private placeholder global whose address is
      // registered as the entry's ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor alive even though only the offload entry refers to it.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host-side private placeholder global used as the entry's ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2887
2888 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2889                                                           QualType VarType,
2890                                                           StringRef Name) {
2891   std::string Suffix = getName({"artificial", ""});
2892   std::string CacheSuffix = getName({"cache", ""});
2893   llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2894   llvm::Value *GAddr =
2895       getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2896   llvm::Value *Args[] = {
2897       emitUpdateLocation(CGF, SourceLocation()),
2898       getThreadID(CGF, SourceLocation()),
2899       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2900       CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2901                                 /*isSigned=*/false),
2902       getOrCreateInternalVariable(
2903           CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2904   return Address(
2905       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2906           CGF.EmitRuntimeCall(
2907               createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2908           VarLVType->getPointerTo(/*AddrSpace=*/0)),
2909       CGM.getPointerAlign());
2910 }
2911
2912 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2913                                       const RegionCodeGenTy &ThenGen,
2914                                       const RegionCodeGenTy &ElseGen) {
2915   CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2916
2917   // If the condition constant folds and can be elided, try to avoid emitting
2918   // the condition and the dead arm of the if/else.
2919   bool CondConstant;
2920   if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2921     if (CondConstant)
2922       ThenGen(CGF);
2923     else
2924       ElseGen(CGF);
2925     return;
2926   }
2927
2928   // Otherwise, the condition did not fold, or we couldn't elide it.  Just
2929   // emit the conditional branch.
2930   llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2931   llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2932   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2933   CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2934
2935   // Emit the 'then' code.
2936   CGF.EmitBlock(ThenBlock);
2937   ThenGen(CGF);
2938   CGF.EmitBranch(ContBlock);
2939   // Emit the 'else' code if present.
2940   // There is no need to emit line number for unconditional branch.
2941   (void)ApplyDebugLocation::CreateEmpty(CGF);
2942   CGF.EmitBlock(ElseBlock);
2943   ElseGen(CGF);
2944   // There is no need to emit line number for unconditional branch.
2945   (void)ApplyDebugLocation::CreateEmpty(CGF);
2946   CGF.EmitBranch(ContBlock);
2947   // Emit the continuation block for code after the if.
2948   CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2949 }
2950
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Forked version: hand the outlined microtask and the captured variables to
  // the runtime, which invokes it on every thread of the team.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized version (used when the 'if' clause is false at runtime): run
  // the outlined function on the current thread, bracketed by
  // __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero, CapturedStruct);
    Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                                        /*Name*/ ".zero.addr");
    CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  // With an 'if' clause, select between the forked and serialized versions at
  // runtime; otherwise always fork.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
3008
3009 // If we're inside an (outlined) parallel region, use the region info's
3010 // thread-ID variable (it is passed in a first argument of the outlined function
3011 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3012 // regular serial code region, get thread ID by calling kmp_int32
3013 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3014 // return the address of that temp.
3015 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3016                                              SourceLocation Loc) {
3017   if (auto *OMPRegionInfo =
3018           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3019     if (OMPRegionInfo->getThreadIDVariable())
3020       return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3021
3022   llvm::Value *ThreadID = getThreadID(CGF, Loc);
3023   QualType Int32Ty =
3024       CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3025   Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3026   CGF.EmitStoreOfScalar(ThreadID,
3027                         CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3028
3029   return ThreadIDTemp;
3030 }
3031
3032 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3033     llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3034   SmallString<256> Buffer;
3035   llvm::raw_svector_ostream Out(Buffer);
3036   Out << Name;
3037   StringRef RuntimeName = Out.str();
3038   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3039   if (Elem.second) {
3040     assert(Elem.second->getType()->getPointerElementType() == Ty &&
3041            "OMP internal variable has different type than requested");
3042     return &*Elem.second;
3043   }
3044
3045   return Elem.second = new llvm::GlobalVariable(
3046              CGM.getModule(), Ty, /*IsConstant*/ false,
3047              llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3048              Elem.first(), /*InsertBefore=*/nullptr,
3049              llvm::GlobalValue::NotThreadLocal, AddressSpace);
3050 }
3051
3052 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3053   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3054   std::string Name = getName({Prefix, "var"});
3055   return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3056 }
3057
3058 namespace {
3059 /// Common pre(post)-action for different OpenMP constructs.
3060 class CommonActionTy final : public PrePostActionTy {
3061   llvm::FunctionCallee EnterCallee;
3062   ArrayRef<llvm::Value *> EnterArgs;
3063   llvm::FunctionCallee ExitCallee;
3064   ArrayRef<llvm::Value *> ExitArgs;
3065   bool Conditional;
3066   llvm::BasicBlock *ContBlock = nullptr;
3067
3068 public:
3069   CommonActionTy(llvm::FunctionCallee EnterCallee,
3070                  ArrayRef<llvm::Value *> EnterArgs,
3071                  llvm::FunctionCallee ExitCallee,
3072                  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3073       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3074         ExitArgs(ExitArgs), Conditional(Conditional) {}
3075   void Enter(CodeGenFunction &CGF) override {
3076     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3077     if (Conditional) {
3078       llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3079       auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3080       ContBlock = CGF.createBasicBlock("omp_if.end");
3081       // Generate the branch (If-stmt)
3082       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3083       CGF.EmitBlock(ThenBlock);
3084     }
3085   }
3086   void Done(CodeGenFunction &CGF) {
3087     // Emit the rest of blocks/branches
3088     CGF.EmitBranch(ContBlock);
3089     CGF.EmitBlock(ContBlock, true);
3090   }
3091   void Exit(CodeGenFunction &CGF) override {
3092     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3093   }
3094 };
3095 } // anonymous namespace
3096
3097 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3098                                          StringRef CriticalName,
3099                                          const RegionCodeGenTy &CriticalOpGen,
3100                                          SourceLocation Loc, const Expr *Hint) {
3101   // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3102   // CriticalOpGen();
3103   // __kmpc_end_critical(ident_t *, gtid, Lock);
3104   // Prepare arguments and build a call to __kmpc_critical
3105   if (!CGF.HaveInsertPoint())
3106     return;
3107   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3108                          getCriticalRegionLock(CriticalName)};
3109   llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3110                                                 std::end(Args));
3111   if (Hint) {
3112     EnterArgs.push_back(CGF.Builder.CreateIntCast(
3113         CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3114   }
3115   CommonActionTy Action(
3116       createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3117                                  : OMPRTL__kmpc_critical),
3118       EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3119   CriticalOpGen.setAction(Action);
3120   emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3121 }
3122
3123 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3124                                        const RegionCodeGenTy &MasterOpGen,
3125                                        SourceLocation Loc) {
3126   if (!CGF.HaveInsertPoint())
3127     return;
3128   // if(__kmpc_master(ident_t *, gtid)) {
3129   //   MasterOpGen();
3130   //   __kmpc_end_master(ident_t *, gtid);
3131   // }
3132   // Prepare arguments and build a call to __kmpc_master
3133   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3134   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3135                         createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3136                         /*Conditional=*/true);
3137   MasterOpGen.setAction(Action);
3138   emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3139   Action.Done(CGF);
3140 }
3141
3142 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3143                                         SourceLocation Loc) {
3144   if (!CGF.HaveInsertPoint())
3145     return;
3146   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3147   llvm::Value *Args[] = {
3148       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3149       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3150   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3151   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3152     Region->emitUntiedSwitch(CGF);
3153 }
3154
3155 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3156                                           const RegionCodeGenTy &TaskgroupOpGen,
3157                                           SourceLocation Loc) {
3158   if (!CGF.HaveInsertPoint())
3159     return;
3160   // __kmpc_taskgroup(ident_t *, gtid);
3161   // TaskgroupOpGen();
3162   // __kmpc_end_taskgroup(ident_t *, gtid);
3163   // Prepare arguments and build a call to __kmpc_taskgroup
3164   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3165   CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3166                         createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3167                         Args);
3168   TaskgroupOpGen.setAction(Action);
3169   emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3170 }
3171
3172 /// Given an array of pointers to variables, project the address of a
3173 /// given variable.
3174 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3175                                       unsigned Index, const VarDecl *Var) {
3176   // Pull out the pointer to the variable.
3177   Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3178   llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3179
3180   Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3181   Addr = CGF.Builder.CreateElementBitCast(
3182       Addr, CGF.ConvertTypeForMem(Var->getType()));
3183   return Addr;
3184 }
3185
/// Emits the helper passed to __kmpc_copyprivate:
///   void copy_func(void *LHSArg, void *RHSArg);
/// Both arguments are arrays of void* pointing at the copyprivate variables;
/// each destination element is assigned from the matching source element
/// using the corresponding assignment operation.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Reinterpret the two opaque arguments as the pointer-array type built by
  // the caller:
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // Element-wise assignment:
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // EmitOMPCopy performs the copy via AssignmentOps[I], which for class
    // types may invoke a user-defined copy assignment.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
3239
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The copyprivate arrays are parallel: one entry per variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Overall expansion:
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // Flag recording whether this thread executed the single region; it is
    // passed to __kmpc_copyprivate below.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Still inside the conditional region, so only the executing thread
    // sets the flag.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Close the conditional region opened by __kmpc_single.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}
3320
3321 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3322                                         const RegionCodeGenTy &OrderedOpGen,
3323                                         SourceLocation Loc, bool IsThreads) {
3324   if (!CGF.HaveInsertPoint())
3325     return;
3326   // __kmpc_ordered(ident_t *, gtid);
3327   // OrderedOpGen();
3328   // __kmpc_end_ordered(ident_t *, gtid);
3329   // Prepare arguments and build a call to __kmpc_ordered
3330   if (IsThreads) {
3331     llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3332     CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3333                           createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3334                           Args);
3335     OrderedOpGen.setAction(Action);
3336     emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3337     return;
3338   }
3339   emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3340 }
3341
3342 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3343   unsigned Flags;
3344   if (Kind == OMPD_for)
3345     Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3346   else if (Kind == OMPD_sections)
3347     Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3348   else if (Kind == OMPD_single)
3349     Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3350   else if (Kind == OMPD_barrier)
3351     Flags = OMP_IDENT_BARRIER_EXPL;
3352   else
3353     Flags = OMP_IDENT_BARRIER_IMPL;
3354   return Flags;
3355 }
3356
3357 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3358     CodeGenFunction &CGF, const OMPLoopDirective &S,
3359     OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3360   // Check if the loop directive is actually a doacross loop directive. In this
3361   // case choose static, 1 schedule.
3362   if (llvm::any_of(
3363           S.getClausesOfKind<OMPOrderedClause>(),
3364           [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3365     ScheduleKind = OMPC_SCHEDULE_static;
3366     // Chunk size is 1 in this case.
3367     llvm::APInt ChunkSize(32, 1);
3368     ChunkExpr = IntegerLiteral::Create(
3369         CGF.getContext(), ChunkSize,
3370         CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3371         SourceLocation());
3372   }
3373 }
3374
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emits either
  //   __kmpc_cancel_barrier(loc, thread_id)  -- inside a cancellable region
  // or
  //   __kmpc_barrier(loc, thread_id)         -- otherwise.
  // The ident_t flags encode which construct the barrier belongs to.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Use the cancellable form only when the enclosing region supports
    // cancellation and the caller did not force the simple barrier.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // A nonzero result means cancellation was observed; branch out of the
        // construct through its cleanups:
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}
3412
3413 /// Map the OpenMP loop schedule to the runtime enumeration.
3414 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3415                                           bool Chunked, bool Ordered) {
3416   switch (ScheduleKind) {
3417   case OMPC_SCHEDULE_static:
3418     return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3419                    : (Ordered ? OMP_ord_static : OMP_sch_static);
3420   case OMPC_SCHEDULE_dynamic:
3421     return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3422   case OMPC_SCHEDULE_guided:
3423     return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3424   case OMPC_SCHEDULE_runtime:
3425     return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3426   case OMPC_SCHEDULE_auto:
3427     return Ordered ? OMP_ord_auto : OMP_sch_auto;
3428   case OMPC_SCHEDULE_unknown:
3429     assert(!Chunked && "chunk was specified but schedule kind not known");
3430     return Ordered ? OMP_ord_static : OMP_sch_static;
3431   }
3432   llvm_unreachable("Unexpected runtime schedule");
3433 }
3434
3435 /// Map the OpenMP distribute schedule to the runtime enumeration.
3436 static OpenMPSchedType
3437 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3438   // only static is allowed for dist_schedule
3439   return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3440 }
3441
3442 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3443                                          bool Chunked) const {
3444   OpenMPSchedType Schedule =
3445       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3446   return Schedule == OMP_sch_static;
3447 }
3448
3449 bool CGOpenMPRuntime::isStaticNonchunked(
3450     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3451   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3452   return Schedule == OMP_dist_sch_static;
3453 }
3454
3455 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3456                                       bool Chunked) const {
3457   OpenMPSchedType Schedule =
3458       getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3459   return Schedule == OMP_sch_static_chunked;
3460 }
3461
3462 bool CGOpenMPRuntime::isStaticChunked(
3463     OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3464   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3465   return Schedule == OMP_dist_sch_static_chunked;
3466 }
3467
3468 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3469   OpenMPSchedType Schedule =
3470       getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3471   assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3472   return Schedule != OMP_sch_static;
3473 }
3474
3475 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3476                                   OpenMPScheduleClauseModifier M1,
3477                                   OpenMPScheduleClauseModifier M2) {
3478   int Modifier = 0;
3479   switch (M1) {
3480   case OMPC_SCHEDULE_MODIFIER_monotonic:
3481     Modifier = OMP_sch_modifier_monotonic;
3482     break;
3483   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3484     Modifier = OMP_sch_modifier_nonmonotonic;
3485     break;
3486   case OMPC_SCHEDULE_MODIFIER_simd:
3487     if (Schedule == OMP_sch_static_chunked)
3488       Schedule = OMP_sch_static_balanced_chunked;
3489     break;
3490   case OMPC_SCHEDULE_MODIFIER_last:
3491   case OMPC_SCHEDULE_MODIFIER_unknown:
3492     break;
3493   }
3494   switch (M2) {
3495   case OMPC_SCHEDULE_MODIFIER_monotonic:
3496     Modifier = OMP_sch_modifier_monotonic;
3497     break;
3498   case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3499     Modifier = OMP_sch_modifier_nonmonotonic;
3500     break;
3501   case OMPC_SCHEDULE_MODIFIER_simd:
3502     if (Schedule == OMP_sch_static_chunked)
3503       Schedule = OMP_sch_static_balanced_chunked;
3504     break;
3505   case OMPC_SCHEDULE_MODIFIER_last:
3506   case OMPC_SCHEDULE_MODIFIER_unknown:
3507     break;
3508   }
3509   return Schedule | Modifier;
3510 }
3511
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unordered static schedules take the __kmpc_for_static_init path instead;
  // only ordered or truly dynamic schedules may reach the dispatch runtime.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
3543
// Shared helper for emitForStaticInit/emitDistributeStaticInit: emits the
// actual __kmpc_for_static_init_* call once the caller has resolved the
// schedule, location, and thread id.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  // Only the static-family schedules are handled here; ordered loops and
  // dynamic schedules go through __kmpc_dispatch_init instead.
  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // A missing chunk must mean a non-chunked schedule was chosen.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    // Conversely, a supplied chunk must pair with a chunked schedule.
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  // Bounds/stride are passed by pointer so the runtime can write back the
  // per-thread iteration space.
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
3592
3593 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3594                                         SourceLocation Loc,
3595                                         OpenMPDirectiveKind DKind,
3596                                         const OpenMPScheduleTy &ScheduleKind,
3597                                         const StaticRTInput &Values) {
3598   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3599       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3600   assert(isOpenMPWorksharingDirective(DKind) &&
3601          "Expected loop-based or sections-based directive.");
3602   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3603                                              isOpenMPLoopDirective(DKind)
3604                                                  ? OMP_IDENT_WORK_LOOP
3605                                                  : OMP_IDENT_WORK_SECTIONS);
3606   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3607   llvm::FunctionCallee StaticInitFunction =
3608       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3609   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3610                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3611 }
3612
3613 void CGOpenMPRuntime::emitDistributeStaticInit(
3614     CodeGenFunction &CGF, SourceLocation Loc,
3615     OpenMPDistScheduleClauseKind SchedKind,
3616     const CGOpenMPRuntime::StaticRTInput &Values) {
3617   OpenMPSchedType ScheduleNum =
3618       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3619   llvm::Value *UpdatedLocation =
3620       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3621   llvm::Value *ThreadId = getThreadID(CGF, Loc);
3622   llvm::FunctionCallee StaticInitFunction =
3623       createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3624   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3625                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3626                         OMPC_SCHEDULE_MODIFIER_unknown, Values);
3627 }
3628
3629 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3630                                           SourceLocation Loc,
3631                                           OpenMPDirectiveKind DKind) {
3632   if (!CGF.HaveInsertPoint())
3633     return;
3634   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3635   llvm::Value *Args[] = {
3636       emitUpdateLocation(CGF, Loc,
3637                          isOpenMPDistributeDirective(DKind)
3638                              ? OMP_IDENT_WORK_DISTRIBUTE
3639                              : isOpenMPLoopDirective(DKind)
3640                                    ? OMP_IDENT_WORK_LOOP
3641                                    : OMP_IDENT_WORK_SECTIONS),
3642       getThreadID(CGF, Loc)};
3643   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3644                       Args);
3645 }
3646
3647 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3648                                                  SourceLocation Loc,
3649                                                  unsigned IVSize,
3650                                                  bool IVSigned) {
3651   if (!CGF.HaveInsertPoint())
3652     return;
3653   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3654   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3655   CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3656 }
3657
3658 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3659                                           SourceLocation Loc, unsigned IVSize,
3660                                           bool IVSigned, Address IL,
3661                                           Address LB, Address UB,
3662                                           Address ST) {
3663   // Call __kmpc_dispatch_next(
3664   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3665   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3666   //          kmp_int[32|64] *p_stride);
3667   llvm::Value *Args[] = {
3668       emitUpdateLocation(CGF, Loc),
3669       getThreadID(CGF, Loc),
3670       IL.getPointer(), // &isLastIter
3671       LB.getPointer(), // &Lower
3672       UB.getPointer(), // &Upper
3673       ST.getPointer()  // &Stride
3674   };
3675   llvm::Value *Call =
3676       CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3677   return CGF.EmitScalarConversion(
3678       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3679       CGF.getContext().BoolTy, Loc);
3680 }
3681
3682 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3683                                            llvm::Value *NumThreads,
3684                                            SourceLocation Loc) {
3685   if (!CGF.HaveInsertPoint())
3686     return;
3687   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3688   llvm::Value *Args[] = {
3689       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3690       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3691   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3692                       Args);
3693 }
3694
3695 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3696                                          OpenMPProcBindClauseKind ProcBind,
3697                                          SourceLocation Loc) {
3698   if (!CGF.HaveInsertPoint())
3699     return;
3700   // Constants for proc bind value accepted by the runtime.
3701   enum ProcBindTy {
3702     ProcBindFalse = 0,
3703     ProcBindTrue,
3704     ProcBindMaster,
3705     ProcBindClose,
3706     ProcBindSpread,
3707     ProcBindIntel,
3708     ProcBindDefault
3709   } RuntimeProcBind;
3710   switch (ProcBind) {
3711   case OMPC_PROC_BIND_master:
3712     RuntimeProcBind = ProcBindMaster;
3713     break;
3714   case OMPC_PROC_BIND_close:
3715     RuntimeProcBind = ProcBindClose;
3716     break;
3717   case OMPC_PROC_BIND_spread:
3718     RuntimeProcBind = ProcBindSpread;
3719     break;
3720   case OMPC_PROC_BIND_unknown:
3721     llvm_unreachable("Unsupported proc_bind value.");
3722   }
3723   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3724   llvm::Value *Args[] = {
3725       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3726       llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3727   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3728 }
3729
3730 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3731                                 SourceLocation Loc) {
3732   if (!CGF.HaveInsertPoint())
3733     return;
3734   // Build call void __kmpc_flush(ident_t *loc)
3735   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3736                       emitUpdateLocation(CGF, Loc));
3737 }
3738
namespace {
/// Indexes of fields for type kmp_task_t.
/// NOTE(review): the enumerator order presumably mirrors the layout of the
/// kmp_task_t record built by the task codegen elsewhere in this file —
/// confirm against that record before reordering.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3764
3765 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3766   return OffloadEntriesTargetRegion.empty() &&
3767          OffloadEntriesDeviceGlobalVar.empty();
3768 }
3769
3770 /// Initialize target region entry.
3771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3772     initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3773                                     StringRef ParentName, unsigned LineNum,
3774                                     unsigned Order) {
3775   assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3776                                              "only required for the device "
3777                                              "code generation.");
3778   OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3779       OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3780                                    OMPTargetRegionEntryTargetRegion);
3781   ++OffloadingEntriesNum;
3782 }
3783
// Register a target region entry: on the device side the entry must already
// have been initialized (a missing entry is a hard error); on the host side
// a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      // The host and device must agree on the set of target regions; report
      // the mismatch instead of silently creating a new entry.
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    // Fill in the address/ID/flags computed during codegen of the region.
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: create and count a brand-new entry.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}
3811
3812 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3813     unsigned DeviceID, unsigned FileID, StringRef ParentName,
3814     unsigned LineNum) const {
3815   auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3816   if (PerDevice == OffloadEntriesTargetRegion.end())
3817     return false;
3818   auto PerFile = PerDevice->second.find(FileID);
3819   if (PerFile == PerDevice->second.end())
3820     return false;
3821   auto PerParentName = PerFile->second.find(ParentName);
3822   if (PerParentName == PerFile->second.end())
3823     return false;
3824   auto PerLine = PerParentName->second.find(LineNum);
3825   if (PerLine == PerParentName->second.end())
3826     return false;
3827   // Fail if this entry is already registered.
3828   if (PerLine->second.getAddress() || PerLine->second.getID())
3829     return false;
3830   return true;
3831 }
3832
3833 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3834     const OffloadTargetRegionEntryInfoActTy &Action) {
3835   // Scan all target region entries and perform the provided action.
3836   for (const auto &D : OffloadEntriesTargetRegion)
3837     for (const auto &F : D.second)
3838       for (const auto &P : F.second)
3839         for (const auto &L : P.second)
3840           Action(D.first, F.first, P.first(), L.first, L.second);
3841 }
3842
// Initialize a device global variable entry. Only a placeholder (order and
// flags) is recorded here; address, size, and linkage are attached later by
// registerDeviceGlobalVarEntryInfo.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}
3853
// Register a device global variable entry. On the device side the entry was
// pre-initialized; on the host side it may be created here. In both cases a
// previously-registered address is never overwritten — only a zero size is
// filled in.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered: at most fill in a missing size/linkage.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      // Host side, entry already present: same fill-in-only policy as above.
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // Host side, first time we see this variable: create and count the entry.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}
3893
3894 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3895     actOnDeviceGlobalVarEntriesInfo(
3896         const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3897   // Scan all target region entries and perform the provided action.
3898   for (const auto &E : OffloadEntriesDeviceGlobalVar)
3899     Action(E.getKey(), E.getValue());
3900 }
3901
3902 llvm::Function *
3903 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3904   // If we don't have entries or if we are emitting code for the device, we
3905   // don't need to do anything.
3906   if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3907     return nullptr;
3908
3909   llvm::Module &M = CGM.getModule();
3910   ASTContext &C = CGM.getContext();
3911
3912   // Get list of devices we care about
3913   const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3914
3915   // We should be creating an offloading descriptor only if there are devices
3916   // specified.
3917   assert(!Devices.empty() && "No OpenMP offloading devices??");
3918
3919   // Create the external variables that will point to the begin and end of the
3920   // host entries section. These will be defined by the linker.
3921   llvm::Type *OffloadEntryTy =
3922       CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3923   std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3924   auto *HostEntriesBegin = new llvm::GlobalVariable(
3925       M, OffloadEntryTy, /*isConstant=*/true,
3926       llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3927       EntriesBeginName);
3928   std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3929   auto *HostEntriesEnd =
3930       new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3931                                llvm::GlobalValue::ExternalLinkage,
3932                                /*Initializer=*/nullptr, EntriesEndName);
3933
3934   // Create all device images
3935   auto *DeviceImageTy = cast<llvm::StructType>(
3936       CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3937   ConstantInitBuilder DeviceImagesBuilder(CGM);
3938   ConstantArrayBuilder DeviceImagesEntries =
3939       DeviceImagesBuilder.beginArray(DeviceImageTy);
3940
3941   for (const llvm::Triple &Device : Devices) {
3942     StringRef T = Device.getTriple();
3943     std::string BeginName = getName({"omp_offloading", "img_start", ""});
3944     auto *ImgBegin = new llvm::GlobalVariable(
3945         M, CGM.Int8Ty, /*isConstant=*/true,
3946         llvm::GlobalValue::ExternalWeakLinkage,
3947         /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3948     std::string EndName = getName({"omp_offloading", "img_end", ""});
3949     auto *ImgEnd = new llvm::GlobalVariable(
3950         M, CGM.Int8Ty, /*isConstant=*/true,
3951         llvm::GlobalValue::ExternalWeakLinkage,
3952         /*Initializer=*/nullptr, Twine(EndName).concat(T));
3953
3954     llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3955                               HostEntriesEnd};
3956     createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3957                                              DeviceImagesEntries);
3958   }
3959
3960   // Create device images global array.
3961   std::string ImagesName = getName({"omp_offloading", "device_images"});
3962   llvm::GlobalVariable *DeviceImages =
3963       DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3964                                                 CGM.getPointerAlign(),
3965                                                 /*isConstant=*/true);
3966   DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3967
3968   // This is a Zero array to be used in the creation of the constant expressions
3969   llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3970                              llvm::Constant::getNullValue(CGM.Int32Ty)};
3971
3972   // Create the target region descriptor.
3973   llvm::Constant *Data[] = {
3974       llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3975       llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3976                                            DeviceImages, Index),
3977       HostEntriesBegin, HostEntriesEnd};
3978   std::string Descriptor = getName({"omp_offloading", "descriptor"});
3979   llvm::GlobalVariable *Desc = createGlobalStruct(
3980       CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3981
3982   // Emit code to register or unregister the descriptor at execution
3983   // startup or closing, respectively.
3984
3985   llvm::Function *UnRegFn;
3986   {
3987     FunctionArgList Args;
3988     ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3989     Args.push_back(&DummyPtr);
3990
3991     CodeGenFunction CGF(CGM);
3992     // Disable debug info for global (de-)initializer because they are not part
3993     // of some particular construct.
3994     CGF.disableDebugInfo();
3995     const auto &FI =
3996         CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3997     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3998     std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3999     UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
4000     CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
4001     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
4002                         Desc);
4003     CGF.FinishFunction();
4004   }
4005   llvm::Function *RegFn;
4006   {
4007     CodeGenFunction CGF(CGM);
4008     // Disable debug info for global (de-)initializer because they are not part
4009     // of some particular construct.
4010     CGF.disableDebugInfo();
4011     const auto &FI = CGM.getTypes().arrangeNullaryFunction();
4012     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4013
4014     // Encode offload target triples into the registration function name. It
4015     // will serve as a comdat key for the registration/unregistration code for
4016     // this particular combination of offloading targets.
4017     SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
4018     RegFnNameParts[0] = "omp_offloading";
4019     RegFnNameParts[1] = "descriptor_reg";
4020     llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
4021                     [](const llvm::Triple &T) -> const std::string& {
4022                       return T.getTriple();
4023                     });
4024     llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
4025     std::string Descriptor = getName(RegFnNameParts);
4026     RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
4027     CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
4028     CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
4029     // Create a variable to drive the registration and unregistration of the
4030     // descriptor, so we can reuse the logic that emits Ctors and Dtors.
4031     ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
4032                                   SourceLocation(), nullptr, C.CharTy,
4033                                   ImplicitParamDecl::Other);
4034     CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
4035     CGF.FinishFunction();
4036   }
4037   if (CGM.supportsCOMDAT()) {
4038     // It is sufficient to call registration function only once, so create a
4039     // COMDAT group for registration/unregistration functions and associated
4040     // data. That would reduce startup time and code size. Registration
4041     // function serves as a COMDAT group key.
4042     llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
4043     RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
4044     RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
4045     RegFn->setComdat(ComdatKey);
4046     UnRegFn->setComdat(ComdatKey);
4047     DeviceImages->setComdat(ComdatKey);
4048     Desc->setComdat(ComdatKey);
4049   }
4050   return RegFn;
4051 }
4052
4053 void CGOpenMPRuntime::createOffloadEntry(
4054     llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4055     llvm::GlobalValue::LinkageTypes Linkage) {
4056   StringRef Name = Addr->getName();
4057   llvm::Module &M = CGM.getModule();
4058   llvm::LLVMContext &C = M.getContext();
4059
4060   // Create constant string with the name.
4061   llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4062
4063   std::string StringName = getName({"omp_offloading", "entry_name"});
4064   auto *Str = new llvm::GlobalVariable(
4065       M, StrPtrInit->getType(), /*isConstant=*/true,
4066       llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4067   Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4068
4069   llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4070                             llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4071                             llvm::ConstantInt::get(CGM.SizeTy, Size),
4072                             llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4073                             llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4074   std::string EntryName = getName({"omp_offloading", "entry", ""});
4075   llvm::GlobalVariable *Entry = createGlobalStruct(
4076       CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4077       Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4078
4079   // The entry has to be created in the section the linker expects it to be.
4080   std::string Section = getName({"omp_offloading", "entries"});
4081   Entry->setSection(Section);
4082 }
4083
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are emitted in creation order; the emitter callbacks below fill
  // these arrays, indexed by each entry's order number.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Now emit the actual offload entry globals, in creation order, diagnosing
  // malformed entries along the way.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // On the device side under 'requires unified_shared_memory' no entry
        // is emitted for 'to' variables.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // 'link' entries are emitted only on the host side.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}
4241
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume host metadata.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR bitcode into a scratch context; only its named
  // metadata is inspected.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands of one metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 carries the entry kind; the remaining operand layout must
    // mirror the writer in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
4310
4311 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4312   if (!KmpRoutineEntryPtrTy) {
4313     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4314     ASTContext &C = CGM.getContext();
4315     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4316     FunctionProtoType::ExtProtoInfo EPI;
4317     KmpRoutineEntryPtrQTy = C.getPointerType(
4318         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4319     KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4320   }
4321 }
4322
4323 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4324   // Make sure the type of the entry is already created. This is the type we
4325   // have to create:
4326   // struct __tgt_offload_entry{
4327   //   void      *addr;       // Pointer to the offload entry info.
4328   //                          // (function or global)
4329   //   char      *name;       // Name of the function or global.
4330   //   size_t     size;       // Size of the entry info (0 if it a function).
4331   //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
4332   //   int32_t    reserved;   // Reserved, to use by the runtime library.
4333   // };
4334   if (TgtOffloadEntryQTy.isNull()) {
4335     ASTContext &C = CGM.getContext();
4336     RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4337     RD->startDefinition();
4338     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4339     addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4340     addFieldToRecordDecl(C, RD, C.getSizeType());
4341     addFieldToRecordDecl(
4342         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4343     addFieldToRecordDecl(
4344         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4345     RD->completeDefinition();
4346     RD->addAttr(PackedAttr::CreateImplicit(C));
4347     TgtOffloadEntryQTy = C.getRecordType(RD);
4348   }
4349   return TgtOffloadEntryQTy;
4350 }
4351
4352 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4353   // These are the types we need to build:
4354   // struct __tgt_device_image{
4355   // void   *ImageStart;       // Pointer to the target code start.
4356   // void   *ImageEnd;         // Pointer to the target code end.
4357   // // We also add the host entries to the device image, as it may be useful
4358   // // for the target runtime to have access to that information.
4359   // __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
4360   //                                       // the entries.
4361   // __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4362   //                                       // entries (non inclusive).
4363   // };
4364   if (TgtDeviceImageQTy.isNull()) {
4365     ASTContext &C = CGM.getContext();
4366     RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4367     RD->startDefinition();
4368     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4369     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4370     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4371     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4372     RD->completeDefinition();
4373     TgtDeviceImageQTy = C.getRecordType(RD);
4374   }
4375   return TgtDeviceImageQTy;
4376 }
4377
4378 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4379   // struct __tgt_bin_desc{
4380   //   int32_t              NumDevices;      // Number of devices supported.
4381   //   __tgt_device_image   *DeviceImages;   // Arrays of device images
4382   //                                         // (one per device).
4383   //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
4384   //                                         // entries.
4385   //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
4386   //                                         // entries (non inclusive).
4387   // };
4388   if (TgtBinaryDescriptorQTy.isNull()) {
4389     ASTContext &C = CGM.getContext();
4390     RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4391     RD->startDefinition();
4392     addFieldToRecordDecl(
4393         C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4394     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4395     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4396     addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4397     RD->completeDefinition();
4398     TgtBinaryDescriptorQTy = C.getRecordType(RD);
4399   }
4400   return TgtBinaryDescriptorQTy;
4401 }
4402
4403 namespace {
4404 struct PrivateHelpersTy {
4405   PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4406                    const VarDecl *PrivateElemInit)
4407       : Original(Original), PrivateCopy(PrivateCopy),
4408         PrivateElemInit(PrivateElemInit) {}
4409   const VarDecl *Original;
4410   const VarDecl *PrivateCopy;
4411   const VarDecl *PrivateElemInit;
4412 };
4413 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4414 } // anonymous namespace
4415
4416 static RecordDecl *
4417 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4418   if (!Privates.empty()) {
4419     ASTContext &C = CGM.getContext();
4420     // Build struct .kmp_privates_t. {
4421     //         /*  private vars  */
4422     //       };
4423     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4424     RD->startDefinition();
4425     for (const auto &Pair : Privates) {
4426       const VarDecl *VD = Pair.second.Original;
4427       QualType Type = VD->getType().getNonReferenceType();
4428       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4429       if (VD->hasAttrs()) {
4430         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4431              E(VD->getAttrs().end());
4432              I != E; ++I)
4433           FD->addAttr(*I);
4434       }
4435     }
4436     RD->completeDefinition();
4437     return RD;
4438   }
4439   return nullptr;
4440 }
4441
4442 static RecordDecl *
4443 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4444                          QualType KmpInt32Ty,
4445                          QualType KmpRoutineEntryPointerQTy) {
4446   ASTContext &C = CGM.getContext();
4447   // Build struct kmp_task_t {
4448   //         void *              shareds;
4449   //         kmp_routine_entry_t routine;
4450   //         kmp_int32           part_id;
4451   //         kmp_cmplrdata_t data1;
4452   //         kmp_cmplrdata_t data2;
4453   // For taskloops additional fields:
4454   //         kmp_uint64          lb;
4455   //         kmp_uint64          ub;
4456   //         kmp_int64           st;
4457   //         kmp_int32           liter;
4458   //         void *              reductions;
4459   //       };
4460   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4461   UD->startDefinition();
4462   addFieldToRecordDecl(C, UD, KmpInt32Ty);
4463   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4464   UD->completeDefinition();
4465   QualType KmpCmplrdataTy = C.getRecordType(UD);
4466   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4467   RD->startDefinition();
4468   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4469   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4470   addFieldToRecordDecl(C, RD, KmpInt32Ty);
4471   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4472   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4473   if (isOpenMPTaskLoopDirective(Kind)) {
4474     QualType KmpUInt64Ty =
4475         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4476     QualType KmpInt64Ty =
4477         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4478     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4479     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4480     addFieldToRecordDecl(C, RD, KmpInt64Ty);
4481     addFieldToRecordDecl(C, RD, KmpInt32Ty);
4482     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4483   }
4484   RD->completeDefinition();
4485   return RD;
4486 }
4487
4488 static RecordDecl *
4489 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4490                                      ArrayRef<PrivateDataTy> Privates) {
4491   ASTContext &C = CGM.getContext();
4492   // Build struct kmp_task_t_with_privates {
4493   //         kmp_task_t task_data;
4494   //         .kmp_privates_t. privates;
4495   //       };
4496   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4497   RD->startDefinition();
4498   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4499   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4500     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4501   RD->completeDefinition();
4502   return RD;
4503 }
4504
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Build the proxy's signature: kmp_int32 (kmp_int32 gtid, task_t *tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 0 of the wrapper record is the kmp_task_t descriptor itself.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed as a pointer to the descriptor field.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Field 1, if present, holds the privates record; pass null otherwise.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloop entries additionally receive lb, ub, st, liter and reductions
    // loaded from the kmp_task_t descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
4619
/// Emit a task destructor thunk with the same signature as the task entry
/// (kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt)) that schedules
/// destruction of every destructible field in the privates part of *tt.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Dereference the task argument and step to the privates record (the
  // second field of kmp_task_t_with_privates).
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every field whose type requires destruction.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
4668
4669 /// Emit a privates mapping function for correct handling of private and
4670 /// firstprivate variables.
4671 /// \code
4672 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4673 /// **noalias priv1,...,  <tyn> **noalias privn) {
4674 ///   *priv1 = &.privates.priv1;
4675 ///   ...;
4676 ///   *privn = &.privates.privn;
4677 /// }
4678 /// \endcode
4679 static llvm::Value *
4680 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4681                                ArrayRef<const Expr *> PrivateVars,
4682                                ArrayRef<const Expr *> FirstprivateVars,
4683                                ArrayRef<const Expr *> LastprivateVars,
4684                                QualType PrivatesQTy,
4685                                ArrayRef<PrivateDataTy> Privates) {
4686   ASTContext &C = CGM.getContext();
4687   FunctionArgList Args;
4688   ImplicitParamDecl TaskPrivatesArg(
4689       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4690       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4691       ImplicitParamDecl::Other);
4692   Args.push_back(&TaskPrivatesArg);
4693   llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4694   unsigned Counter = 1;
4695   for (const Expr *E : PrivateVars) {
4696     Args.push_back(ImplicitParamDecl::Create(
4697         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4698         C.getPointerType(C.getPointerType(E->getType()))
4699             .withConst()
4700             .withRestrict(),
4701         ImplicitParamDecl::Other));
4702     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4703     PrivateVarsPos[VD] = Counter;
4704     ++Counter;
4705   }
4706   for (const Expr *E : FirstprivateVars) {
4707     Args.push_back(ImplicitParamDecl::Create(
4708         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4709         C.getPointerType(C.getPointerType(E->getType()))
4710             .withConst()
4711             .withRestrict(),
4712         ImplicitParamDecl::Other));
4713     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4714     PrivateVarsPos[VD] = Counter;
4715     ++Counter;
4716   }
4717   for (const Expr *E : LastprivateVars) {
4718     Args.push_back(ImplicitParamDecl::Create(
4719         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4720         C.getPointerType(C.getPointerType(E->getType()))
4721             .withConst()
4722             .withRestrict(),
4723         ImplicitParamDecl::Other));
4724     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4725     PrivateVarsPos[VD] = Counter;
4726     ++Counter;
4727   }
4728   const auto &TaskPrivatesMapFnInfo =
4729       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4730   llvm::FunctionType *TaskPrivatesMapTy =
4731       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4732   std::string Name =
4733       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4734   auto *TaskPrivatesMap = llvm::Function::Create(
4735       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4736       &CGM.getModule());
4737   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4738                                     TaskPrivatesMapFnInfo);
4739   if (CGM.getLangOpts().Optimize) {
4740     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4741     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4742     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4743   }
4744   CodeGenFunction CGF(CGM);
4745   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4746                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
4747
4748   // *privi = &.privates.privi;
4749   LValue Base = CGF.EmitLoadOfPointerLValue(
4750       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4751       TaskPrivatesArg.getType()->castAs<PointerType>());
4752   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4753   Counter = 0;
4754   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4755     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4756     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4757     LValue RefLVal =
4758         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4759     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4760         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4761     CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4762     ++Counter;
4763   }
4764   CGF.FinishFunction();
4765   return TaskPrivatesMap;
4766 }
4767
/// Emit initialization for private variables in task-based directives.
/// Walks \a Privates in parallel with the fields of the privates record and
/// emits an initializer for each private copy that has one. Firstprivate
/// copies are initialized from the captured shareds record (reached through
/// \a KmpTaskSharedsPtr); other privates use their declared initializer.
/// \param TDBase LValue of the kmp_task_t_with_privates instance.
/// \param ForDup true when emitting inside the task_dup function; in that
/// case only non-trivial constructor-based initializers are emitted (trivial
/// data was already copied by the runtime's memcpy of the task).
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds pointer as the captured-record type so fields of the
    // original (shared) variables can be addressed below.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Iterate the fields of the privates record in lockstep with Privates
  // (both were built in the same, alignment-sorted, order).
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks a firstprivate copy: it must be
      // initialized from the corresponding shared variable.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Regular capture: take the shared value's field, re-wrapped with
          // the original declaration's alignment and TBAA info.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/aggregate firstprivate: bind the init helper variable to
          // the shared value's address, then emit the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: just run its own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
4872
4873 /// Check if duplication function is required for taskloops.
4874 static bool checkInitIsRequired(CodeGenFunction &CGF,
4875                                 ArrayRef<PrivateDataTy> Privates) {
4876   bool InitRequired = false;
4877   for (const PrivateDataTy &Pair : Privates) {
4878     const VarDecl *VD = Pair.second.PrivateCopy;
4879     const Expr *Init = VD->getAnyInitializer();
4880     InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4881                                     !CGF.isTrivialInitializer(Init));
4882     if (InitRequired)
4883       break;
4884   }
4885   return InitRequired;
4886 }
4887
4888
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
/// \param WithLastIter true if the taskloop has lastprivate clauses, in which
/// case the 'lastpriv' argument is stored into the destination task's
/// last-iteration field.
/// \return The newly created internal-linkage task_dup function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: (task_dst, task_src, int lastpriv).
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivate copies are initialized from the SOURCE task's shareds, so
    // load the shareds pointer out of task_src (note: this inner TDBase
    // intentionally shadows the destination-task TDBase above).
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4967
4968 /// Checks if destructor function is required to be generated.
4969 /// \return true if cleanups are required, false otherwise.
4970 static bool
4971 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4972   bool NeedsCleanup = false;
4973   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4974   const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4975   for (const FieldDecl *FD : PrivateRD->fields()) {
4976     NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4977     if (NeedsCleanup)
4978       break;
4979   }
4980   return NeedsCleanup;
4981 }
4982
/// Emit the task-allocation and task-setup code shared by 'task', 'taskloop'
/// and target-based task directives: gathers privatization data, builds the
/// kmp_task_t_with_privates record, emits the helper functions (proxy entry,
/// privates map, destructors, task_dup), calls the runtime allocator and
/// fills the resulting task object. Returns the pieces callers need to emit
/// the actual task-spawning runtime call.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  // Private vars: no element initializer.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivate vars additionally carry the helper variable used to
  // initialize the copy from the shared value.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // Lastprivate vars: no element initializer either.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Descending alignment order minimizes padding in the privates record;
  // stable sort keeps the relative order within equal alignments.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop and plain task use
  // different (cached) record layouts.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates-map function pointer is the 4th parameter of TaskFunction;
  // pass a null pointer when there is nothing to map.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // final clause: a non-null pointer means the condition is dynamic and must
  // be selected at run time; otherwise the boolean is known at compile time.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // View the runtime's opaque kmp_task_t* as our concrete record type.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a task_dup function when lastprivates must be flagged
    // or when any private requires non-trivial re-initialization per task.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
5202
5203 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5204                                    const OMPExecutableDirective &D,
5205                                    llvm::Function *TaskFunction,
5206                                    QualType SharedsTy, Address Shareds,
5207                                    const Expr *IfCond,
5208                                    const OMPTaskDataTy &Data) {
5209   if (!CGF.HaveInsertPoint())
5210     return;
5211
5212   TaskResultTy Result =
5213       emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5214   llvm::Value *NewTask = Result.NewTask;
5215   llvm::Function *TaskEntry = Result.TaskEntry;
5216   llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5217   LValue TDBase = Result.TDBase;
5218   const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5219   ASTContext &C = CGM.getContext();
5220   // Process list of dependences.
5221   Address DependenciesArray = Address::invalid();
5222   unsigned NumDependencies = Data.Dependences.size();
5223   if (NumDependencies) {
5224     // Dependence kind for RTL.
5225     enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5226     enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5227     RecordDecl *KmpDependInfoRD;
5228     QualType FlagsTy =
5229         C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5230     llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5231     if (KmpDependInfoTy.isNull()) {
5232       KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5233       KmpDependInfoRD->startDefinition();
5234       addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5235       addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5236       addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5237       KmpDependInfoRD->completeDefinition();
5238       KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5239     } else {
5240       KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5241     }
5242     // Define type kmp_depend_info[<Dependences.size()>];
5243     QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5244         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5245         ArrayType::Normal, /*IndexTypeQuals=*/0);
5246     // kmp_depend_info[<Dependences.size()>] deps;
5247     DependenciesArray =
5248         CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5249     for (unsigned I = 0; I < NumDependencies; ++I) {
5250       const Expr *E = Data.Dependences[I].second;
5251       LValue Addr = CGF.EmitLValue(E);
5252       llvm::Value *Size;
5253       QualType Ty = E->getType();
5254       if (const auto *ASE =
5255               dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5256         LValue UpAddrLVal =
5257             CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5258         llvm::Value *UpAddr =
5259             CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5260         llvm::Value *LowIntPtr =
5261             CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5262         llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5263         Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5264       } else {
5265         Size = CGF.getTypeSize(Ty);
5266       }
5267       LValue Base = CGF.MakeAddrLValue(
5268           CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5269           KmpDependInfoTy);
5270       // deps[i].base_addr = &<Dependences[i].second>;
5271       LValue BaseAddrLVal = CGF.EmitLValueForField(
5272           Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5273       CGF.EmitStoreOfScalar(
5274           CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5275           BaseAddrLVal);
5276       // deps[i].len = sizeof(<Dependences[i].second>);
5277       LValue LenLVal = CGF.EmitLValueForField(
5278           Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5279       CGF.EmitStoreOfScalar(Size, LenLVal);
5280       // deps[i].flags = <Dependences[i].first>;
5281       RTLDependenceKindTy DepKind;
5282       switch (Data.Dependences[I].first) {
5283       case OMPC_DEPEND_in:
5284         DepKind = DepIn;
5285         break;
5286       // Out and InOut dependencies must use the same code.
5287       case OMPC_DEPEND_out:
5288       case OMPC_DEPEND_inout:
5289         DepKind = DepInOut;
5290         break;
5291       case OMPC_DEPEND_mutexinoutset:
5292         DepKind = DepMutexInOutSet;
5293         break;
5294       case OMPC_DEPEND_source:
5295       case OMPC_DEPEND_sink:
5296       case OMPC_DEPEND_unknown:
5297         llvm_unreachable("Unknown task dependence type");
5298       }
5299       LValue FlagsLVal = CGF.EmitLValueForField(
5300           Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5301       CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5302                             FlagsLVal);
5303     }
5304     DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5305         CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5306   }
5307
5308   // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5309   // libcall.
5310   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5311   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5312   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5313   // list is not empty
5314   llvm::Value *ThreadID = getThreadID(CGF, Loc);
5315   llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5316   llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5317   llvm::Value *DepTaskArgs[7];
5318   if (NumDependencies) {
5319     DepTaskArgs[0] = UpLoc;
5320     DepTaskArgs[1] = ThreadID;
5321     DepTaskArgs[2] = NewTask;
5322     DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5323     DepTaskArgs[4] = DependenciesArray.getPointer();
5324     DepTaskArgs[5] = CGF.Builder.getInt32(0);
5325     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5326   }
5327   auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5328                         &TaskArgs,
5329                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5330     if (!Data.Tied) {
5331       auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5332       LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5333       CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5334     }
5335     if (NumDependencies) {
5336       CGF.EmitRuntimeCall(
5337           createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5338     } else {
5339       CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5340                           TaskArgs);
5341     }
5342     // Check if parent region is untied and build return for untied task;
5343     if (auto *Region =
5344             dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5345       Region->emitUntiedSwitch(CGF);
5346   };
5347
5348   llvm::Value *DepWaitTaskArgs[6];
5349   if (NumDependencies) {
5350     DepWaitTaskArgs[0] = UpLoc;
5351     DepWaitTaskArgs[1] = ThreadID;
5352     DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5353     DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5354     DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5355     DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5356   }
5357   auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5358                         NumDependencies, &DepWaitTaskArgs,
5359                         Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5360     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5361     CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5362     // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5363     // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5364     // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5365     // is specified.
5366     if (NumDependencies)
5367       CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5368                           DepWaitTaskArgs);
5369     // Call proxy_task_entry(gtid, new_task);
5370     auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5371                       Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5372       Action.Enter(CGF);
5373       llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5374       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5375                                                           OutlinedFnArgs);
5376     };
5377
5378     // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5379     // kmp_task_t *new_task);
5380     // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5381     // kmp_task_t *new_task);
5382     RegionCodeGenTy RCG(CodeGen);
5383     CommonActionTy Action(
5384         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5385         RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5386     RCG.setAction(Action);
5387     RCG(CGF);
5388   };
5389
5390   if (IfCond) {
5391     emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5392   } else {
5393     RegionCodeGenTy ThenRCG(ThenCodeGen);
5394     ThenRCG(CGF);
5395   }
5396 }
5397
/// Emits a call to the '__kmpc_taskloop' runtime entry point for the given
/// loop directive \p D. The kmp_task_t object is created via emitTaskInit and
/// its lower-bound, upper-bound, stride and reductions fields are filled in
/// before the runtime call.
/// \param TaskFunction Outlined function for the task body (forwarded to
/// emitTaskInit).
/// \param SharedsTy Type of the record capturing the shared variables.
/// \param Shareds Address of that record.
/// \param IfCond Expression of the 'if' clause, or nullptr when absent.
/// \param Data Collected clause data (tiedness, schedule, reductions, ...).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  // Create and initialize the kmp_task_t object for this task.
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val argument: the evaluated 'if' clause condition, or 1 when there is
  // no 'if' clause.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound field of the task record from the directive's
  // lower-bound helper variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Likewise for the upper bound ...
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // ... and for the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values for the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
              CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: NumTasks when Schedule.getInt() is set, Grainsize when a
      // schedule value is present, NoSchedule otherwise.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup: pass the duplication function if one was generated,
      // otherwise a null pointer.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
5478
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr,EExpr,UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen for each element (used by the atomic-reduction generator).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays; emitArrayLength also
  // sets ElementTy (out-parameter) to that base element type.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Branch straight to the exit block when the array is empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers from one
  // iteration to the next.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so that RedOpGen
  // operates on one element pair at a time.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5558
5559 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5560 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5561 /// UDR combiner function.
5562 static void emitReductionCombiner(CodeGenFunction &CGF,
5563                                   const Expr *ReductionOp) {
5564   if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5565     if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5566       if (const auto *DRE =
5567               dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5568         if (const auto *DRD =
5569                 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5570           std::pair<llvm::Function *, llvm::Function *> Reduction =
5571               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5572           RValue Func = RValue::get(Reduction.first);
5573           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5574           CGF.EmitIgnoredExpr(ReductionOp);
5575           return;
5576         }
5577   CGF.EmitIgnoredExpr(ReductionOp);
5578 }
5579
/// Emits the outlined function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// that combines two lists of reduction variables. Each argument is a pointer
/// to an array of void* slots (cast to \p ArgsType), one slot per reduction
/// variable; a variably-modified private is followed by an extra slot holding
/// its element count.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  // Remap each LHS/RHS reduction variable onto the corresponding slot of the
  // argument arrays before emitting the combiner expressions.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The following slot carries the element count; bind it to the VLA size
      // expression so the variably-modified type can be emitted.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; array-typed privates get an element-wise loop.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5671
5672 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5673                                                   const Expr *ReductionOp,
5674                                                   const Expr *PrivateRef,
5675                                                   const DeclRefExpr *LHS,
5676                                                   const DeclRefExpr *RHS) {
5677   if (PrivateRef->getType()->isArrayType()) {
5678     // Emit reduction for array section.
5679     const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5680     const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5681     EmitOMPAggregateReduction(
5682         CGF, PrivateRef->getType(), LHSVar, RHSVar,
5683         [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5684           emitReductionCombiner(CGF, ReductionOp);
5685         });
5686   } else {
5687     // Emit reduction for array subscript or single variable.
5688     emitReductionCombiner(CGF, ReductionOp);
5689   }
5690 }
5691
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination is emitted; just apply each reduction operation
    // directly.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // For each reduction op: if it has the shape 'x = <expr involving x>', try
  // to emit it as a simple atomic update; otherwise fall back to a named
  // critical region.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback update generator: store the loaded 'x' value into
                // a temporary mapped to VD and re-emit the update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                           const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5995
5996 /// Generates unique name for artificial threadprivate variables.
5997 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5998 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5999                                       const Expr *Ref) {
6000   SmallString<256> Buffer;
6001   llvm::raw_svector_ostream Out(Buffer);
6002   const clang::DeclRefExpr *DE;
6003   const VarDecl *D = ::getBaseDecl(Ref, DE);
6004   if (!D)
6005     D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6006   D = D->getCanonicalDecl();
6007   std::string Name = CGM.getOpenMPRuntime().getName(
6008       {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6009   Out << Prefix << Name << "_"
6010       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6011   return Out.str();
6012 }
6013
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // The function takes a single void* argument - the private copy of the N-th
  // reduction item to initialize.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable (stored there by emitTaskReductionFixups).
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer), loaded from the artificial threadprivate variable.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer - pass a null pointer as the shared item address.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
6080
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS placeholder variables from the reduction clause; remapped below
  // onto the two function arguments.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  // Two void* arguments: the in/out item and the incoming item.
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
6158
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed when the reduction item requires no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  // The function takes a single void* argument - the private copy to destroy.
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}
6207
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to emit without an insert point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill in one array element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_size = <size of the item in chars>;
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when the item needs no cleanups).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 1 when delayed creation is required, 0 otherwise.
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}
6312
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the N-th reduction item
  // is non-constant (Sizes.second != nullptr); the generated
  // initializer/combiner/finalizer functions read the size back from it.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}
6339
6340 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6341                                               SourceLocation Loc,
6342                                               llvm::Value *ReductionsPtr,
6343                                               LValue SharedLVal) {
6344   // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6345   // *d);
6346   llvm::Value *Args[] = {
6347       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6348                                 /*isSigned=*/true),
6349       ReductionsPtr,
6350       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6351                                                       CGM.VoidPtrTy)};
6352   return Address(
6353       CGF.EmitRuntimeCall(
6354           createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6355       SharedLVal.getAlignment());
6356 }
6357
6358 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6359                                        SourceLocation Loc) {
6360   if (!CGF.HaveInsertPoint())
6361     return;
6362   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6363   // global_tid);
6364   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6365   // Ignore return result until untied tasks are supported.
6366   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6367   if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6368     Region->emitUntiedSwitch(CGF);
6369 }
6370
6371 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6372                                            OpenMPDirectiveKind InnerKind,
6373                                            const RegionCodeGenTy &CodeGen,
6374                                            bool HasCancel) {
6375   if (!CGF.HaveInsertPoint())
6376     return;
6377   InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6378   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6379 }
6380
namespace {
/// Cancellation kinds passed as the cncl_kind argument to the __kmpc_cancel
/// and __kmpc_cancellationpoint runtime calls (see emitCancelCall /
/// emitCancellationPointCall below).
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation construct.
  CancelParallel = 1,  // 'parallel' region.
  CancelLoop = 2,      // Worksharing loop ('for') region.
  CancelSections = 3,  // 'sections' region.
  CancelTaskgroup = 4  // 'taskgroup' region.
};
} // anonymous namespace
6390
6391 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6392   RTCancelKind CancelKind = CancelNoreq;
6393   if (CancelRegion == OMPD_parallel)
6394     CancelKind = CancelParallel;
6395   else if (CancelRegion == OMPD_for)
6396     CancelKind = CancelLoop;
6397   else if (CancelRegion == OMPD_sections)
6398     CancelKind = CancelSections;
6399   else {
6400     assert(CancelRegion == OMPD_taskgroup);
6401     CancelKind = CancelTaskgroup;
6402   }
6403   return CancelKind;
6404 }
6405
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The runtime returns nonzero if cancellation has been activated.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct: branch through the enclosing cleanups to the
      // cancellation destination of the current region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6440
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Code emitted for the case where cancellation is actually requested
    // (always, or when the 'if' clause condition holds).
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The runtime returns nonzero if cancellation has been activated.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct: branch through the enclosing cleanups to the
      // cancellation destination of the current region.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'cancel' with an 'if' clause: guard the runtime call; the else branch
      // emits nothing.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6482
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Record that at least one target region was emitted in this module.
  HasEmittedTargetRegion = true;
  // Delegate the actual outlining/registration work to the helper.
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
6492
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured target region into a function with the name built
  // above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host, a unique constant byte serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}
6559
6560 /// Checks if the expression is constant or does not have non-trivial function
6561 /// calls.
6562 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6563   // We can skip constant expressions.
6564   // We can skip expressions with trivial calls or simple expressions.
6565   return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6566           !E->hasNonTrivialCall(Ctx)) &&
6567          !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6568 }
6569
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Walk through nested compound statements looking for the single
  // "meaningful" child; return nullptr as soon as more than one is found.
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        // Constant/trivial expressions do not count as children.
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // These declaration kinds are ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when constexpr, or when trivial (or
              // reference) typed with no initializer or a trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6614
/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' region: look for a single directive nested inside the
    // captured statement to decide what to report.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        // Nested teams construct: emit its num_teams clause value, if
        // present, within the captured-statement context so that captured
        // expressions resolve correctly.
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams without a num_teams clause: 0 lets the runtime pick a
        // default.
        return Bld.getInt32(0);
      }
      // Nested parallel/simd region without teams: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive could be identified - number of teams is
    // unknown at this point.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directives: num_teams, if present, is attached
    // directly to \p D.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct can be associated: exactly one team.
    return Bld.getInt32(1);
  // The remaining kinds are not target execution directives and are excluded
  // by the assertion at the top of this function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6735
/// Look for a single 'parallel' or 'simd' region nested inside the captured
/// statement \p CS and compute the number of threads to use for it:
///   - nested simd region: 1 thread;
///   - nested parallel region: <cond> ? (<numthreads> ? <numthreads> : 0) : 1,
///     where <numthreads> is additionally clamped by \p DefaultThreadLimitVal
///     when that is non-null;
///   - otherwise: \p DefaultThreadLimitVal, or 0 (runtime default) if null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the if clause that applies to the parallel region: either an
        // unmodified 'if' or one with an explicit 'parallel' name modifier.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition means the
            // parallel region runs with a single thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the helper variables from the clause's pre-init statement
            // so the condition expression can reference them.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // No-init captures are only allocated and registered for
                  // cleanup; no initializer is emitted for them.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init helper variables, as for 'if' above.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp by the default limit when one is known:
        // min(DefaultThreadLimitVal, NumThreads) using unsigned compare.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: fall back to the default limit, or 0 to let
        // the runtime choose.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region is executed by a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
6827
/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': thread counts come from whatever is nested inside.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // A directly nested parallel/simd region fully determines the count.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Pick up a thread_limit clause on the nested directive, emitting its
      // pre-init helpers within the captured-statement context.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              // No-init captures are only allocated and registered for
              // cleanup; no initializer is emitted for them.
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams region that is not itself a distribute, descend one more
      // level to look at what the teams region contains.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A (non-simd) distribute region may in turn contain a parallel region
      // whose clauses determine the count (clamped by thread_limit).
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A simd region is executed by a single thread.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    // Fall back to thread_limit, or 0 to let the runtime choose.
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // thread_limit, if present, is attached directly to the directive here.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested plain 'distribute' may contain the parallel region that
      // determines the thread count.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined directives with a parallel part: combine if, thread_limit and
    // num_threads clauses attached to the directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      // Pick the if clause that applies to the parallel region: either an
      // unmodified 'if' or one with an explicit 'parallel' name modifier.
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Constant-false condition: single-threaded execution.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Use min(num_threads, thread_limit) when both are given (unsigned
      // compare).
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // simd variants execute with a single thread.
    return Bld.getInt32(1);
  // The remaining kinds are not target execution directives and are excluded
  // by the assertion at the top of this function.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
7040
7041 namespace {
7042 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7043
7044 // Utility to handle information from clauses associated with a given
7045 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7046 // It provides a convenient interface to obtain the information and generate
7047 // code for that information.
7048 class MappableExprsHandler {
7049 public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These bits are part of the interface with the offloading
  /// runtime library (NOTE(review): presumably they must stay in sync with
  /// the corresponding libomptarget definitions - confirm before changing
  /// any value).
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };
7087
7088   /// Class that associates information with a base pointer to be passed to the
7089   /// runtime library.
7090   class BasePointerInfo {
7091     /// The base pointer.
7092     llvm::Value *Ptr = nullptr;
7093     /// The base declaration that refers to this device pointer, or null if
7094     /// there is none.
7095     const ValueDecl *DevPtrDecl = nullptr;
7096
7097   public:
7098     BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7099         : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7100     llvm::Value *operator*() const { return Ptr; }
7101     const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7102     void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7103   };
7104
7105   using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7106   using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7107   using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7108
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // (field index, address) of the lowest mapped member of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // (field index, address) of the highest mapped member of the struct.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Base address of the struct itself.
    Address Base = Address::invalid();
  };
7120
7121 private:
  /// Information gathered for one component list of a map clause: the
  /// components themselves, the map type and modifiers, whether the runtime
  /// must return the device pointer for it, and whether the map is implicit.
  struct MapInfo {
    // Expression components (base plus accessors) this entry refers to.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Kind of the map clause ('to', 'from', 'tofrom', ...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Modifiers attached to the map clause (e.g. 'always').
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // True if the runtime has to return the device pointer for this entry
    // (use_device_ptr clause).
    bool ReturnDevicePointer = false;
    // True if the map was generated implicitly rather than written by the
    // user.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };
7139
  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression identifying the struct-member pointer.
    const Expr *IE = nullptr;
    // Declaration named by the use_device_ptr clause.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };
7150
7151   /// Directive from where the map clauses were extracted.
7152   const OMPExecutableDirective &CurDir;
7153
7154   /// Function the directive is being generated for.
7155   CodeGenFunction &CGF;
7156
7157   /// Set of all first private variables in the current directive.
7158   /// bool data is set to true if the variable is implicitly marked as
7159   /// firstprivate, false otherwise.
7160   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7161
7162   /// Map between device pointer declarations and their expression components.
7163   /// The key value for declarations in 'this' is null.
7164   llvm::DenseMap<
7165       const ValueDecl *,
7166       SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7167       DevPointersMap;
7168
  /// Compute the size in bytes (as an IR value) of the object designated by
  /// \p E. References are sized by their pointee, and array sections by
  /// length * element-size; anything else uses the size of its type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      // Element size comes from the pointee for pointer bases, or the element
      // type for array bases.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      // Size = length * element-size (no-unsigned-wrap multiply).
      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }
7210
7211   /// Return the corresponding bits for a given map clause modifier. Add
7212   /// a flag marking the map as a pointer if requested. Add a flag marking the
7213   /// map as the first one of a series of maps that relate to the same map
7214   /// expression.
7215   OpenMPOffloadMappingFlags getMapTypeBits(
7216       OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7217       bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7218     OpenMPOffloadMappingFlags Bits =
7219         IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7220     switch (MapType) {
7221     case OMPC_MAP_alloc:
7222     case OMPC_MAP_release:
7223       // alloc and release is the default behavior in the runtime library,  i.e.
7224       // if we don't pass any bits alloc/release that is what the runtime is
7225       // going to do. Therefore, we don't need to signal anything for these two
7226       // type modifiers.
7227       break;
7228     case OMPC_MAP_to:
7229       Bits |= OMP_MAP_TO;
7230       break;
7231     case OMPC_MAP_from:
7232       Bits |= OMP_MAP_FROM;
7233       break;
7234     case OMPC_MAP_tofrom:
7235       Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7236       break;
7237     case OMPC_MAP_delete:
7238       Bits |= OMP_MAP_DELETE;
7239       break;
7240     case OMPC_MAP_unknown:
7241       llvm_unreachable("Unexpected map type!");
7242     }
7243     if (AddPtrFlag)
7244       Bits |= OMP_MAP_PTR_AND_OBJ;
7245     if (AddIsTargetParamFlag)
7246       Bits |= OMP_MAP_TARGET_PARAM;
7247     if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7248         != MapModifiers.end())
7249       Bits |= OMP_MAP_ALWAYS;
7250     return Bits;
7251   }
7252
7253   /// Return true if the provided expression is a final array section. A
7254   /// final array section, is one whose length can't be proved to be one.
7255   bool isFinalArraySectionExpression(const Expr *E) const {
7256     const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7257
7258     // It is not an array section and therefore not a unity-size one.
7259     if (!OASE)
7260       return false;
7261
7262     // An array section with no colon always refer to a single element.
7263     if (OASE->getColonLoc().isInvalid())
7264       return false;
7265
7266     const Expr *Length = OASE->getLength();
7267
7268     // If we don't have a length we have to check if the array has size 1
7269     // for this dimension. Also, we should always expect a length if the
7270     // base type is pointer.
7271     if (!Length) {
7272       QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7273                              OASE->getBase()->IgnoreParenImpCasts())
7274                              .getCanonicalType();
7275       if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7276         return ATy->getSize().getSExtValue() != 1;
7277       // If we don't have a constant dimension length, we have to consider
7278       // the current section as having any size, so it is not necessarily
7279       // unitary. If it happen to be unity size, that's user fault.
7280       return true;
7281     }
7282
7283     // Check if the length evaluates to 1.
7284     Expr::EvalResult Result;
7285     if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7286       return true; // Can have more that size 1.
7287
7288     llvm::APSInt ConstLength = Result.Val.getInt();
7289     return ConstLength.getSExtValue() != 1;
7290   }
7291
  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  ///
  /// One entry is appended to \p BasePointers, \p Pointers, \p Sizes and
  /// \p Types per emitted map. \p PartialStruct is updated with the
  /// lowest/highest mapped members when individual fields of a struct are
  /// mapped, so the caller can later emit a combined entry. When
  /// \p OverlappedElements is non-empty, it lists component lists that
  /// overlap this one, and the data is emitted as per-chunk bit-copies of
  /// the non-overlapped regions only.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ  | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ  | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    // Peel the base component off as a subscript/section so we can recognize
    // a 'this'-based expression below.
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // The next non-overlapped chunk starts right past this component.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing chunk: from the current position up to one past
          // the highest element of the struct.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }
7728
7729   /// Return the adjusted map modifiers if the declaration a capture refers to
7730   /// appears in a first-private clause. This is expected to be used only with
7731   /// directives that start with 'target'.
7732   MappableExprsHandler::OpenMPOffloadMappingFlags
7733   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7734     assert(Cap.capturesVariable() && "Expected capture by reference only!");
7735
7736     // A first private variable captured by reference will use only the
7737     // 'private ptr' and 'map to' flag. Return the right flags if the captured
7738     // declaration is known as first-private in this handler.
7739     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7740       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7741           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7742         return MappableExprsHandler::OMP_MAP_ALWAYS |
7743                MappableExprsHandler::OMP_MAP_TO;
7744       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7745         return MappableExprsHandler::OMP_MAP_TO |
7746                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7747       return MappableExprsHandler::OMP_MAP_PRIVATE |
7748              MappableExprsHandler::OMP_MAP_TO;
7749     }
7750     return MappableExprsHandler::OMP_MAP_TO |
7751            MappableExprsHandler::OMP_MAP_FROM;
7752   }
7753
7754   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7755     // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7756     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7757                                                   << 48);
7758   }
7759
7760   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7761                                      OpenMPOffloadMappingFlags MemberOfFlag) {
7762     // If the entry is PTR_AND_OBJ but has not been marked with the special
7763     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7764     // marked as MEMBER_OF.
7765     if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7766         ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7767       return;
7768
7769     // Reset the placeholder value to prepare the flag for the assignment of the
7770     // proper MEMBER_OF value.
7771     Flags &= ~OMP_MAP_MEMBER_OF;
7772     Flags |= MemberOfFlag;
7773   }
7774
7775   void getPlainLayout(const CXXRecordDecl *RD,
7776                       llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7777                       bool AsBase) const {
7778     const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7779
7780     llvm::StructType *St =
7781         AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7782
7783     unsigned NumElements = St->getNumElements();
7784     llvm::SmallVector<
7785         llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7786         RecordLayout(NumElements);
7787
7788     // Fill bases.
7789     for (const auto &I : RD->bases()) {
7790       if (I.isVirtual())
7791         continue;
7792       const auto *Base = I.getType()->getAsCXXRecordDecl();
7793       // Ignore empty bases.
7794       if (Base->isEmpty() || CGF.getContext()
7795                                  .getASTRecordLayout(Base)
7796                                  .getNonVirtualSize()
7797                                  .isZero())
7798         continue;
7799
7800       unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7801       RecordLayout[FieldIndex] = Base;
7802     }
7803     // Fill in virtual bases.
7804     for (const auto &I : RD->vbases()) {
7805       const auto *Base = I.getType()->getAsCXXRecordDecl();
7806       // Ignore empty bases.
7807       if (Base->isEmpty())
7808         continue;
7809       unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7810       if (RecordLayout[FieldIndex])
7811         continue;
7812       RecordLayout[FieldIndex] = Base;
7813     }
7814     // Fill in all the fields.
7815     assert(!RD->isUnion() && "Unexpected union.");
7816     for (const auto *Field : RD->fields()) {
7817       // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7818       // will fill in later.)
7819       if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7820         unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7821         RecordLayout[FieldIndex] = Field;
7822       }
7823     }
7824     for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7825              &Data : RecordLayout) {
7826       if (Data.isNull())
7827         continue;
7828       if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7829         getPlainLayout(Base, Layout, /*AsBase=*/true);
7830       else
7831         Layout.push_back(Data.get<const FieldDecl *>());
7832     }
7833   }
7834
7835 public:
7836   MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7837       : CurDir(Dir), CGF(CGF) {
7838     // Extract firstprivate clause information.
7839     for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7840       for (const auto *D : C->varlists())
7841         FirstPrivateDecls.try_emplace(
7842             cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7843     // Extract device pointer clause information.
7844     for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7845       for (auto L : C->component_lists())
7846         DevPointersMap[L.first].push_back(L.second);
7847   }
7848
7849   /// Generate code for the combined entry if we have a partially mapped struct
7850   /// and take care of the mapping flags of the arguments corresponding to
7851   /// individual struct members.
7852   void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7853                          MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7854                          MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7855                          const StructRangeInfoTy &PartialStruct) const {
7856     // Base is the base of the struct
7857     BasePointers.push_back(PartialStruct.Base.getPointer());
7858     // Pointer is the address of the lowest element
7859     llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7860     Pointers.push_back(LB);
7861     // Size is (addr of {highest+1} element) - (addr of lowest element)
7862     llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7863     llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7864     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7865     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7866     llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7867     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7868                                                   /*isSigned=*/false);
7869     Sizes.push_back(Size);
7870     // Map type is always TARGET_PARAM
7871     Types.push_back(OMP_MAP_TARGET_PARAM);
7872     // Remove TARGET_PARAM flag from the first element
7873     (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7874
7875     // All other current entries will be MEMBER_OF the combined entry
7876     // (except for PTR_AND_OBJ entries which do not have a placeholder value
7877     // 0xFFFF in the MEMBER_OF field).
7878     OpenMPOffloadMappingFlags MemberOfFlag =
7879         getMemberOfFlag(BasePointers.size() - 1);
7880     for (auto &M : CurTypes)
7881       setCorrectMemberOfFlag(M, MemberOfFlag);
7882   }
7883
7884   /// Generate all the base pointers, section pointers, sizes and map
7885   /// types for the extracted mappable expressions. Also, for each item that
7886   /// relates with a device pointer, a pair of the relevant declaration and
7887   /// index where it occurs is appended to the device pointers info array.
7888   void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7889                        MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7890                        MapFlagsArrayTy &Types) const {
7891     // We have to process the component lists that relate with the same
7892     // declaration in a single chunk so that we can generate the map flags
7893     // correctly. Therefore, we organize all lists in a map.
7894     llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7895
7896     // Helper function to fill the information map for the different supported
7897     // clauses.
7898     auto &&InfoGen = [&Info](
7899         const ValueDecl *D,
7900         OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7901         OpenMPMapClauseKind MapType,
7902         ArrayRef<OpenMPMapModifierKind> MapModifiers,
7903         bool ReturnDevicePointer, bool IsImplicit) {
7904       const ValueDecl *VD =
7905           D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7906       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7907                             IsImplicit);
7908     };
7909
7910     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7911     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7912       for (const auto &L : C->component_lists()) {
7913         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7914             /*ReturnDevicePointer=*/false, C->isImplicit());
7915       }
7916     for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
7917       for (const auto &L : C->component_lists()) {
7918         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7919             /*ReturnDevicePointer=*/false, C->isImplicit());
7920       }
7921     for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
7922       for (const auto &L : C->component_lists()) {
7923         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7924             /*ReturnDevicePointer=*/false, C->isImplicit());
7925       }
7926
7927     // Look at the use_device_ptr clause information and mark the existing map
7928     // entries as such. If there is no map information for an entry in the
7929     // use_device_ptr list, we create one with map type 'alloc' and zero size
7930     // section. It is the user fault if that was not mapped before. If there is
7931     // no map information and the pointer is a struct member, then we defer the
7932     // emission of that entry until the whole struct has been processed.
7933     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7934         DeferredInfo;
7935
7936     // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7937     for (const auto *C :
7938         this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
7939       for (const auto &L : C->component_lists()) {
7940         assert(!L.second.empty() && "Not expecting empty list of components!");
7941         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7942         VD = cast<ValueDecl>(VD->getCanonicalDecl());
7943         const Expr *IE = L.second.back().getAssociatedExpression();
7944         // If the first component is a member expression, we have to look into
7945         // 'this', which maps to null in the map of map information. Otherwise
7946         // look directly for the information.
7947         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7948
7949         // We potentially have map information for this declaration already.
7950         // Look for the first set of components that refer to it.
7951         if (It != Info.end()) {
7952           auto CI = std::find_if(
7953               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7954                 return MI.Components.back().getAssociatedDeclaration() == VD;
7955               });
7956           // If we found a map entry, signal that the pointer has to be returned
7957           // and move on to the next declaration.
7958           if (CI != It->second.end()) {
7959             CI->ReturnDevicePointer = true;
7960             continue;
7961           }
7962         }
7963
7964         // We didn't find any match in our map information - generate a zero
7965         // size array section - if the pointer is a struct member we defer this
7966         // action until the whole struct has been processed.
7967         // FIXME: MSVC 2013 seems to require this-> to find member CGF.
7968         if (isa<MemberExpr>(IE)) {
7969           // Insert the pointer into Info to be processed by
7970           // generateInfoForComponentList. Because it is a member pointer
7971           // without a pointee, no entry will be generated for it, therefore
7972           // we need to generate one after the whole struct has been processed.
7973           // Nonetheless, generateInfoForComponentList must be called to take
7974           // the pointer into account for the calculation of the range of the
7975           // partial struct.
7976           InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7977                   /*ReturnDevicePointer=*/false, C->isImplicit());
7978           DeferredInfo[nullptr].emplace_back(IE, VD);
7979         } else {
7980           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7981               this->CGF.EmitLValue(IE), IE->getExprLoc());
7982           BasePointers.emplace_back(Ptr, VD);
7983           Pointers.push_back(Ptr);
7984           Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
7985           Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
7986         }
7987       }
7988     }
7989
7990     for (const auto &M : Info) {
7991       // We need to know when we generate information for the first component
7992       // associated with a capture, because the mapping flags depend on it.
7993       bool IsFirstComponentList = true;
7994
7995       // Temporary versions of arrays
7996       MapBaseValuesArrayTy CurBasePointers;
7997       MapValuesArrayTy CurPointers;
7998       MapValuesArrayTy CurSizes;
7999       MapFlagsArrayTy CurTypes;
8000       StructRangeInfoTy PartialStruct;
8001
8002       for (const MapInfo &L : M.second) {
8003         assert(!L.Components.empty() &&
8004                "Not expecting declaration with no component lists.");
8005
8006         // Remember the current base pointer index.
8007         unsigned CurrentBasePointersIdx = CurBasePointers.size();
8008         // FIXME: MSVC 2013 seems to require this-> to find the member method.
8009         this->generateInfoForComponentList(
8010             L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8011             CurPointers, CurSizes, CurTypes, PartialStruct,
8012             IsFirstComponentList, L.IsImplicit);
8013
8014         // If this entry relates with a device pointer, set the relevant
8015         // declaration and add the 'return pointer' flag.
8016         if (L.ReturnDevicePointer) {
8017           assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8018                  "Unexpected number of mapped base pointers.");
8019
8020           const ValueDecl *RelevantVD =
8021               L.Components.back().getAssociatedDeclaration();
8022           assert(RelevantVD &&
8023                  "No relevant declaration related with device pointer??");
8024
8025           CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8026           CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8027         }
8028         IsFirstComponentList = false;
8029       }
8030
8031       // Append any pending zero-length pointers which are struct members and
8032       // used with use_device_ptr.
8033       auto CI = DeferredInfo.find(M.first);
8034       if (CI != DeferredInfo.end()) {
8035         for (const DeferredDevicePtrEntryTy &L : CI->second) {
8036           llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8037           llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8038               this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8039           CurBasePointers.emplace_back(BasePtr, L.VD);
8040           CurPointers.push_back(Ptr);
8041           CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8042           // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8043           // value MEMBER_OF=FFFF so that the entry is later updated with the
8044           // correct value of MEMBER_OF.
8045           CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8046                              OMP_MAP_MEMBER_OF);
8047         }
8048       }
8049
8050       // If there is an entry in PartialStruct it means we have a struct with
8051       // individual members mapped. Emit an extra combined entry.
8052       if (PartialStruct.Base.isValid())
8053         emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8054                           PartialStruct);
8055
8056       // We need to append the results of this capture to what we already have.
8057       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8058       Pointers.append(CurPointers.begin(), CurPointers.end());
8059       Sizes.append(CurSizes.begin(), CurSizes.end());
8060       Types.append(CurTypes.begin(), CurTypes.end());
8061     }
8062   }
8063
8064   /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda captures need this treatment: bail out unless \p VD is a
    // lambda closure object (possibly behind a reference).
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    // \p Arg is the address of the closure object captured for the target
    // region; build an lvalue for it so we can address individual fields.
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // If the lambda captures 'this', emit a PTR_AND_OBJ entry for the
    // corresponding closure field so the captured 'this' pointer is
    // translated on the device.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      // Remember which closure object this field belongs to so the MEMBER_OF
      // placeholder can be fixed up later (adjustMemberOfForLambdaCaptures).
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: this VD intentionally shadows the parameter; from here on it is
      // the captured variable, not the lambda object.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need their stored
      // addresses translated; by-value non-pointer captures are already part
      // of the mapped closure object.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object (its full size).
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarLValVal.getPointer());
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: load its value and emit a zero-size entry so the
        // runtime only translates the pointer, mapping no storage.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }
8124
8125   /// Set correct indices for lambdas captures.
8126   void adjustMemberOfForLambdaCaptures(
8127       const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8128       MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8129       MapFlagsArrayTy &Types) const {
8130     for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8131       // Set correct member_of idx for all implicit lambda captures.
8132       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8133                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8134         continue;
8135       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8136       assert(BasePtr && "Unable to find base lambda address.");
8137       int TgtIdx = -1;
8138       for (unsigned J = I; J > 0; --J) {
8139         unsigned Idx = J - 1;
8140         if (Pointers[Idx] != BasePtr)
8141           continue;
8142         TgtIdx = Idx;
8143         break;
8144       }
8145       assert(TgtIdx != -1 && "Unable to find parent lambda.");
8146       // All other current entries will be MEMBER_OF the combined entry
8147       // (except for PTR_AND_OBJ entries which do not have a placeholder value
8148       // 0xFFFF in the MEMBER_OF field).
8149       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8150       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8151     }
8152   }
8153
8154   /// Generate the base pointers, section pointers, sizes and map types
8155   /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The captured declaration this map info refers to; a 'this' capture is
    // keyed by nullptr.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      // The mapped entity is the pointer itself, so its size is that of a
      // void pointer.
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every component list of every map clause that refers to this
    // capture, together with the clause's map type/modifiers and whether the
    // clause was implicit.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // OverlappedData maps each "base" component list to the lists that map a
    // sub-object of it; the overlap check walks both lists from the base
    // (reverse iteration) until they diverge.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare L with every list after it (each unordered pair is examined
      // exactly once).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list. The exhausted (shorter) list is the base, the
        // other maps a sub-object of it.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item. Layout holds the record's
    // fields in declaration order (flattening bases for C++ records) and is
    // used to order fields that belong to different parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order sub-object lists by memory layout so the base entry can later
      // be emitted around them in address order.
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of both component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Lists diverge at a field: order by field index within the same
            // parent, otherwise by whichever field appears first in Layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }
8329
8330   /// Generate the base pointers, section pointers, sizes and map types
8331   /// associated with the declare target link variables.
8332   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8333                                         MapValuesArrayTy &Pointers,
8334                                         MapValuesArrayTy &Sizes,
8335                                         MapFlagsArrayTy &Types) const {
8336     // Map other list items in the map clause which are not captured variables
8337     // but "declare target link" global variables.
8338     for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8339       for (const auto &L : C->component_lists()) {
8340         if (!L.first)
8341           continue;
8342         const auto *VD = dyn_cast<VarDecl>(L.first);
8343         if (!VD)
8344           continue;
8345         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8346             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8347         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8348             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8349           continue;
8350         StructRangeInfoTy PartialStruct;
8351         generateInfoForComponentList(
8352             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8353             Pointers, Sizes, Types, PartialStruct,
8354             /*IsFirstComponentList=*/true, C->isImplicit());
8355         assert(!PartialStruct.Base.isValid() &&
8356                "No partial structs for declare target link expected.");
8357       }
8358     }
8359   }
8360
8361   /// Generate the default map information for a given capture \a CI,
8362   /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    // Default maps are implicit unless the capture appears in a firstprivate
    // clause that says otherwise (checked against FirstPrivateDecls below).
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is captured as a pointer; map the pointed-to object with the
      // default 'tofrom' map type.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // A by-reference capture arrives as a reference type; map the referenced
      // object of ElementType.
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variables can be lowered to a per-target
        // global copy instead of mapping the original storage.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: dereference the reference so the pointer's
          // own value is what gets mapped.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
8444 };
8445 } // anonymous namespace
8446
8447 /// Emit the arrays used to pass the captures and map information to the
8448 /// offloading runtime library. If there is no map or capture information,
8449 /// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  // With no entries, Info keeps null arrays and nothing is emitted.
  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and pointers are always stack temporaries filled at
    // runtime (the values are runtime addresses).
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType =
          Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
                                   /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the runtime-populated arrays: store each base pointer, pointer,
    // and (if not constant-folded) size at index I.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Store through a slot cast to the value's own pointer type.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record the slot address of use_device_ptr/is_device_ptr entries so
      // callers can read back the translated device pointer.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
8565 /// Emit the arguments to be passed to the runtime library based on the
8566 /// arrays of pointers, sizes and map types.
8567 static void emitOffloadingArraysArgument(
8568     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8569     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8570     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8571   CodeGenModule &CGM = CGF.CGM;
8572   if (Info.NumberOfPtrs) {
8573     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8574         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8575         Info.BasePointersArray,
8576         /*Idx0=*/0, /*Idx1=*/0);
8577     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8578         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8579         Info.PointersArray,
8580         /*Idx0=*/0,
8581         /*Idx1=*/0);
8582     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8583         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8584         /*Idx0=*/0, /*Idx1=*/0);
8585     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8586         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8587         Info.MapTypesArray,
8588         /*Idx0=*/0,
8589         /*Idx1=*/0);
8590   } else {
8591     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8592     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8593     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8594     MapTypesArrayArg =
8595         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8596   }
8597 }
8598
/// Check for inner distribute directive.
///
/// Given a 'target' or 'target teams' directive \p D, returns the
/// 'distribute' directive nested (directly, or through one level of
/// 'teams') inside it, or nullptr if there is none.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  // Look through a wrapping compound statement for the single nested stmt.
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may contain the distribute directly, or contain a 'teams'
      // region which in turn contains the distribute.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These combined target forms cannot enclose a standalone distribute.
      return nullptr;
    // All remaining kinds either already combine distribute into the
    // directive itself or are not valid outer directives here; the caller
    // only passes target-related directives.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
8694
8695 void CGOpenMPRuntime::emitTargetNumIterationsCall(
8696     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8697     const llvm::function_ref<llvm::Value *(
8698         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
8699   OpenMPDirectiveKind Kind = D.getDirectiveKind();
8700   const OMPExecutableDirective *TD = &D;
8701   // Get nested teams distribute kind directive, if any.
8702   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
8703     TD = getNestedDistributeDirective(CGM.getContext(), D);
8704   if (!TD)
8705     return;
8706   const auto *LD = cast<OMPLoopDirective>(TD);
8707   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8708                                                      PrePostActionTy &) {
8709     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8710
8711     // Emit device ID if any.
8712     llvm::Value *DeviceID;
8713     if (Device)
8714       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8715                                            CGF.Int64Ty, /*isSigned=*/true);
8716     else
8717       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8718
8719     llvm::Value *Args[] = {DeviceID, NumIterations};
8720     CGF.EmitRuntimeCall(
8721         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
8722   };
8723   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
8724 }
8725
// Emit the offloading arrays and the __tgt_target*() call (with host
// fallback) for the target directive \p D, honoring its if/device clauses.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // With depend clauses the target call must be wrapped in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Compute the captured variables once up front; ThenGen/ElseGen below
  // regenerate them when an outer task is required.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled in by TargetThenGen and read by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside a task the captured variables must be re-captured in the
      // task's context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      // Re-capture the variables in the task's context.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Build the offloading arrays for the captured variables and then run
  // ThenGen, wrapping it in a task when depend clauses are present.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, their record fields, and their computed values in
    // lockstep to build the per-capture map information.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the array addresses so ThenGen can pass them to the runtime.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, wrapping it in a task when depend clauses
  // are present.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
8998
// Recursively scan \p S for OpenMP target directives and emit a device
// function for each target region found, using \p ParentName in the target
// entry mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    // The (DeviceID, FileID, ParentName, Line) tuple uniquely identifies
    // this target region in the offload entries table.
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the dedicated device-function emitter for each target
    // directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining kinds can pass the
    // isOpenMPTargetExecutionDirective() check above.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    // Non-target directives: recurse into their associated statement only.
    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
9136
9137 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9138   // If emitting code for the host, we do not process FD here. Instead we do
9139   // the normal code generation.
9140   if (!CGM.getLangOpts().OpenMPIsDevice)
9141     return false;
9142
9143   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9144   StringRef Name = CGM.getMangledName(GD);
9145   // Try to detect target regions in the function.
9146   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9147     scanForTargetRegionsFunctions(FD->getBody(), Name);
9148
9149   // Do not to emit function if it is not marked as declare target.
9150   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9151          AlreadyEmittedTargetFunctions.count(Name) == 0;
9152 }
9153
9154 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9155   if (!CGM.getLangOpts().OpenMPIsDevice)
9156     return false;
9157
9158   // Check if there are Ctors/Dtors in this declaration and look for target
9159   // regions in it. We use the complete variant to produce the kernel name
9160   // mangling.
9161   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9162   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9163     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9164       StringRef ParentName =
9165           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9166       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9167     }
9168     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9169       StringRef ParentName =
9170           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9171       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9172     }
9173   }
9174
9175   // Do not to emit variable if it is not marked as declare target.
9176   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9177       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9178           cast<VarDecl>(GD.getDecl()));
9179   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9180       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9181        HasRequiresUnifiedSharedMemory)) {
9182     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9183     return true;
9184   }
9185   return false;
9186 }
9187
9188 llvm::Constant *
9189 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9190                                                 const VarDecl *VD) {
9191   assert(VD->getType().isConstant(CGM.getContext()) &&
9192          "Expected constant variable.");
9193   StringRef VarName;
9194   llvm::Constant *Addr;
9195   llvm::GlobalValue::LinkageTypes Linkage;
9196   QualType Ty = VD->getType();
9197   SmallString<128> Buffer;
9198   {
9199     unsigned DeviceID;
9200     unsigned FileID;
9201     unsigned Line;
9202     getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9203                              FileID, Line);
9204     llvm::raw_svector_ostream OS(Buffer);
9205     OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9206        << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9207     VarName = OS.str();
9208   }
9209   Linkage = llvm::GlobalValue::InternalLinkage;
9210   Addr =
9211       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9212                                   getDefaultFirstprivateAddressSpace());
9213   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9214   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9215   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9216   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9217       VarName, Addr, VarSize,
9218       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9219   return Addr;
9220 }
9221
// Register the global variable \p VD (emitted at \p Addr) in the offload
// entries table according to its declare-target map type, if any.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'declare target to' without unified shared memory: the variable itself
    // is the entry; use its real size and linkage.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size zero marks the entry as not defined here.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        // Emit an internal constant "<name>_ref" pointing at the variable
        // and mark it compiler-used so the variable is kept alive.
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    // The entry is the pointer-sized "declare target" indirection variable,
    // not the variable itself; on the device only its name is registered.
    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
9289
9290 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9291   if (isa<FunctionDecl>(GD.getDecl()) ||
9292       isa<OMPDeclareReductionDecl>(GD.getDecl()))
9293     return emitTargetFunctions(GD);
9294
9295   return emitTargetGlobalVariable(GD);
9296 }
9297
9298 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9299   for (const VarDecl *VD : DeferredGlobalVariables) {
9300     llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9301         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9302     if (!Res)
9303       continue;
9304     if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9305         !HasRequiresUnifiedSharedMemory) {
9306       CGM.EmitGlobal(VD);
9307     } else {
9308       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9309               (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9310                HasRequiresUnifiedSharedMemory)) &&
9311              "Expected link clause or to clause with unified memory.");
9312       (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9313     }
9314   }
9315 }
9316
9317 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9318     CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9319   assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9320          " Expected target-based directive.");
9321 }
9322
9323 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9324     const OMPRequiresDecl *D) {
9325   for (const OMPClause *Clause : D->clauselists()) {
9326     if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9327       HasRequiresUnifiedSharedMemory = true;
9328       break;
9329     }
9330   }
9331 }
9332
9333 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9334                                                        LangAS &AS) {
9335   if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9336     return false;
9337   const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9338   switch(A->getAllocatorType()) {
9339   case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9340   // Not supported, fallback to the default mem space.
9341   case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9342   case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9343   case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9344   case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9345   case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9346   case OMPAllocateDeclAttr::OMPConstMemAlloc:
9347   case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9348     AS = LangAS::Default;
9349     return true;
9350   case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9351     llvm_unreachable("Expected predefined allocator for the variables with the "
9352                      "static storage.");
9353   }
9354   return false;
9355 }
9356
/// Returns true if a 'requires unified_shared_memory' clause was seen in this
/// translation unit (flag is set by checkArchForUnifiedAddressing).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
9360
9361 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9362     CodeGenModule &CGM)
9363     : CGM(CGM) {
9364   if (CGM.getLangOpts().OpenMPIsDevice) {
9365     SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9366     CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9367   }
9368 }
9369
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  // Restore the flag captured by the constructor (device compilation only;
  // on the host the constructor did not save anything).
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
9374
9375 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9376   if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9377     return true;
9378
9379   StringRef Name = CGM.getMangledName(GD);
9380   const auto *D = cast<FunctionDecl>(GD.getDecl());
9381   // Do not to emit function if it is marked as declare target as it was already
9382   // emitted.
9383   if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9384     if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9385       if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9386         return !F->isDeclaration();
9387       return false;
9388     }
9389     return true;
9390   }
9391
9392   return !AlreadyEmittedTargetFunctions.insert(Name).second;
9393 }
9394
/// Emits the host-side registration function that forwards 'omp requires'
/// clause flags to the runtime via __tgt_register_requires. Returns null when
/// there is nothing to register (no target triples, simd-only mode, device
/// compilation, or no target/declare-target code in this TU).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    // void omp_offloading.requires_reg(void), run at program startup.
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
9435
/// Finalizes offloading support for this TU: first materializes the offload
/// entries and their metadata, then builds and returns the registration
/// function for the offloading binary descriptor.
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}
9446
9447 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9448                                     const OMPExecutableDirective &D,
9449                                     SourceLocation Loc,
9450                                     llvm::Function *OutlinedFn,
9451                                     ArrayRef<llvm::Value *> CapturedVars) {
9452   if (!CGF.HaveInsertPoint())
9453     return;
9454
9455   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9456   CodeGenFunction::RunCleanupsScope Scope(CGF);
9457
9458   // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9459   llvm::Value *Args[] = {
9460       RTLoc,
9461       CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9462       CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9463   llvm::SmallVector<llvm::Value *, 16> RealArgs;
9464   RealArgs.append(std::begin(Args), std::end(Args));
9465   RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9466
9467   llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9468   CGF.EmitRuntimeCall(RTLFn, RealArgs);
9469 }
9470
9471 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9472                                          const Expr *NumTeams,
9473                                          const Expr *ThreadLimit,
9474                                          SourceLocation Loc) {
9475   if (!CGF.HaveInsertPoint())
9476     return;
9477
9478   llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9479
9480   llvm::Value *NumTeamsVal =
9481       NumTeams
9482           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9483                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9484           : CGF.Builder.getInt32(0);
9485
9486   llvm::Value *ThreadLimitVal =
9487       ThreadLimit
9488           ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9489                                       CGF.CGM.Int32Ty, /* isSigned = */ true)
9490           : CGF.Builder.getInt32(0);
9491
9492   // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9493   llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9494                                      ThreadLimitVal};
9495   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9496                       PushNumTeamsArgs);
9497 }
9498
/// Emits the 'target data' region: __tgt_target_data_begin before the body,
/// __tgt_target_data_end after it, optionally guarded by the 'if' clause.
/// When device pointers are privatized, the body is emitted twice (with and
/// without privatization); otherwise once, between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    // Reuse the arrays created by BeginThenGen (stored in Info).
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
9625
/// Emits the runtime call for a standalone target data directive ('target
/// enter data', 'target exit data', or 'target update'): builds the mapping
/// arrays, then dispatches to the matching __tgt_target_data_* entry point
/// (nowait variant when a 'nowait' clause is present), guarded by the 'if'
/// clause and wrapped in a task when 'depend' clauses are present.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo/MapTypesArray are filled by TargetThenGen below and read by
  // ThenGen, so they are captured by reference in both lambdas.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds are rejected by the assert at the top of
    // this function; the exhaustive list keeps -Wswitch diagnostics useful.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With 'depend' clauses, the runtime call is wrapped in a target task.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
9778
namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Mangling category of the parameter; defaults to plain vector.
    ParamKindTy Kind = Vector;
    /// Linear step or clause argument; the mangling code below prints it only
    /// when non-zero (and, for AArch64 'l', only when it differs from 1).
    llvm::APSInt StrideOrArg;
    /// Alignment value; emitted as the 'a<N>' token only when non-zero.
    llvm::APSInt Alignment;
  };
} // namespace
9789
9790 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9791                                 ArrayRef<ParamAttrTy> ParamAttrs) {
9792   // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9793   // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9794   // of that clause. The VLEN value must be power of 2.
9795   // In other case the notion of the function`s "characteristic data type" (CDT)
9796   // is used to compute the vector length.
9797   // CDT is defined in the following order:
9798   //   a) For non-void function, the CDT is the return type.
9799   //   b) If the function has any non-uniform, non-linear parameters, then the
9800   //   CDT is the type of the first such parameter.
9801   //   c) If the CDT determined by a) or b) above is struct, union, or class
9802   //   type which is pass-by-value (except for the type that maps to the
9803   //   built-in complex data type), the characteristic data type is int.
9804   //   d) If none of the above three cases is applicable, the CDT is int.
9805   // The VLEN is then determined based on the CDT and the size of vector
9806   // register of that ISA for which current vector version is generated. The
9807   // VLEN is computed using the formula below:
9808   //   VLEN  = sizeof(vector_register) / sizeof(CDT),
9809   // where vector register size specified in section 3.2.1 Registers and the
9810   // Stack Frame of original AMD64 ABI document.
9811   QualType RetType = FD->getReturnType();
9812   if (RetType.isNull())
9813     return 0;
9814   ASTContext &C = FD->getASTContext();
9815   QualType CDT;
9816   if (!RetType.isNull() && !RetType->isVoidType()) {
9817     CDT = RetType;
9818   } else {
9819     unsigned Offset = 0;
9820     if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9821       if (ParamAttrs[Offset].Kind == Vector)
9822         CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9823       ++Offset;
9824     }
9825     if (CDT.isNull()) {
9826       for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9827         if (ParamAttrs[I + Offset].Kind == Vector) {
9828           CDT = FD->getParamDecl(I)->getType();
9829           break;
9830         }
9831       }
9832     }
9833   }
9834   if (CDT.isNull())
9835     CDT = C.IntTy;
9836   CDT = CDT->getCanonicalTypeUnqualified();
9837   if (CDT->isRecordType() || CDT->isUnionType())
9838     CDT = C.IntTy;
9839   return C.getTypeSize(CDT);
9840 }
9841
9842 static void
9843 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9844                            const llvm::APSInt &VLENVal,
9845                            ArrayRef<ParamAttrTy> ParamAttrs,
9846                            OMPDeclareSimdDeclAttr::BranchStateTy State) {
9847   struct ISADataTy {
9848     char ISA;
9849     unsigned VecRegSize;
9850   };
9851   ISADataTy ISAData[] = {
9852       {
9853           'b', 128
9854       }, // SSE
9855       {
9856           'c', 256
9857       }, // AVX
9858       {
9859           'd', 256
9860       }, // AVX2
9861       {
9862           'e', 512
9863       }, // AVX512
9864   };
9865   llvm::SmallVector<char, 2> Masked;
9866   switch (State) {
9867   case OMPDeclareSimdDeclAttr::BS_Undefined:
9868     Masked.push_back('N');
9869     Masked.push_back('M');
9870     break;
9871   case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9872     Masked.push_back('N');
9873     break;
9874   case OMPDeclareSimdDeclAttr::BS_Inbranch:
9875     Masked.push_back('M');
9876     break;
9877   }
9878   for (char Mask : Masked) {
9879     for (const ISADataTy &Data : ISAData) {
9880       SmallString<256> Buffer;
9881       llvm::raw_svector_ostream Out(Buffer);
9882       Out << "_ZGV" << Data.ISA << Mask;
9883       if (!VLENVal) {
9884         unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
9885         assert(NumElts && "Non-zero simdlen/cdtsize expected");
9886         Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
9887       } else {
9888         Out << VLENVal;
9889       }
9890       for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9891         switch (ParamAttr.Kind){
9892         case LinearWithVarStride:
9893           Out << 's' << ParamAttr.StrideOrArg;
9894           break;
9895         case Linear:
9896           Out << 'l';
9897           if (!!ParamAttr.StrideOrArg)
9898             Out << ParamAttr.StrideOrArg;
9899           break;
9900         case Uniform:
9901           Out << 'u';
9902           break;
9903         case Vector:
9904           Out << 'v';
9905           break;
9906         }
9907         if (!!ParamAttr.Alignment)
9908           Out << 'a' << ParamAttr.Alignment;
9909       }
9910       Out << '_' << Fn->getName();
9911       Fn->addFnAttr(Out.str());
9912     }
9913   }
9914 }
9915
9916 // This are the Functions that are needed to mangle the name of the
9917 // vector functions generated by the compiler, according to the rules
9918 // defined in the "Vector Function ABI specifications for AArch64",
9919 // available at
9920 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9921
9922 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9923 ///
9924 /// TODO: Need to implement the behavior for reference marked with a
9925 /// var or no linear modifiers (1.b in the section). For this, we
9926 /// need to extend ParamKindTy to support the linear modifiers.
9927 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9928   QT = QT.getCanonicalType();
9929
9930   if (QT->isVoidType())
9931     return false;
9932
9933   if (Kind == ParamKindTy::Uniform)
9934     return false;
9935
9936   if (Kind == ParamKindTy::Linear)
9937     return false;
9938
9939   // TODO: Handle linear references with modifiers
9940
9941   if (Kind == ParamKindTy::LinearWithVarStride)
9942     return false;
9943
9944   return true;
9945 }
9946
9947 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9948 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9949   QT = QT.getCanonicalType();
9950   unsigned Size = C.getTypeSize(QT);
9951
9952   // Only scalars and complex within 16 bytes wide set PVB to true.
9953   if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9954     return false;
9955
9956   if (QT->isFloatingType())
9957     return true;
9958
9959   if (QT->isIntegerType())
9960     return true;
9961
9962   if (QT->isPointerType())
9963     return true;
9964
9965   // TODO: Add support for complex types (section 3.1.2, item 2).
9966
9967   return false;
9968 }
9969
9970 /// Computes the lane size (LS) of a return type or of an input parameter,
9971 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9972 /// TODO: Add support for references, section 3.2.1, item 1.
9973 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9974   if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9975     QualType PTy = QT.getCanonicalType()->getPointeeType();
9976     if (getAArch64PBV(PTy, C))
9977       return C.getTypeSize(PTy);
9978   }
9979   if (getAArch64PBV(QT, C))
9980     return C.getTypeSize(QT);
9981
9982   return C.getTypeSize(C.getUIntPtrType());
9983 }
9984
9985 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9986 // signature of the scalar function, as defined in 3.2.2 of the
9987 // AAVFABI.
9988 static std::tuple<unsigned, unsigned, bool>
9989 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9990   QualType RetType = FD->getReturnType().getCanonicalType();
9991
9992   ASTContext &C = FD->getASTContext();
9993
9994   bool OutputBecomesInput = false;
9995
9996   llvm::SmallVector<unsigned, 8> Sizes;
9997   if (!RetType->isVoidType()) {
9998     Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9999     if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10000       OutputBecomesInput = true;
10001   }
10002   for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10003     QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10004     Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10005   }
10006
10007   assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10008   // The LS of a function parameter / return value can only be a power
10009   // of 2, starting from 8 bits, up to 128.
10010   assert(std::all_of(Sizes.begin(), Sizes.end(),
10011                      [](unsigned Size) {
10012                        return Size == 8 || Size == 16 || Size == 32 ||
10013                               Size == 64 || Size == 128;
10014                      }) &&
10015          "Invalid size");
10016
10017   return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10018                          *std::max_element(std::begin(Sizes), std::end(Sizes)),
10019                          OutputBecomesInput);
10020 }
10021
10022 /// Mangle the parameter part of the vector function name according to
10023 /// their OpenMP classification. The mangling function is defined in
10024 /// section 3.5 of the AAVFABI.
10025 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10026   SmallString<256> Buffer;
10027   llvm::raw_svector_ostream Out(Buffer);
10028   for (const auto &ParamAttr : ParamAttrs) {
10029     switch (ParamAttr.Kind) {
10030     case LinearWithVarStride:
10031       Out << "ls" << ParamAttr.StrideOrArg;
10032       break;
10033     case Linear:
10034       Out << 'l';
10035       // Don't print the step value if it is not present or if it is
10036       // equal to 1.
10037       if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10038         Out << ParamAttr.StrideOrArg;
10039       break;
10040     case Uniform:
10041       Out << 'u';
10042       break;
10043     case Vector:
10044       Out << 'v';
10045       break;
10046     }
10047
10048     if (!!ParamAttr.Alignment)
10049       Out << 'a' << ParamAttr.Alignment;
10050   }
10051
10052   return Out.str();
10053 }
10054
10055 // Function used to add the attribute. The parameter `VLEN` is
10056 // templated to allow the use of "x" when targeting scalable functions
10057 // for SVE.
10058 template <typename T>
10059 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10060                                  char ISA, StringRef ParSeq,
10061                                  StringRef MangledName, bool OutputBecomesInput,
10062                                  llvm::Function *Fn) {
10063   SmallString<256> Buffer;
10064   llvm::raw_svector_ostream Out(Buffer);
10065   Out << Prefix << ISA << LMask << VLEN;
10066   if (OutputBecomesInput)
10067     Out << "v";
10068   Out << ParSeq << "_" << MangledName;
10069   Fn->addFnAttr(Out.str());
10070 }
10071
10072 // Helper function to generate the Advanced SIMD names depending on
10073 // the value of the NDS when simdlen is not present.
10074 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10075                                       StringRef Prefix, char ISA,
10076                                       StringRef ParSeq, StringRef MangledName,
10077                                       bool OutputBecomesInput,
10078                                       llvm::Function *Fn) {
10079   switch (NDS) {
10080   case 8:
10081     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10082                          OutputBecomesInput, Fn);
10083     addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10084                          OutputBecomesInput, Fn);
10085     break;
10086   case 16:
10087     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10088                          OutputBecomesInput, Fn);
10089     addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10090                          OutputBecomesInput, Fn);
10091     break;
10092   case 32:
10093     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10094                          OutputBecomesInput, Fn);
10095     addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10096                          OutputBecomesInput, Fn);
10097     break;
10098   case 64:
10099   case 128:
10100     addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10101                          OutputBecomesInput, Fn);
10102     break;
10103   default:
10104     llvm_unreachable("Scalar type is too wide.");
10105   }
10106 }
10107
10108 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature: the narrowest data
  // size (NDS), the widest data size (WDS), and whether the return value
  // must also be treated as an input parameter.
  // NOTE(review): VecRegSize is not referenced below — confirm whether it is
  // still needed in this signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // (UserVLEN == 0 means no `simdlen` clause was given.)
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits: total bit width (VLEN * widest lane) must be a multiple of
  // 128 bits and no larger than 2048 bits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause: "N" is the unmasked variant, "M"
      // the masked one.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable length, spelled "x".
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`; the concrete VLENs are derived from NDS.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
10216
10217 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10218                                               llvm::Function *Fn) {
10219   ASTContext &C = CGM.getContext();
10220   FD = FD->getMostRecentDecl();
10221   // Map params to their positions in function decl.
10222   llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10223   if (isa<CXXMethodDecl>(FD))
10224     ParamPositions.try_emplace(FD, 0);
10225   unsigned ParamPos = ParamPositions.size();
10226   for (const ParmVarDecl *P : FD->parameters()) {
10227     ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10228     ++ParamPos;
10229   }
10230   while (FD) {
10231     for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10232       llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10233       // Mark uniform parameters.
10234       for (const Expr *E : Attr->uniforms()) {
10235         E = E->IgnoreParenImpCasts();
10236         unsigned Pos;
10237         if (isa<CXXThisExpr>(E)) {
10238           Pos = ParamPositions[FD];
10239         } else {
10240           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10241                                 ->getCanonicalDecl();
10242           Pos = ParamPositions[PVD];
10243         }
10244         ParamAttrs[Pos].Kind = Uniform;
10245       }
10246       // Get alignment info.
10247       auto NI = Attr->alignments_begin();
10248       for (const Expr *E : Attr->aligneds()) {
10249         E = E->IgnoreParenImpCasts();
10250         unsigned Pos;
10251         QualType ParmTy;
10252         if (isa<CXXThisExpr>(E)) {
10253           Pos = ParamPositions[FD];
10254           ParmTy = E->getType();
10255         } else {
10256           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10257                                 ->getCanonicalDecl();
10258           Pos = ParamPositions[PVD];
10259           ParmTy = PVD->getType();
10260         }
10261         ParamAttrs[Pos].Alignment =
10262             (*NI)
10263                 ? (*NI)->EvaluateKnownConstInt(C)
10264                 : llvm::APSInt::getUnsigned(
10265                       C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10266                           .getQuantity());
10267         ++NI;
10268       }
10269       // Mark linear parameters.
10270       auto SI = Attr->steps_begin();
10271       auto MI = Attr->modifiers_begin();
10272       for (const Expr *E : Attr->linears()) {
10273         E = E->IgnoreParenImpCasts();
10274         unsigned Pos;
10275         if (isa<CXXThisExpr>(E)) {
10276           Pos = ParamPositions[FD];
10277         } else {
10278           const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10279                                 ->getCanonicalDecl();
10280           Pos = ParamPositions[PVD];
10281         }
10282         ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10283         ParamAttr.Kind = Linear;
10284         if (*SI) {
10285           Expr::EvalResult Result;
10286           if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10287             if (const auto *DRE =
10288                     cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10289               if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10290                 ParamAttr.Kind = LinearWithVarStride;
10291                 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10292                     ParamPositions[StridePVD->getCanonicalDecl()]);
10293               }
10294             }
10295           } else {
10296             ParamAttr.StrideOrArg = Result.Val.getInt();
10297           }
10298         }
10299         ++SI;
10300         ++MI;
10301       }
10302       llvm::APSInt VLENVal;
10303       SourceLocation ExprLoc;
10304       const Expr *VLENExpr = Attr->getSimdlen();
10305       if (VLENExpr) {
10306         VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10307         ExprLoc = VLENExpr->getExprLoc();
10308       }
10309       OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10310       if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10311           CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10312         emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10313       } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10314         unsigned VLEN = VLENVal.getExtValue();
10315         StringRef MangledName = Fn->getName();
10316         if (CGM.getTarget().hasFeature("sve"))
10317           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10318                                          MangledName, 's', 128, Fn, ExprLoc);
10319         if (CGM.getTarget().hasFeature("neon"))
10320           emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10321                                          MangledName, 'n', 128, Fn, ExprLoc);
10322       }
10323     }
10324     FD = FD->getPreviousDecl();
10325   }
10326 }
10327
10328 namespace {
10329 /// Cleanup action for doacross support.
10330 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10331 public:
10332   static const int DoacrossFinArgs = 2;
10333
10334 private:
10335   llvm::FunctionCallee RTLFn;
10336   llvm::Value *Args[DoacrossFinArgs];
10337
10338 public:
10339   DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10340                     ArrayRef<llvm::Value *> CallArgs)
10341       : RTLFn(RTLFn) {
10342     assert(CallArgs.size() == DoacrossFinArgs);
10343     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10344   }
10345   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10346     if (!CGF.HaveInsertPoint())
10347       return;
10348     CGF.EmitRuntimeCall(RTLFn, Args);
10349   }
10350 };
10351 } // namespace
10352
// Emits the doacross-loop initialization: builds an on-stack array of
// kmp_dim descriptors (one per loop dimension in NumIterations), calls
// __kmpc_doacross_init, and registers a cleanup that calls
// __kmpc_doacross_fini at region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Build (once, cached in KmpDimTy) the implicit record mirroring the
  // runtime's dimension descriptor:
  // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
  //  kmp_int64 lo; // lower
  //  kmp_int64 up; // upper
  //  kmp_int64 st; // stride
  // };
  if (KmpDimTy.isNull()) {
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  // Zero-initialize the whole array; the lower bound ("lo") is therefore
  // left at 0 and only "up" and "st" are stored explicitly below.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations; (converted to kmp_int64)
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Schedule __kmpc_doacross_fini(loc, gtid) to run when the region is
  // left, including via exception unwinding.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
10424
10425 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10426                                           const OMPDependClause *C) {
10427   QualType Int64Ty =
10428       CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10429   llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10430   QualType ArrayTy = CGM.getContext().getConstantArrayType(
10431       Int64Ty, Size, ArrayType::Normal, 0);
10432   Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10433   for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10434     const Expr *CounterVal = C->getLoopData(I);
10435     assert(CounterVal);
10436     llvm::Value *CntVal = CGF.EmitScalarConversion(
10437         CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10438         CounterVal->getExprLoc());
10439     CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10440                           /*Volatile=*/false, Int64Ty);
10441   }
10442   llvm::Value *Args[] = {
10443       emitUpdateLocation(CGF, C->getBeginLoc()),
10444       getThreadID(CGF, C->getBeginLoc()),
10445       CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10446   llvm::FunctionCallee RTLFn;
10447   if (C->getDependencyKind() == OMPC_DEPEND_source) {
10448     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10449   } else {
10450     assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10451     RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10452   }
10453   CGF.EmitRuntimeCall(RTLFn, Args);
10454 }
10455
10456 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10457                                llvm::FunctionCallee Callee,
10458                                ArrayRef<llvm::Value *> Args) const {
10459   assert(Loc.isValid() && "Outlined function call location must be valid.");
10460   auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10461
10462   if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10463     if (Fn->doesNotThrow()) {
10464       CGF.EmitNounwindRuntimeCall(Fn, Args);
10465       return;
10466     }
10467   }
10468   CGF.EmitRuntimeCall(Callee, Args);
10469 }
10470
// Invoke an outlined OpenMP region function. Delegates to emitCall, which
// applies an artificial debug location and uses a nounwind call when the
// callee is known not to throw.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
10476
10477 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10478   if (const auto *FD = dyn_cast<FunctionDecl>(D))
10479     if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10480       HasEmittedDeclareTargetRegion = true;
10481 }
10482
// Default implementation: the native parameter's own local storage is used
// directly. NOTE(review): TargetParam is intentionally ignored here —
// presumably overridden where native and target parameter representations
// differ; confirm in subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
10488
10489 namespace {
10490 /// Cleanup action for allocate support.
10491 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10492 public:
10493   static const int CleanupArgs = 3;
10494
10495 private:
10496   llvm::FunctionCallee RTLFn;
10497   llvm::Value *Args[CleanupArgs];
10498
10499 public:
10500   OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10501                        ArrayRef<llvm::Value *> CallArgs)
10502       : RTLFn(RTLFn) {
10503     assert(CallArgs.size() == CleanupArgs &&
10504            "Size of arguments does not match.");
10505     std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10506   }
10507   void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10508     if (!CGF.HaveInsertPoint())
10509       return;
10510     CGF.EmitRuntimeCall(RTLFn, Args);
10511   }
10512 };
10513 } // namespace
10514
// Returns a runtime-allocated address for a local variable carrying an
// OpenMP 'allocate' attribute with a non-default allocator; returns
// Address::invalid() to request ordinary (alloca) allocation otherwise.
// Memory is obtained via __kmpc_alloc and released via a __kmpc_free
// cleanup pushed on the EH stack.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: size is a runtime value; round it up to the declared alignment.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Constant-size type: align at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is a enum (integer).
  // Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *__kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Pair the allocation with __kmpc_free(gtid, addr, allocator) at scope
  // exit (normal and EH paths).
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}
10568
// Intentionally unimplemented: not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10574
// Intentionally unimplemented: not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10580
// Intentionally unimplemented: not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10588
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10596
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10603
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10609
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10614
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10620
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10628
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10635
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10643
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10650
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10656
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10662
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10669
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10675
// Intentionally unimplemented: not supported in SIMD-only mode.
llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10683
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10689
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10695
// Intentionally unimplemented: not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10702
// Intentionally unimplemented: not supported in SIMD-only mode.
llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10708
// Intentionally unimplemented: not supported in SIMD-only mode.
Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10713
// Intentionally unimplemented: not supported in SIMD-only mode.
void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
10719
10720 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
10721                                        const OMPExecutableDirective &D,
10722                                        llvm::Function *TaskFunction,
10723                                        QualType SharedsTy, Address Shareds,
10724                                        const Expr *IfCond,
10725                                        const OMPTaskDataTy &Data) {
10726   llvm_unreachable("Not supported in SIMD-only mode");
10727 }
10728
10729 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
10730     CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
10731     llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
10732     const Expr *IfCond, const OMPTaskDataTy &Data) {
10733   llvm_unreachable("Not supported in SIMD-only mode");
10734 }
10735
10736 void CGOpenMPSIMDRuntime::emitReduction(
10737     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
10738     ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
10739     ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
10740   assert(Options.SimpleReduction && "Only simple reduction is expected.");
10741   CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
10742                                  ReductionOps, Options);
10743 }
10744
10745 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
10746     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
10747     ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
10748   llvm_unreachable("Not supported in SIMD-only mode");
10749 }
10750
10751 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
10752                                                   SourceLocation Loc,
10753                                                   ReductionCodeGen &RCG,
10754                                                   unsigned N) {
10755   llvm_unreachable("Not supported in SIMD-only mode");
10756 }
10757
10758 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
10759                                                   SourceLocation Loc,
10760                                                   llvm::Value *ReductionsPtr,
10761                                                   LValue SharedLVal) {
10762   llvm_unreachable("Not supported in SIMD-only mode");
10763 }
10764
10765 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
10766                                            SourceLocation Loc) {
10767   llvm_unreachable("Not supported in SIMD-only mode");
10768 }
10769
10770 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
10771     CodeGenFunction &CGF, SourceLocation Loc,
10772     OpenMPDirectiveKind CancelRegion) {
10773   llvm_unreachable("Not supported in SIMD-only mode");
10774 }
10775
10776 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
10777                                          SourceLocation Loc, const Expr *IfCond,
10778                                          OpenMPDirectiveKind CancelRegion) {
10779   llvm_unreachable("Not supported in SIMD-only mode");
10780 }
10781
10782 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
10783     const OMPExecutableDirective &D, StringRef ParentName,
10784     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
10785     bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
10786   llvm_unreachable("Not supported in SIMD-only mode");
10787 }
10788
10789 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
10790                                          const OMPExecutableDirective &D,
10791                                          llvm::Function *OutlinedFn,
10792                                          llvm::Value *OutlinedFnID,
10793                                          const Expr *IfCond,
10794                                          const Expr *Device) {
10795   llvm_unreachable("Not supported in SIMD-only mode");
10796 }
10797
10798 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
10799   llvm_unreachable("Not supported in SIMD-only mode");
10800 }
10801
10802 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10803   llvm_unreachable("Not supported in SIMD-only mode");
10804 }
10805
10806 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
10807   return false;
10808 }
10809
10810 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
10811   return nullptr;
10812 }
10813
10814 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
10815                                         const OMPExecutableDirective &D,
10816                                         SourceLocation Loc,
10817                                         llvm::Function *OutlinedFn,
10818                                         ArrayRef<llvm::Value *> CapturedVars) {
10819   llvm_unreachable("Not supported in SIMD-only mode");
10820 }
10821
10822 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10823                                              const Expr *NumTeams,
10824                                              const Expr *ThreadLimit,
10825                                              SourceLocation Loc) {
10826   llvm_unreachable("Not supported in SIMD-only mode");
10827 }
10828
10829 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
10830     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10831     const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10832   llvm_unreachable("Not supported in SIMD-only mode");
10833 }
10834
10835 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
10836     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10837     const Expr *Device) {
10838   llvm_unreachable("Not supported in SIMD-only mode");
10839 }
10840
10841 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10842                                            const OMPLoopDirective &D,
10843                                            ArrayRef<Expr *> NumIterations) {
10844   llvm_unreachable("Not supported in SIMD-only mode");
10845 }
10846
10847 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10848                                               const OMPDependClause *C) {
10849   llvm_unreachable("Not supported in SIMD-only mode");
10850 }
10851
10852 const VarDecl *
10853 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
10854                                         const VarDecl *NativeParam) const {
10855   llvm_unreachable("Not supported in SIMD-only mode");
10856 }
10857
10858 Address
10859 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
10860                                          const VarDecl *NativeParam,
10861                                          const VarDecl *TargetParam) const {
10862   llvm_unreachable("Not supported in SIMD-only mode");
10863 }