1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This provides a class for OpenMP runtime code generation.
11 //===----------------------------------------------------------------------===//
13 #include "CGOpenMPRuntime.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/OpenMPClause.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "clang/AST/StmtVisitor.h"
23 #include "clang/Basic/BitmaskEnum.h"
24 #include "clang/CodeGen/ConstantInitBuilder.h"
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/SetOperations.h"
27 #include "llvm/Bitcode/BitcodeReader.h"
28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
29 #include "llvm/IR/DerivedTypes.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/Support/Format.h"
33 #include "llvm/Support/raw_ostream.h"
36 using namespace clang;
37 using namespace CodeGen;
38 using namespace llvm::omp;
41 /// Base class for handling code generation inside OpenMP regions.
42 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
44 /// Kinds of OpenMP regions used in codegen.
45 enum CGOpenMPRegionKind {
46 /// Region with outlined function for standalone 'parallel'
48 ParallelOutlinedRegion,
49 /// Region with outlined function for standalone 'task' directive.
51 /// Region for constructs that do not require function outlining,
52 /// like 'for', 'sections', 'atomic' etc. directives.
54 /// Region with outlined function for standalone 'target' directive.
58 CGOpenMPRegionInfo(const CapturedStmt &CS,
59 const CGOpenMPRegionKind RegionKind,
60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
62 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
63 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
65 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
68 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
69 Kind(Kind), HasCancel(HasCancel) {}
71 /// Get a variable or parameter for storing global thread id
72 /// inside OpenMP construct.
73 virtual const VarDecl *getThreadIDVariable() const = 0;
75 /// Emit the captured statement body.
76 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
78 /// Get an LValue for the current ThreadID variable.
79 /// \return LValue for thread id variable. This LValue always has type int32*.
80 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
82 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
84 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
86 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
88 bool hasCancel() const { return HasCancel; }
90 static bool classof(const CGCapturedStmtInfo *Info) {
91 return Info->getKind() == CR_OpenMP;
94 ~CGOpenMPRegionInfo() override = default;
97 CGOpenMPRegionKind RegionKind;
98 RegionCodeGenTy CodeGen;
99 OpenMPDirectiveKind Kind;
103 /// API for captured statement code generation in OpenMP constructs.
104 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
106 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
107 const RegionCodeGenTy &CodeGen,
108 OpenMPDirectiveKind Kind, bool HasCancel,
109 StringRef HelperName)
110 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
112 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
113 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
116 /// Get a variable or parameter for storing global thread id
117 /// inside OpenMP construct.
118 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
120 /// Get the name of the capture helper.
121 StringRef getHelperName() const override { return HelperName; }
123 static bool classof(const CGCapturedStmtInfo *Info) {
124 return CGOpenMPRegionInfo::classof(Info) &&
125 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
126 ParallelOutlinedRegion;
130 /// A variable or parameter storing global thread id for OpenMP
132 const VarDecl *ThreadIDVar;
133 StringRef HelperName;
136 /// API for captured statement code generation in OpenMP constructs.
137 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
139 class UntiedTaskActionTy final : public PrePostActionTy {
141 const VarDecl *PartIDVar;
142 const RegionCodeGenTy UntiedCodeGen;
143 llvm::SwitchInst *UntiedSwitch = nullptr;
146 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
147 const RegionCodeGenTy &UntiedCodeGen)
148 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
149 void Enter(CodeGenFunction &CGF) override {
151 // Emit task switching point.
152 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
153 CGF.GetAddrOfLocalVar(PartIDVar),
154 PartIDVar->getType()->castAs<PointerType>());
156 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
157 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
158 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
159 CGF.EmitBlock(DoneBB);
160 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
161 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
162 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
163 CGF.Builder.GetInsertBlock());
164 emitUntiedSwitch(CGF);
167 void emitUntiedSwitch(CodeGenFunction &CGF) const {
169 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
170 CGF.GetAddrOfLocalVar(PartIDVar),
171 PartIDVar->getType()->castAs<PointerType>());
172 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
175 CodeGenFunction::JumpDest CurPoint =
176 CGF.getJumpDestInCurrentScope(".untied.next.");
177 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
178 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
179 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
180 CGF.Builder.GetInsertBlock());
181 CGF.EmitBranchThroughCleanup(CurPoint);
182 CGF.EmitBlock(CurPoint.getBlock());
185 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
187 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
188 const VarDecl *ThreadIDVar,
189 const RegionCodeGenTy &CodeGen,
190 OpenMPDirectiveKind Kind, bool HasCancel,
191 const UntiedTaskActionTy &Action)
192 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
193 ThreadIDVar(ThreadIDVar), Action(Action) {
194 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
197 /// Get a variable or parameter for storing global thread id
198 /// inside OpenMP construct.
199 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
201 /// Get an LValue for the current ThreadID variable.
202 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
204 /// Get the name of the capture helper.
205 StringRef getHelperName() const override { return ".omp_outlined."; }
207 void emitUntiedSwitch(CodeGenFunction &CGF) override {
208 Action.emitUntiedSwitch(CGF);
211 static bool classof(const CGCapturedStmtInfo *Info) {
212 return CGOpenMPRegionInfo::classof(Info) &&
213 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
218 /// A variable or parameter storing global thread id for OpenMP
220 const VarDecl *ThreadIDVar;
221 /// Action for emitting code for untied tasks.
222 const UntiedTaskActionTy &Action;
225 /// API for inlined captured statement code generation in OpenMP
227 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
229 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
230 const RegionCodeGenTy &CodeGen,
231 OpenMPDirectiveKind Kind, bool HasCancel)
232 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
234 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
236 // Retrieve the value of the context parameter.
237 llvm::Value *getContextValue() const override {
239 return OuterRegionInfo->getContextValue();
240 llvm_unreachable("No context value for inlined OpenMP region");
243 void setContextValue(llvm::Value *V) override {
244 if (OuterRegionInfo) {
245 OuterRegionInfo->setContextValue(V);
248 llvm_unreachable("No context value for inlined OpenMP region");
251 /// Lookup the captured field decl for a variable.
252 const FieldDecl *lookup(const VarDecl *VD) const override {
254 return OuterRegionInfo->lookup(VD);
255 // If there is no outer outlined region,no need to lookup in a list of
256 // captured variables, we can use the original one.
260 FieldDecl *getThisFieldDecl() const override {
262 return OuterRegionInfo->getThisFieldDecl();
266 /// Get a variable or parameter for storing global thread id
267 /// inside OpenMP construct.
268 const VarDecl *getThreadIDVariable() const override {
270 return OuterRegionInfo->getThreadIDVariable();
274 /// Get an LValue for the current ThreadID variable.
275 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
277 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
278 llvm_unreachable("No LValue for inlined OpenMP construct");
281 /// Get the name of the capture helper.
282 StringRef getHelperName() const override {
283 if (auto *OuterRegionInfo = getOldCSI())
284 return OuterRegionInfo->getHelperName();
285 llvm_unreachable("No helper name for inlined OpenMP construct");
288 void emitUntiedSwitch(CodeGenFunction &CGF) override {
290 OuterRegionInfo->emitUntiedSwitch(CGF);
293 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
295 static bool classof(const CGCapturedStmtInfo *Info) {
296 return CGOpenMPRegionInfo::classof(Info) &&
297 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
300 ~CGOpenMPInlinedRegionInfo() override = default;
303 /// CodeGen info about outer OpenMP region.
304 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
305 CGOpenMPRegionInfo *OuterRegionInfo;
308 /// API for captured statement code generation in OpenMP target
309 /// constructs. For this captures, implicit parameters are used instead of the
310 /// captured fields. The name of the target region has to be unique in a given
311 /// application so it is provided by the client, because only the client has
312 /// the information to generate that.
313 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
315 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
316 const RegionCodeGenTy &CodeGen, StringRef HelperName)
317 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
318 /*HasCancel=*/false),
319 HelperName(HelperName) {}
321 /// This is unused for target regions because each starts executing
322 /// with a single thread.
323 const VarDecl *getThreadIDVariable() const override { return nullptr; }
325 /// Get the name of the capture helper.
326 StringRef getHelperName() const override { return HelperName; }
328 static bool classof(const CGCapturedStmtInfo *Info) {
329 return CGOpenMPRegionInfo::classof(Info) &&
330 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
334 StringRef HelperName;
337 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
338 llvm_unreachable("No codegen for expressions");
340 /// API for generation of expressions captured in a innermost OpenMP
342 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
344 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
345 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
347 /*HasCancel=*/false),
349 // Make sure the globals captured in the provided statement are local by
350 // using the privatization logic. We assume the same variable is not
351 // captured more than once.
352 for (const auto &C : CS.captures()) {
353 if (!C.capturesVariable() && !C.capturesVariableByCopy())
356 const VarDecl *VD = C.getCapturedVar();
357 if (VD->isLocalVarDeclOrParm())
360 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
361 /*RefersToEnclosingVariableOrCapture=*/false,
362 VD->getType().getNonReferenceType(), VK_LValue,
364 PrivScope.addPrivate(
365 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
367 (void)PrivScope.Privatize();
370 /// Lookup the captured field decl for a variable.
371 const FieldDecl *lookup(const VarDecl *VD) const override {
372 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
377 /// Emit the captured statement body.
378 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
379 llvm_unreachable("No body for expressions");
382 /// Get a variable or parameter for storing global thread id
383 /// inside OpenMP construct.
384 const VarDecl *getThreadIDVariable() const override {
385 llvm_unreachable("No thread id for expressions");
388 /// Get the name of the capture helper.
389 StringRef getHelperName() const override {
390 llvm_unreachable("No helper name for expressions");
393 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
396 /// Private scope to capture global variables.
397 CodeGenFunction::OMPPrivateScope PrivScope;
400 /// RAII for emitting code of OpenMP constructs.
401 class InlinedOpenMPRegionRAII {
402 CodeGenFunction &CGF;
403 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
404 FieldDecl *LambdaThisCaptureField = nullptr;
405 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
408 /// Constructs region for combined constructs.
409 /// \param CodeGen Code generation sequence for combined directives. Includes
410 /// a list of functions used for code generation of implicitly inlined
412 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
413 OpenMPDirectiveKind Kind, bool HasCancel)
415 // Start emission for the construct.
416 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
417 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
418 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
419 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
420 CGF.LambdaThisCaptureField = nullptr;
421 BlockInfo = CGF.BlockInfo;
422 CGF.BlockInfo = nullptr;
425 ~InlinedOpenMPRegionRAII() {
426 // Restore original CapturedStmtInfo only if we're done with code emission.
428 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
429 delete CGF.CapturedStmtInfo;
430 CGF.CapturedStmtInfo = OldCSI;
431 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
432 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
433 CGF.BlockInfo = BlockInfo;
437 /// Values for bit flags used in the ident_t to describe the fields.
438 /// All enumeric elements are named and described in accordance with the code
439 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
440 enum OpenMPLocationFlags : unsigned {
441 /// Use trampoline for internal microtask.
442 OMP_IDENT_IMD = 0x01,
443 /// Use c-style ident structure.
444 OMP_IDENT_KMPC = 0x02,
445 /// Atomic reduction option for kmpc_reduce.
446 OMP_ATOMIC_REDUCE = 0x10,
447 /// Explicit 'barrier' directive.
448 OMP_IDENT_BARRIER_EXPL = 0x20,
449 /// Implicit barrier in code.
450 OMP_IDENT_BARRIER_IMPL = 0x40,
451 /// Implicit barrier in 'for' directive.
452 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
453 /// Implicit barrier in 'sections' directive.
454 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
455 /// Implicit barrier in 'single' directive.
456 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
457 /// Call of __kmp_for_static_init for static loop.
458 OMP_IDENT_WORK_LOOP = 0x200,
459 /// Call of __kmp_for_static_init for sections.
460 OMP_IDENT_WORK_SECTIONS = 0x400,
461 /// Call of __kmp_for_static_init for distribute.
462 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
463 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
467 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
468 /// Values for bit flags for marking which requires clauses have been used.
469 enum OpenMPOffloadingRequiresDirFlags : int64_t {
471 OMP_REQ_UNDEFINED = 0x000,
472 /// no requires clause present.
473 OMP_REQ_NONE = 0x001,
474 /// reverse_offload clause.
475 OMP_REQ_REVERSE_OFFLOAD = 0x002,
476 /// unified_address clause.
477 OMP_REQ_UNIFIED_ADDRESS = 0x004,
478 /// unified_shared_memory clause.
479 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
480 /// dynamic_allocators clause.
481 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
482 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
485 enum OpenMPOffloadingReservedDeviceIDs {
486 /// Device ID if the device was not defined, runtime should get it
487 /// from environment variables in the spec.
488 OMP_DEVICEID_UNDEF = -1,
490 } // anonymous namespace
492 /// Describes ident structure that describes a source location.
493 /// All descriptions are taken from
494 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
495 /// Original structure:
496 /// typedef struct ident {
497 /// kmp_int32 reserved_1; /**< might be used in Fortran;
499 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
500 /// KMP_IDENT_KMPC identifies this union
502 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
505 /// /* but currently used for storing
506 /// region-specific ITT */
507 /// /* contextual information. */
508 ///#endif /* USE_ITT_BUILD */
509 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
511 /// char const *psource; /**< String describing the source location.
512 /// The string is composed of semi-colon separated
513 // fields which describe the source file,
514 /// the function and a pair of line numbers that
515 /// delimit the construct.
518 enum IdentFieldIndex {
519 /// might be used in Fortran
520 IdentField_Reserved_1,
521 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
523 /// Not really used in Fortran any more
524 IdentField_Reserved_2,
525 /// Source[4] in Fortran, do not use for C++
526 IdentField_Reserved_3,
527 /// String describing the source location. The string is composed of
528 /// semi-colon separated fields which describe the source file, the function
529 /// and a pair of line numbers that delimit the construct.
533 /// Schedule types for 'omp for' loops (these enumerators are taken from
534 /// the enum sched_type in kmp.h).
535 enum OpenMPSchedType {
536 /// Lower bound for default (unordered) versions.
538 OMP_sch_static_chunked = 33,
540 OMP_sch_dynamic_chunked = 35,
541 OMP_sch_guided_chunked = 36,
542 OMP_sch_runtime = 37,
544 /// static with chunk adjustment (e.g., simd)
545 OMP_sch_static_balanced_chunked = 45,
546 /// Lower bound for 'ordered' versions.
548 OMP_ord_static_chunked = 65,
550 OMP_ord_dynamic_chunked = 67,
551 OMP_ord_guided_chunked = 68,
552 OMP_ord_runtime = 69,
554 OMP_sch_default = OMP_sch_static,
555 /// dist_schedule types
556 OMP_dist_sch_static_chunked = 91,
557 OMP_dist_sch_static = 92,
558 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
559 /// Set if the monotonic schedule modifier was present.
560 OMP_sch_modifier_monotonic = (1 << 29),
561 /// Set if the nonmonotonic schedule modifier was present.
562 OMP_sch_modifier_nonmonotonic = (1 << 30),
565 enum OpenMPRTLFunction {
566 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
567 /// kmpc_micro microtask, ...);
568 OMPRTL__kmpc_fork_call,
569 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
570 /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
571 OMPRTL__kmpc_threadprivate_cached,
572 /// Call to void __kmpc_threadprivate_register( ident_t *,
573 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
574 OMPRTL__kmpc_threadprivate_register,
575 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
576 OMPRTL__kmpc_global_thread_num,
577 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
578 // kmp_critical_name *crit);
579 OMPRTL__kmpc_critical,
580 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
581 // global_tid, kmp_critical_name *crit, uintptr_t hint);
582 OMPRTL__kmpc_critical_with_hint,
583 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
584 // kmp_critical_name *crit);
585 OMPRTL__kmpc_end_critical,
586 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
588 OMPRTL__kmpc_cancel_barrier,
589 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
590 OMPRTL__kmpc_barrier,
591 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
592 OMPRTL__kmpc_for_static_fini,
593 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
595 OMPRTL__kmpc_serialized_parallel,
596 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
598 OMPRTL__kmpc_end_serialized_parallel,
599 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
600 // kmp_int32 num_threads);
601 OMPRTL__kmpc_push_num_threads,
602 // Call to void __kmpc_flush(ident_t *loc);
604 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
606 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
607 OMPRTL__kmpc_end_master,
608 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
610 OMPRTL__kmpc_omp_taskyield,
611 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
613 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
614 OMPRTL__kmpc_end_single,
615 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
616 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
617 // kmp_routine_entry_t *task_entry);
618 OMPRTL__kmpc_omp_task_alloc,
619 // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
620 // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
621 // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
622 // kmp_int64 device_id);
623 OMPRTL__kmpc_omp_target_task_alloc,
624 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
626 OMPRTL__kmpc_omp_task,
627 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
628 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
630 OMPRTL__kmpc_copyprivate,
631 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
632 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
633 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
635 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
636 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
637 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
639 OMPRTL__kmpc_reduce_nowait,
640 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
641 // kmp_critical_name *lck);
642 OMPRTL__kmpc_end_reduce,
643 // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
644 // kmp_critical_name *lck);
645 OMPRTL__kmpc_end_reduce_nowait,
646 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
647 // kmp_task_t * new_task);
648 OMPRTL__kmpc_omp_task_begin_if0,
649 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
650 // kmp_task_t * new_task);
651 OMPRTL__kmpc_omp_task_complete_if0,
652 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
653 OMPRTL__kmpc_ordered,
654 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
655 OMPRTL__kmpc_end_ordered,
656 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
658 OMPRTL__kmpc_omp_taskwait,
659 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
660 OMPRTL__kmpc_taskgroup,
661 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
662 OMPRTL__kmpc_end_taskgroup,
663 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
665 OMPRTL__kmpc_push_proc_bind,
666 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
667 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
668 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
669 OMPRTL__kmpc_omp_task_with_deps,
670 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
671 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
672 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
673 OMPRTL__kmpc_omp_wait_deps,
674 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
675 // global_tid, kmp_int32 cncl_kind);
676 OMPRTL__kmpc_cancellationpoint,
677 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
678 // kmp_int32 cncl_kind);
680 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
681 // kmp_int32 num_teams, kmp_int32 thread_limit);
682 OMPRTL__kmpc_push_num_teams,
683 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
685 OMPRTL__kmpc_fork_teams,
686 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
687 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
688 // sched, kmp_uint64 grainsize, void *task_dup);
689 OMPRTL__kmpc_taskloop,
690 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
691 // num_dims, struct kmp_dim *dims);
692 OMPRTL__kmpc_doacross_init,
693 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
694 OMPRTL__kmpc_doacross_fini,
695 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
697 OMPRTL__kmpc_doacross_post,
698 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
700 OMPRTL__kmpc_doacross_wait,
701 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
703 OMPRTL__kmpc_task_reduction_init,
704 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
706 OMPRTL__kmpc_task_reduction_get_th_data,
707 // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
709 // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
713 // Offloading related calls
715 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
717 OMPRTL__kmpc_push_target_tripcount,
718 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
719 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
722 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
723 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
725 OMPRTL__tgt_target_nowait,
726 // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
727 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
728 // *arg_types, int32_t num_teams, int32_t thread_limit);
729 OMPRTL__tgt_target_teams,
730 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
731 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
732 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
733 OMPRTL__tgt_target_teams_nowait,
734 // Call to void __tgt_register_requires(int64_t flags);
735 OMPRTL__tgt_register_requires,
736 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
737 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
738 OMPRTL__tgt_target_data_begin,
739 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
740 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
742 OMPRTL__tgt_target_data_begin_nowait,
743 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
744 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
745 OMPRTL__tgt_target_data_end,
746 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
747 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
749 OMPRTL__tgt_target_data_end_nowait,
750 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
751 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
752 OMPRTL__tgt_target_data_update,
753 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
754 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
756 OMPRTL__tgt_target_data_update_nowait,
757 // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
758 OMPRTL__tgt_mapper_num_components,
759 // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
760 // *base, void *begin, int64_t size, int64_t type);
761 OMPRTL__tgt_push_mapper_component,
764 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
766 class CleanupTy final : public EHScopeStack::Cleanup {
767 PrePostActionTy *Action;
770 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
771 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
772 if (!CGF.HaveInsertPoint())
778 } // anonymous namespace
780 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
781 CodeGenFunction::RunCleanupsScope Scope(CGF);
783 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
784 Callback(CodeGen, CGF, *PrePostAction);
786 PrePostActionTy Action;
787 Callback(CodeGen, CGF, Action);
791 /// Check if the combiner is a call to UDR combiner and if it is so return the
792 /// UDR decl used for reduction.
793 static const OMPDeclareReductionDecl *
794 getReductionInit(const Expr *ReductionOp) {
795 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
796 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
797 if (const auto *DRE =
798 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
799 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
804 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
805 const OMPDeclareReductionDecl *DRD,
807 Address Private, Address Original,
809 if (DRD->getInitializer()) {
810 std::pair<llvm::Function *, llvm::Function *> Reduction =
811 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
812 const auto *CE = cast<CallExpr>(InitOp);
813 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
814 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
815 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
817 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
819 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
820 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
821 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
822 [=]() { return Private; });
823 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
824 [=]() { return Original; });
825 (void)PrivateScope.Privatize();
826 RValue Func = RValue::get(Reduction.second);
827 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
828 CGF.EmitIgnoredExpr(InitOp);
830 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
831 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
832 auto *GV = new llvm::GlobalVariable(
833 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
834 llvm::GlobalValue::PrivateLinkage, Init, Name);
835 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
837 switch (CGF.getEvaluationKind(Ty)) {
839 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
843 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
846 InitRVal = RValue::getAggregate(LV.getAddress(CGF));
849 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
850 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
851 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
852 /*IsInitializer=*/false);
856 /// Emit initialization of arrays of complex types.
857 /// \param DestAddr Address of the array.
858 /// \param Type Type of array.
859 /// \param Init Initial expression of array.
860 /// \param SrcAddr Address of the original array.
861 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
862 QualType Type, bool EmitDeclareReductionInit,
864 const OMPDeclareReductionDecl *DRD,
865 Address SrcAddr = Address::invalid()) {
866 // Perform element-by-element initialization.
869 // Drill down to the base element type on both arrays.
870 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
871 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
873 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
876 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
878 llvm::Value *SrcBegin = nullptr;
880 SrcBegin = SrcAddr.getPointer();
881 llvm::Value *DestBegin = DestAddr.getPointer();
882 // Cast from pointer to array type to pointer to single element.
883 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
884 // The basic structure here is a while-do loop.
885 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
886 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
887 llvm::Value *IsEmpty =
888 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
889 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
891 // Enter the loop body, making that address the current address.
892 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
893 CGF.EmitBlock(BodyBB);
895 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
897 llvm::PHINode *SrcElementPHI = nullptr;
898 Address SrcElementCurrent = Address::invalid();
900 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
901 "omp.arraycpy.srcElementPast");
902 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
904 Address(SrcElementPHI,
905 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
907 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
908 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
909 DestElementPHI->addIncoming(DestBegin, EntryBB);
910 Address DestElementCurrent =
911 Address(DestElementPHI,
912 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
916 CodeGenFunction::RunCleanupsScope InitScope(CGF);
917 if (EmitDeclareReductionInit) {
918 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
919 SrcElementCurrent, ElementTy);
921 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
922 /*IsInitializer=*/false);
926 // Shift the address forward by one element.
927 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
928 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
929 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
932 // Shift the address forward by one element.
933 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
934 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
935 // Check whether we've reached the end.
937 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
938 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
939 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
942 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
945 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
946 return CGF.EmitOMPSharedLValue(E);
949 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
951 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
952 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
956 void ReductionCodeGen::emitAggregateInitialization(
957 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
958 const OMPDeclareReductionDecl *DRD) {
959 // Emit VarDecl with copy init for arrays.
960 // Get the address of the original variable captured in current
962 const auto *PrivateVD =
963 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
964 bool EmitDeclareReductionInit =
965 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
966 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
967 EmitDeclareReductionInit,
968 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
969 : PrivateVD->getInit(),
970 DRD, SharedLVal.getAddress(CGF));
973 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
974 ArrayRef<const Expr *> Privates,
975 ArrayRef<const Expr *> ReductionOps) {
976 ClausesData.reserve(Shareds.size());
977 SharedAddresses.reserve(Shareds.size());
978 Sizes.reserve(Shareds.size());
979 BaseDecls.reserve(Shareds.size());
980 auto IPriv = Privates.begin();
981 auto IRed = ReductionOps.begin();
982 for (const Expr *Ref : Shareds) {
983 ClausesData.emplace_back(Ref, *IPriv, *IRed);
984 std::advance(IPriv, 1);
985 std::advance(IRed, 1);
989 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
990 assert(SharedAddresses.size() == N &&
991 "Number of generated lvalues must be exactly N.");
992 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
993 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
994 SharedAddresses.emplace_back(First, Second);
997 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
998 const auto *PrivateVD =
999 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1000 QualType PrivateType = PrivateVD->getType();
1001 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
1002 if (!PrivateType->isVariablyModifiedType()) {
1005 SharedAddresses[N].first.getType().getNonReferenceType()),
1010 llvm::Value *SizeInChars;
1011 auto *ElemType = cast<llvm::PointerType>(
1012 SharedAddresses[N].first.getPointer(CGF)->getType())
1014 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1015 if (AsArraySection) {
1016 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
1017 SharedAddresses[N].first.getPointer(CGF));
1018 Size = CGF.Builder.CreateNUWAdd(
1019 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1020 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1022 SizeInChars = CGF.getTypeSize(
1023 SharedAddresses[N].first.getType().getNonReferenceType());
1024 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1026 Sizes.emplace_back(SizeInChars, Size);
1027 CodeGenFunction::OpaqueValueMapping OpaqueMap(
1029 cast<OpaqueValueExpr>(
1030 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1032 CGF.EmitVariablyModifiedType(PrivateType);
1035 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1036 llvm::Value *Size) {
1037 const auto *PrivateVD =
1038 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1039 QualType PrivateType = PrivateVD->getType();
1040 if (!PrivateType->isVariablyModifiedType()) {
1041 assert(!Size && !Sizes[N].second &&
1042 "Size should be nullptr for non-variably modified reduction "
1046 CodeGenFunction::OpaqueValueMapping OpaqueMap(
1048 cast<OpaqueValueExpr>(
1049 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1051 CGF.EmitVariablyModifiedType(PrivateType);
1054 void ReductionCodeGen::emitInitialization(
1055 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1056 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1057 assert(SharedAddresses.size() > N && "No variable was generated");
1058 const auto *PrivateVD =
1059 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1060 const OMPDeclareReductionDecl *DRD =
1061 getReductionInit(ClausesData[N].ReductionOp);
1062 QualType PrivateType = PrivateVD->getType();
1063 PrivateAddr = CGF.Builder.CreateElementBitCast(
1064 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1065 QualType SharedType = SharedAddresses[N].first.getType();
1066 SharedLVal = CGF.MakeAddrLValue(
1067 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
1068 CGF.ConvertTypeForMem(SharedType)),
1069 SharedType, SharedAddresses[N].first.getBaseInfo(),
1070 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1071 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1072 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1073 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1074 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1075 PrivateAddr, SharedLVal.getAddress(CGF),
1076 SharedLVal.getType());
1077 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1078 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1079 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1080 PrivateVD->getType().getQualifiers(),
1081 /*IsInitializer=*/false);
1085 bool ReductionCodeGen::needCleanups(unsigned N) {
1086 const auto *PrivateVD =
1087 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1088 QualType PrivateType = PrivateVD->getType();
1089 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1090 return DTorKind != QualType::DK_none;
1093 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1094 Address PrivateAddr) {
1095 const auto *PrivateVD =
1096 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1097 QualType PrivateType = PrivateVD->getType();
1098 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1099 if (needCleanups(N)) {
1100 PrivateAddr = CGF.Builder.CreateElementBitCast(
1101 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1102 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1106 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1108 BaseTy = BaseTy.getNonReferenceType();
1109 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1110 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1111 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1112 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
1114 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
1115 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1117 BaseTy = BaseTy->getPointeeType();
1119 return CGF.MakeAddrLValue(
1120 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
1121 CGF.ConvertTypeForMem(ElTy)),
1122 BaseLV.getType(), BaseLV.getBaseInfo(),
1123 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1126 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1127 llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1128 llvm::Value *Addr) {
1129 Address Tmp = Address::invalid();
1130 Address TopTmp = Address::invalid();
1131 Address MostTopTmp = Address::invalid();
1132 BaseTy = BaseTy.getNonReferenceType();
1133 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1134 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1135 Tmp = CGF.CreateMemTemp(BaseTy);
1136 if (TopTmp.isValid())
1137 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1141 BaseTy = BaseTy->getPointeeType();
1143 llvm::Type *Ty = BaseLVType;
1145 Ty = Tmp.getElementType();
1146 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1147 if (Tmp.isValid()) {
1148 CGF.Builder.CreateStore(Addr, Tmp);
1151 return Address(Addr, BaseLVAlignment);
1154 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1155 const VarDecl *OrigVD = nullptr;
1156 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1157 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1158 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1159 Base = TempOASE->getBase()->IgnoreParenImpCasts();
1160 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1161 Base = TempASE->getBase()->IgnoreParenImpCasts();
1162 DE = cast<DeclRefExpr>(Base);
1163 OrigVD = cast<VarDecl>(DE->getDecl());
1164 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1165 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1166 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1167 Base = TempASE->getBase()->IgnoreParenImpCasts();
1168 DE = cast<DeclRefExpr>(Base);
1169 OrigVD = cast<VarDecl>(DE->getDecl());
1174 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1175 Address PrivateAddr) {
1176 const DeclRefExpr *DE;
1177 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1178 BaseDecls.emplace_back(OrigVD);
1179 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1181 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1182 OriginalBaseLValue);
1183 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1184 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
1185 llvm::Value *PrivatePointer =
1186 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1187 PrivateAddr.getPointer(),
1188 SharedAddresses[N].first.getAddress(CGF).getType());
1189 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1190 return castToBase(CGF, OrigVD->getType(),
1191 SharedAddresses[N].first.getType(),
1192 OriginalBaseLValue.getAddress(CGF).getType(),
1193 OriginalBaseLValue.getAlignment(), Ptr);
1195 BaseDecls.emplace_back(
1196 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1200 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1201 const OMPDeclareReductionDecl *DRD =
1202 getReductionInit(ClausesData[N].ReductionOp);
1203 return DRD && DRD->getInitializer();
1206 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1207 return CGF.EmitLoadOfPointerLValue(
1208 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1209 getThreadIDVariable()->getType()->castAs<PointerType>());
1212 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1213 if (!CGF.HaveInsertPoint())
1215 // 1.2.2 OpenMP Language Terminology
1216 // Structured block - An executable statement with a single entry at the
1217 // top and a single exit at the bottom.
1218 // The point of exit cannot be a branch out of the structured block.
1219 // longjmp() and throw() must not violate the entry/exit criteria.
1220 CGF.EHStack.pushTerminate();
1222 CGF.EHStack.popTerminate();
1225 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1226 CodeGenFunction &CGF) {
1227 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1228 getThreadIDVariable()->getType(),
1229 AlignmentSource::Decl);
1232 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1234 auto *Field = FieldDecl::Create(
1235 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1236 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1237 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1238 Field->setAccess(AS_public);
1243 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1244 StringRef Separator)
1245 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1246 OffloadEntriesInfoManager(CGM) {
1247 ASTContext &C = CGM.getContext();
1248 RecordDecl *RD = C.buildImplicitRecord("ident_t");
1249 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1250 RD->startDefinition();
1252 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1254 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1256 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1258 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1260 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1261 RD->completeDefinition();
1262 IdentQTy = C.getRecordType(RD);
1263 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1264 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1266 loadOffloadInfoMetadata();
1269 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
1270 const GlobalDecl &OldGD,
1271 llvm::GlobalValue *OrigAddr,
1272 bool IsForDefinition) {
1273 // Emit at least a definition for the aliasee if the the address of the
1274 // original function is requested.
1275 if (IsForDefinition || OrigAddr)
1276 (void)CGM.GetAddrOfGlobal(NewGD);
1277 StringRef NewMangledName = CGM.getMangledName(NewGD);
1278 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
1279 if (Addr && !Addr->isDeclaration()) {
1280 const auto *D = cast<FunctionDecl>(OldGD.getDecl());
1281 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
1282 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);
1284 // Create a reference to the named value. This ensures that it is emitted
1285 // if a deferred decl.
1286 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);
1288 // Create the new alias itself, but don't set a name yet.
1290 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());
1293 assert(OrigAddr->isDeclaration() && "Expected declaration");
1295 GA->takeName(OrigAddr);
1296 OrigAddr->replaceAllUsesWith(
1297 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
1298 OrigAddr->eraseFromParent();
1300 GA->setName(CGM.getMangledName(OldGD));
1303 // Set attributes which are particular to an alias; this is a
1304 // specialization of the attributes which may be set on a global function.
1305 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
1306 D->isWeakImported())
1307 GA->setLinkage(llvm::Function::WeakAnyLinkage);
1309 CGM.SetCommonAttributes(OldGD, GA);
1315 void CGOpenMPRuntime::clear() {
1316 InternalVars.clear();
1317 // Clean non-target variable declarations possibly used only in debug info.
1318 for (const auto &Data : EmittedNonTargetVariables) {
1319 if (!Data.getValue().pointsToAliveValue())
1321 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1324 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1326 GV->eraseFromParent();
1328 // Emit aliases for the deferred aliasees.
1329 for (const auto &Pair : DeferredVariantFunction) {
1330 StringRef MangledName = CGM.getMangledName(Pair.second.second);
1331 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
1332 // If not able to emit alias, just emit original declaration.
1333 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
1334 /*IsForDefinition=*/false);
1338 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1339 SmallString<128> Buffer;
1340 llvm::raw_svector_ostream OS(Buffer);
1341 StringRef Sep = FirstSeparator;
1342 for (StringRef Part : Parts) {
1349 static llvm::Function *
1350 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1351 const Expr *CombinerInitializer, const VarDecl *In,
1352 const VarDecl *Out, bool IsCombiner) {
1353 // void .omp_combiner.(Ty *in, Ty *out);
1354 ASTContext &C = CGM.getContext();
1355 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1356 FunctionArgList Args;
1357 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1358 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1359 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1360 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1361 Args.push_back(&OmpOutParm);
1362 Args.push_back(&OmpInParm);
1363 const CGFunctionInfo &FnInfo =
1364 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1365 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1366 std::string Name = CGM.getOpenMPRuntime().getName(
1367 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1368 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1369 Name, &CGM.getModule());
1370 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1371 if (CGM.getLangOpts().Optimize) {
1372 Fn->removeFnAttr(llvm::Attribute::NoInline);
1373 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1374 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1376 CodeGenFunction CGF(CGM);
1377 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1378 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1379 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1380 Out->getLocation());
1381 CodeGenFunction::OMPPrivateScope Scope(CGF);
1382 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1383 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1384 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1387 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1388 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1389 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1392 (void)Scope.Privatize();
1393 if (!IsCombiner && Out->hasInit() &&
1394 !CGF.isTrivialInitializer(Out->getInit())) {
1395 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1396 Out->getType().getQualifiers(),
1397 /*IsInitializer=*/true);
1399 if (CombinerInitializer)
1400 CGF.EmitIgnoredExpr(CombinerInitializer);
1401 Scope.ForceCleanup();
1402 CGF.FinishFunction();
1406 void CGOpenMPRuntime::emitUserDefinedReduction(
1407 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1408 if (UDRMap.count(D) > 0)
1410 llvm::Function *Combiner = emitCombinerOrInitializer(
1411 CGM, D->getType(), D->getCombiner(),
1412 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1413 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1414 /*IsCombiner=*/true);
1415 llvm::Function *Initializer = nullptr;
1416 if (const Expr *Init = D->getInitializer()) {
1417 Initializer = emitCombinerOrInitializer(
1419 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1421 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1422 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1423 /*IsCombiner=*/false);
1425 UDRMap.try_emplace(D, Combiner, Initializer);
1427 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1428 Decls.second.push_back(D);
1432 std::pair<llvm::Function *, llvm::Function *>
1433 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1434 auto I = UDRMap.find(D);
1435 if (I != UDRMap.end())
1437 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1438 return UDRMap.lookup(D);
1442 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1443 // Builder if one is present.
1444 struct PushAndPopStackRAII {
1445 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1447 : OMPBuilder(OMPBuilder) {
1451 // The following callback is the crucial part of clangs cleanup process.
1454 // Once the OpenMPIRBuilder is used to create parallel regions (and
1455 // similar), the cancellation destination (Dest below) is determined via
1456 // IP. That means if we have variables to finalize we split the block at IP,
1457 // use the new block (=BB) as destination to build a JumpDest (via
1458 // getJumpDestInCurrentScope(BB)) which then is fed to
1459 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1460 // to push & pop an FinalizationInfo object.
1461 // The FiniCB will still be needed but at the point where the
1462 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1463 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1464 assert(IP.getBlock()->end() == IP.getPoint() &&
1465 "Clang CG should cause non-terminated block!");
1466 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1467 CGF.Builder.restoreIP(IP);
1468 CodeGenFunction::JumpDest Dest =
1469 CGF.getOMPCancelDestination(OMPD_parallel);
1470 CGF.EmitBranchThroughCleanup(Dest);
1473 // TODO: Remove this once we emit parallel regions through the
1474 // OpenMPIRBuilder as it can do this setup internally.
1475 llvm::OpenMPIRBuilder::FinalizationInfo FI(
1476 {FiniCB, OMPD_parallel, HasCancel});
1477 OMPBuilder->pushFinalizationCB(std::move(FI));
1479 ~PushAndPopStackRAII() {
1481 OMPBuilder->popFinalizationCB();
1483 llvm::OpenMPIRBuilder *OMPBuilder;
1487 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1488 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1489 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1490 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1491 assert(ThreadIDVar->getType()->isPointerType() &&
1492 "thread id variable must be of type kmp_int32 *");
1493 CodeGenFunction CGF(CGM, true);
1494 bool HasCancel = false;
1495 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1496 HasCancel = OPD->hasCancel();
1497 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1498 HasCancel = OPSD->hasCancel();
1499 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1500 HasCancel = OPFD->hasCancel();
1501 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1502 HasCancel = OPFD->hasCancel();
1503 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1504 HasCancel = OPFD->hasCancel();
1505 else if (const auto *OPFD =
1506 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1507 HasCancel = OPFD->hasCancel();
1508 else if (const auto *OPFD =
1509 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1510 HasCancel = OPFD->hasCancel();
1512 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1513 // parallel region to make cancellation barriers work properly.
1514 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
1515 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
1516 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1517 HasCancel, OutlinedHelperName);
1518 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1519 return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1522 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1523 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1524 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1525 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1526 return emitParallelOrTeamsOutlinedFunction(
1527 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1530 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1531 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1532 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1533 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1534 return emitParallelOrTeamsOutlinedFunction(
1535 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1538 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1539 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1540 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1541 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1542 bool Tied, unsigned &NumberOfParts) {
1543 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1544 PrePostActionTy &) {
1545 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1546 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1547 llvm::Value *TaskArgs[] = {
1549 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1550 TaskTVar->getType()->castAs<PointerType>())
1552 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1554 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1556 CodeGen.setAction(Action);
1557 assert(!ThreadIDVar->getType()->isPointerType() &&
1558 "thread id variable must be of type kmp_int32 for tasks");
1559 const OpenMPDirectiveKind Region =
1560 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1562 const CapturedStmt *CS = D.getCapturedStmt(Region);
1563 const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1564 CodeGenFunction CGF(CGM, true);
1565 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1567 TD ? TD->hasCancel() : false, Action);
1568 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1569 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1571 NumberOfParts = Action.getNumberOfParts();
1575 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1576 const RecordDecl *RD, const CGRecordLayout &RL,
1577 ArrayRef<llvm::Constant *> Data) {
1578 llvm::StructType *StructTy = RL.getLLVMType();
1579 unsigned PrevIdx = 0;
1580 ConstantInitBuilder CIBuilder(CGM);
1581 auto DI = Data.begin();
1582 for (const FieldDecl *FD : RD->fields()) {
1583 unsigned Idx = RL.getLLVMFieldNo(FD);
1584 // Fill the alignment.
1585 for (unsigned I = PrevIdx; I < Idx; ++I)
1586 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1593 template <class... As>
1594 static llvm::GlobalVariable *
1595 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1596 ArrayRef<llvm::Constant *> Data, const Twine &Name,
1598 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1599 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1600 ConstantInitBuilder CIBuilder(CGM);
1601 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1602 buildStructValue(Fields, CGM, RD, RL, Data);
1603 return Fields.finishAndCreateGlobal(
1604 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1605 std::forward<As>(Args)...);
1608 template <typename T>
1610 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1611 ArrayRef<llvm::Constant *> Data,
1613 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1614 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1615 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1616 buildStructValue(Fields, CGM, RD, RL, Data);
1617 Fields.finishAndAddTo(Parent);
1620 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1621 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1622 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1623 FlagsTy FlagsKey(Flags, Reserved2Flags);
1624 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1626 if (!DefaultOpenMPPSource) {
1627 // Initialize default location for psource field of ident_t structure of
1628 // all ident_t objects. Format is ";file;function;line;column;;".
1630 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1631 DefaultOpenMPPSource =
1632 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1633 DefaultOpenMPPSource =
1634 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1637 llvm::Constant *Data[] = {
1638 llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1639 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1640 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1641 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1642 llvm::GlobalValue *DefaultOpenMPLocation =
1643 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1644 llvm::GlobalValue::PrivateLinkage);
1645 DefaultOpenMPLocation->setUnnamedAddr(
1646 llvm::GlobalValue::UnnamedAddr::Global);
1648 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1650 return Address(Entry, Align);
1653 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1654 bool AtCurrentPoint) {
1655 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1656 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1658 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1659 if (AtCurrentPoint) {
1660 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1661 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1663 Elem.second.ServiceInsertPt =
1664 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1665 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1669 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1670 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1671 if (Elem.second.ServiceInsertPt) {
1672 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1673 Elem.second.ServiceInsertPt = nullptr;
1674 Ptr->eraseFromParent();
1678 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1681 Flags |= OMP_IDENT_KMPC;
1682 // If no debug info is generated - return global default location.
1683 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1685 return getOrCreateDefaultLocation(Flags).getPointer();
1687 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1689 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1690 Address LocValue = Address::invalid();
1691 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1692 if (I != OpenMPLocThreadIDMap.end())
1693 LocValue = Address(I->second.DebugLoc, Align);
1695 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1696 // GetOpenMPThreadID was called before this routine.
1697 if (!LocValue.isValid()) {
1698 // Generate "ident_t .kmpc_loc.addr;"
1699 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1700 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1701 Elem.second.DebugLoc = AI.getPointer();
1704 if (!Elem.second.ServiceInsertPt)
1705 setLocThreadIdInsertPt(CGF);
1706 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1707 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1708 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1709 CGF.getTypeSize(IdentQTy));
1712 // char **psource = &.kmpc_loc_<flags>.addr.psource;
1713 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1714 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1716 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1718 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1719 if (OMPDebugLoc == nullptr) {
1720 SmallString<128> Buffer2;
1721 llvm::raw_svector_ostream OS2(Buffer2);
1722 // Build debug location
1723 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1724 OS2 << ";" << PLoc.getFilename() << ";";
1725 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1726 OS2 << FD->getQualifiedNameAsString();
1727 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1728 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1729 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1731 // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1732 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1734 // Our callers always pass this to a runtime function, so for
1735 // convenience, go ahead and return a naked pointer.
1736 return LocValue.getPointer();
1739 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1740 SourceLocation Loc) {
1741 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1743 llvm::Value *ThreadID = nullptr;
1744 // Check whether we've already cached a load of the thread id in this
1746 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1747 if (I != OpenMPLocThreadIDMap.end()) {
1748 ThreadID = I->second.ThreadID;
1749 if (ThreadID != nullptr)
1752 // If exceptions are enabled, do not use parameter to avoid possible crash.
1753 if (auto *OMPRegionInfo =
1754 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1755 if (OMPRegionInfo->getThreadIDVariable()) {
1756 // Check if this an outlined function with thread id passed as argument.
1757 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1758 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1759 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1760 !CGF.getLangOpts().CXXExceptions ||
1761 CGF.Builder.GetInsertBlock() == TopBlock ||
1762 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1763 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1765 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1766 CGF.Builder.GetInsertBlock()) {
1767 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1768 // If value loaded in entry block, cache it and use it everywhere in
1770 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1771 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1772 Elem.second.ThreadID = ThreadID;
1779 // This is not an outlined function region - need to call __kmpc_int32
1780 // kmpc_global_thread_num(ident_t *loc).
1781 // Generate thread id value and cache this value for use across the
1783 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1784 if (!Elem.second.ServiceInsertPt)
1785 setLocThreadIdInsertPt(CGF);
1786 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1787 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1788 llvm::CallInst *Call = CGF.Builder.CreateCall(
1789 createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1790 emitUpdateLocation(CGF, Loc));
1791 Call->setCallingConv(CGF.getRuntimeCC());
1792 Elem.second.ThreadID = Call;
1796 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1797 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1798 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1799 clearLocThreadIdInsertPt(CGF);
1800 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1802 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1803 for(auto *D : FunctionUDRMap[CGF.CurFn])
1805 FunctionUDRMap.erase(CGF.CurFn);
1807 auto I = FunctionUDMMap.find(CGF.CurFn);
1808 if (I != FunctionUDMMap.end()) {
1809 for(auto *D : I->second)
1811 FunctionUDMMap.erase(I);
1815 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1816 return IdentTy->getPointerTo();
1819 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1820 if (!Kmpc_MicroTy) {
1821 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1822 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1823 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1824 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1826 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1829 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1830 llvm::FunctionCallee RTLFn = nullptr;
1831 switch (static_cast<OpenMPRTLFunction>(Function)) {
1832 case OMPRTL__kmpc_fork_call: {
1833 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1835 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1836 getKmpc_MicroPointerTy()};
1838 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1839 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1840 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1841 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1842 llvm::LLVMContext &Ctx = F->getContext();
1843 llvm::MDBuilder MDB(Ctx);
1844 // Annotate the callback behavior of the __kmpc_fork_call:
1845 // - The callback callee is argument number 2 (microtask).
1846 // - The first two arguments of the callback callee are unknown (-1).
1847 // - All variadic arguments to the __kmpc_fork_call are passed to the
1850 llvm::LLVMContext::MD_callback,
1851 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1853 /* VarArgsArePassed */ true)}));
1858 case OMPRTL__kmpc_global_thread_num: {
1859 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1860 llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1862 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1863 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1866 case OMPRTL__kmpc_threadprivate_cached: {
1867 // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1868 // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1869 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1870 CGM.VoidPtrTy, CGM.SizeTy,
1871 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1873 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1874 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1877 case OMPRTL__kmpc_critical: {
1878 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1879 // kmp_critical_name *crit);
1880 llvm::Type *TypeParams[] = {
1881 getIdentTyPointerTy(), CGM.Int32Ty,
1882 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1884 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1885 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1888 case OMPRTL__kmpc_critical_with_hint: {
1889 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1890 // kmp_critical_name *crit, uintptr_t hint);
1891 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1892 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1895 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1896 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1899 case OMPRTL__kmpc_threadprivate_register: {
1900 // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1901 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1902 // typedef void *(*kmpc_ctor)(void *);
1904 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1905 /*isVarArg*/ false)->getPointerTo();
1906 // typedef void *(*kmpc_cctor)(void *, void *);
1907 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1908 auto *KmpcCopyCtorTy =
1909 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1912 // typedef void (*kmpc_dtor)(void *);
1914 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1916 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1917 KmpcCopyCtorTy, KmpcDtorTy};
1918 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1919 /*isVarArg*/ false);
1920 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1923 case OMPRTL__kmpc_end_critical: {
1924 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1925 // kmp_critical_name *crit);
1926 llvm::Type *TypeParams[] = {
1927 getIdentTyPointerTy(), CGM.Int32Ty,
1928 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1930 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1931 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1934 case OMPRTL__kmpc_cancel_barrier: {
1935 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1937 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1939 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1940 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1943 case OMPRTL__kmpc_barrier: {
1944 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1945 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1947 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1948 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1951 case OMPRTL__kmpc_for_static_fini: {
1952 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1953 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1955 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1956 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1959 case OMPRTL__kmpc_push_num_threads: {
1960 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1961 // kmp_int32 num_threads)
1962 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1965 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1966 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1969 case OMPRTL__kmpc_serialized_parallel: {
1970 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1972 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1974 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1975 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1978 case OMPRTL__kmpc_end_serialized_parallel: {
1979 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1981 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1983 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1984 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1987 case OMPRTL__kmpc_flush: {
1988 // Build void __kmpc_flush(ident_t *loc);
1989 llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1991 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1992 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1995 case OMPRTL__kmpc_master: {
1996 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1997 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1999 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2000 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
2003 case OMPRTL__kmpc_end_master: {
2004 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
2005 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2007 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2008 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
2011 case OMPRTL__kmpc_omp_taskyield: {
2012 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
2014 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2016 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2017 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
2020 case OMPRTL__kmpc_single: {
2021 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
2022 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2024 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2025 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
2028 case OMPRTL__kmpc_end_single: {
2029 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
2030 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2032 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2033 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
2036 case OMPRTL__kmpc_omp_task_alloc: {
2037 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
2038 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2039 // kmp_routine_entry_t *task_entry);
2040 assert(KmpRoutineEntryPtrTy != nullptr &&
2041 "Type kmp_routine_entry_t must be created.");
2042 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2043 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
2044 // Return void * and then cast to particular kmp_task_t type.
2046 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2047 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
2050 case OMPRTL__kmpc_omp_target_task_alloc: {
2051 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
2052 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
2053 // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
2054 assert(KmpRoutineEntryPtrTy != nullptr &&
2055 "Type kmp_routine_entry_t must be created.");
2056 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2057 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
2059 // Return void * and then cast to particular kmp_task_t type.
2061 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2062 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
2065 case OMPRTL__kmpc_omp_task: {
2066 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2068 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2071 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2072 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
2075 case OMPRTL__kmpc_copyprivate: {
2076 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
2077 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
2078 // kmp_int32 didit);
2079 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2081 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
2082 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
2083 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
2086 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2087 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
2090 case OMPRTL__kmpc_reduce: {
2091 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
2092 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
2093 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
2094 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2095 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2096 /*isVarArg=*/false);
2097 llvm::Type *TypeParams[] = {
2098 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2099 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2100 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2102 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2103 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
2106 case OMPRTL__kmpc_reduce_nowait: {
2107 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
2108 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2109 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
2111 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2112 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
2113 /*isVarArg=*/false);
2114 llvm::Type *TypeParams[] = {
2115 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
2116 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
2117 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2119 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2120 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
2123 case OMPRTL__kmpc_end_reduce: {
2124 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2125 // kmp_critical_name *lck);
2126 llvm::Type *TypeParams[] = {
2127 getIdentTyPointerTy(), CGM.Int32Ty,
2128 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2130 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2131 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2134 case OMPRTL__kmpc_end_reduce_nowait: {
2135 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2136 // kmp_critical_name *lck);
2137 llvm::Type *TypeParams[] = {
2138 getIdentTyPointerTy(), CGM.Int32Ty,
2139 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2141 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2143 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2146 case OMPRTL__kmpc_omp_task_begin_if0: {
2147 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2149 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2152 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2154 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2157 case OMPRTL__kmpc_omp_task_complete_if0: {
2158 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2160 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2163 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2164 RTLFn = CGM.CreateRuntimeFunction(FnTy,
2165 /*Name=*/"__kmpc_omp_task_complete_if0");
2168 case OMPRTL__kmpc_ordered: {
2169 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2170 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2172 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2173 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2176 case OMPRTL__kmpc_end_ordered: {
2177 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2178 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2180 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2181 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2184 case OMPRTL__kmpc_omp_taskwait: {
2185 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2186 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2188 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2189 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2192 case OMPRTL__kmpc_taskgroup: {
2193 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2194 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2196 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2197 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2200 case OMPRTL__kmpc_end_taskgroup: {
2201 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2202 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2204 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2205 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2208 case OMPRTL__kmpc_push_proc_bind: {
2209 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2211 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2213 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2214 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2217 case OMPRTL__kmpc_omp_task_with_deps: {
2218 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2219 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2220 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2221 llvm::Type *TypeParams[] = {
2222 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2223 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
2225 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2227 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2230 case OMPRTL__kmpc_omp_wait_deps: {
2231 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2232 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2233 // kmp_depend_info_t *noalias_dep_list);
2234 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2235 CGM.Int32Ty, CGM.VoidPtrTy,
2236 CGM.Int32Ty, CGM.VoidPtrTy};
2238 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2239 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2242 case OMPRTL__kmpc_cancellationpoint: {
2243 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2244 // global_tid, kmp_int32 cncl_kind)
2245 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2247 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2248 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2251 case OMPRTL__kmpc_cancel: {
2252 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2253 // kmp_int32 cncl_kind)
2254 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2256 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2257 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2260 case OMPRTL__kmpc_push_num_teams: {
2261 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2262 // kmp_int32 num_teams, kmp_int32 num_threads)
2263 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2266 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2267 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2270 case OMPRTL__kmpc_fork_teams: {
2271 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2273 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2274 getKmpc_MicroPointerTy()};
2276 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2277 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2278 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2279 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2280 llvm::LLVMContext &Ctx = F->getContext();
2281 llvm::MDBuilder MDB(Ctx);
2282 // Annotate the callback behavior of the __kmpc_fork_teams:
2283 // - The callback callee is argument number 2 (microtask).
2284 // - The first two arguments of the callback callee are unknown (-1).
2285 // - All variadic arguments to the __kmpc_fork_teams are passed to the
2288 llvm::LLVMContext::MD_callback,
2289 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2291 /* VarArgsArePassed */ true)}));
2296 case OMPRTL__kmpc_taskloop: {
2297 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2298 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2299 // sched, kmp_uint64 grainsize, void *task_dup);
2300 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2304 CGM.Int64Ty->getPointerTo(),
2305 CGM.Int64Ty->getPointerTo(),
2312 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2313 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2316 case OMPRTL__kmpc_doacross_init: {
2317 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2318 // num_dims, struct kmp_dim *dims);
2319 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2324 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2325 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2328 case OMPRTL__kmpc_doacross_fini: {
2329 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2330 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2332 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2333 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2336 case OMPRTL__kmpc_doacross_post: {
2337 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2339 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2340 CGM.Int64Ty->getPointerTo()};
2342 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2343 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2346 case OMPRTL__kmpc_doacross_wait: {
2347 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2349 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2350 CGM.Int64Ty->getPointerTo()};
2352 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2353 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2356 case OMPRTL__kmpc_task_reduction_init: {
2357 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2359 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2361 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2363 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2366 case OMPRTL__kmpc_task_reduction_get_th_data: {
2367 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2369 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2371 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2372 RTLFn = CGM.CreateRuntimeFunction(
2373 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2376 case OMPRTL__kmpc_alloc: {
2377 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2378 // al); omp_allocator_handle_t type is void *.
2379 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2381 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2382 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2385 case OMPRTL__kmpc_free: {
2386 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2387 // al); omp_allocator_handle_t type is void *.
2388 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2390 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2391 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2394 case OMPRTL__kmpc_push_target_tripcount: {
2395 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2397 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2398 llvm::FunctionType *FnTy =
2399 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2400 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2403 case OMPRTL__tgt_target: {
2404 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2405 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2407 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2412 CGM.Int64Ty->getPointerTo(),
2413 CGM.Int64Ty->getPointerTo()};
2415 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2416 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2419 case OMPRTL__tgt_target_nowait: {
2420 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2421 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2422 // int64_t *arg_types);
2423 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2428 CGM.Int64Ty->getPointerTo(),
2429 CGM.Int64Ty->getPointerTo()};
2431 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2432 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2435 case OMPRTL__tgt_target_teams: {
2436 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2437 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2438 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2439 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2444 CGM.Int64Ty->getPointerTo(),
2445 CGM.Int64Ty->getPointerTo(),
2449 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2450 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2453 case OMPRTL__tgt_target_teams_nowait: {
2454 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2455 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2456 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2457 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2462 CGM.Int64Ty->getPointerTo(),
2463 CGM.Int64Ty->getPointerTo(),
2467 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2468 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2471 case OMPRTL__tgt_register_requires: {
2472 // Build void __tgt_register_requires(int64_t flags);
2473 llvm::Type *TypeParams[] = {CGM.Int64Ty};
2475 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2476 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2479 case OMPRTL__tgt_target_data_begin: {
2480 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2481 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2482 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2486 CGM.Int64Ty->getPointerTo(),
2487 CGM.Int64Ty->getPointerTo()};
2489 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2493 case OMPRTL__tgt_target_data_begin_nowait: {
2494 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2495 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2497 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2501 CGM.Int64Ty->getPointerTo(),
2502 CGM.Int64Ty->getPointerTo()};
2504 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2505 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2508 case OMPRTL__tgt_target_data_end: {
2509 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2510 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2511 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2515 CGM.Int64Ty->getPointerTo(),
2516 CGM.Int64Ty->getPointerTo()};
2518 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2519 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2522 case OMPRTL__tgt_target_data_end_nowait: {
2523 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2524 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2526 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2530 CGM.Int64Ty->getPointerTo(),
2531 CGM.Int64Ty->getPointerTo()};
2533 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2534 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2537 case OMPRTL__tgt_target_data_update: {
2538 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2539 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2540 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2544 CGM.Int64Ty->getPointerTo(),
2545 CGM.Int64Ty->getPointerTo()};
2547 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2548 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2551 case OMPRTL__tgt_target_data_update_nowait: {
2552 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2553 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2555 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2559 CGM.Int64Ty->getPointerTo(),
2560 CGM.Int64Ty->getPointerTo()};
2562 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2563 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2566 case OMPRTL__tgt_mapper_num_components: {
2567 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
2568 llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
2570 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
2571 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
2574 case OMPRTL__tgt_push_mapper_component: {
2575 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
2576 // *base, void *begin, int64_t size, int64_t type);
2577 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
2578 CGM.Int64Ty, CGM.Int64Ty};
2580 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2581 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
2585 assert(RTLFn && "Unable to find OpenMP runtime function");
2589 llvm::FunctionCallee
2590 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2591 assert((IVSize == 32 || IVSize == 64) &&
2592 "IV size is not compatible with the omp runtime");
2593 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2594 : "__kmpc_for_static_init_4u")
2595 : (IVSigned ? "__kmpc_for_static_init_8"
2596 : "__kmpc_for_static_init_8u");
2597 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2598 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2599 llvm::Type *TypeParams[] = {
2600 getIdentTyPointerTy(), // loc
2602 CGM.Int32Ty, // schedtype
2603 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2611 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2612 return CGM.CreateRuntimeFunction(FnTy, Name);
2615 llvm::FunctionCallee
2616 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2617 assert((IVSize == 32 || IVSize == 64) &&
2618 "IV size is not compatible with the omp runtime");
2621 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2622 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2623 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2624 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2626 CGM.Int32Ty, // schedtype
2633 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2634 return CGM.CreateRuntimeFunction(FnTy, Name);
2637 llvm::FunctionCallee
2638 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2639 assert((IVSize == 32 || IVSize == 64) &&
2640 "IV size is not compatible with the omp runtime");
2643 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2644 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2645 llvm::Type *TypeParams[] = {
2646 getIdentTyPointerTy(), // loc
2650 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2651 return CGM.CreateRuntimeFunction(FnTy, Name);
2654 llvm::FunctionCallee
2655 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2656 assert((IVSize == 32 || IVSize == 64) &&
2657 "IV size is not compatible with the omp runtime");
2660 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2661 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2662 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2663 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2664 llvm::Type *TypeParams[] = {
2665 getIdentTyPointerTy(), // loc
2667 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2673 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2674 return CGM.CreateRuntimeFunction(FnTy, Name);
2677 /// Obtain information that uniquely identifies a target entry. This
2678 /// consists of the file and device IDs as well as line number associated with
2679 /// the relevant entry source location.
2680 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2681 unsigned &DeviceID, unsigned &FileID,
2682 unsigned &LineNum) {
2683 SourceManager &SM = C.getSourceManager();
2685 // The loc should be always valid and have a file ID (the user cannot use
2686 // #pragma directives in macros)
2688 assert(Loc.isValid() && "Source location is expected to be always valid.");
2690 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2691 assert(PLoc.isValid() && "Source location is expected to be always valid.");
2693 llvm::sys::fs::UniqueID ID;
2694 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2695 SM.getDiagnostics().Report(diag::err_cannot_open_file)
2696 << PLoc.getFilename() << EC.message();
2698 DeviceID = ID.getDevice();
2699 FileID = ID.getFile();
2700 LineNum = PLoc.getLine();
2703 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
2704 if (CGM.getLangOpts().OpenMPSimd)
2705 return Address::invalid();
2706 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2707 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2708 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2709 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2710 HasRequiresUnifiedSharedMemory))) {
2711 SmallString<64> PtrName;
2713 llvm::raw_svector_ostream OS(PtrName);
2714 OS << CGM.getMangledName(GlobalDecl(VD));
2715 if (!VD->isExternallyVisible()) {
2716 unsigned DeviceID, FileID, Line;
2717 getTargetEntryUniqueInfo(CGM.getContext(),
2718 VD->getCanonicalDecl()->getBeginLoc(),
2719 DeviceID, FileID, Line);
2720 OS << llvm::format("_%x", FileID);
2722 OS << "_decl_tgt_ref_ptr";
2724 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2726 QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2727 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2730 auto *GV = cast<llvm::GlobalVariable>(Ptr);
2731 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
2733 if (!CGM.getLangOpts().OpenMPIsDevice)
2734 GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2735 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2737 return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2739 return Address::invalid();
2743 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2744 assert(!CGM.getLangOpts().OpenMPUseTLS ||
2745 !CGM.getContext().getTargetInfo().isTLSSupported());
2746 // Lookup the entry, lazily creating it if necessary.
2747 std::string Suffix = getName({"cache", ""});
2748 return getOrCreateInternalVariable(
2749 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2752 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2755 SourceLocation Loc) {
2756 if (CGM.getLangOpts().OpenMPUseTLS &&
2757 CGM.getContext().getTargetInfo().isTLSSupported())
2760 llvm::Type *VarTy = VDAddr.getElementType();
2761 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2762 CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2764 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2765 getOrCreateThreadPrivateCache(VD)};
2766 return Address(CGF.EmitRuntimeCall(
2767 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2768 VDAddr.getAlignment());
2771 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2772 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2773 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2774 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2776 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2777 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2779 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2780 // to register constructor/destructor for variable.
2781 llvm::Value *Args[] = {
2782 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2783 Ctor, CopyCtor, Dtor};
2784 CGF.EmitRuntimeCall(
2785 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2788 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2789 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2790 bool PerformInit, CodeGenFunction *CGF) {
2791 if (CGM.getLangOpts().OpenMPUseTLS &&
2792 CGM.getContext().getTargetInfo().isTLSSupported())
2795 VD = VD->getDefinition(CGM.getContext());
2796 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2797 QualType ASTTy = VD->getType();
2799 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2800 const Expr *Init = VD->getAnyInitializer();
2801 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2802 // Generate function that re-emits the declaration's initializer into the
2803 // threadprivate copy of the variable VD
2804 CodeGenFunction CtorCGF(CGM);
2805 FunctionArgList Args;
2806 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2807 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2808 ImplicitParamDecl::Other);
2809 Args.push_back(&Dst);
2811 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2812 CGM.getContext().VoidPtrTy, Args);
2813 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2814 std::string Name = getName({"__kmpc_global_ctor_", ""});
2815 llvm::Function *Fn =
2816 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2817 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2819 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2820 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2821 CGM.getContext().VoidPtrTy, Dst.getLocation());
2822 Address Arg = Address(ArgVal, VDAddr.getAlignment());
2823 Arg = CtorCGF.Builder.CreateElementBitCast(
2824 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2825 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2826 /*IsInitializer=*/true);
2827 ArgVal = CtorCGF.EmitLoadOfScalar(
2828 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2829 CGM.getContext().VoidPtrTy, Dst.getLocation());
2830 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2831 CtorCGF.FinishFunction();
2834 if (VD->getType().isDestructedType() != QualType::DK_none) {
2835 // Generate function that emits destructor call for the threadprivate copy
2836 // of the variable VD
2837 CodeGenFunction DtorCGF(CGM);
2838 FunctionArgList Args;
2839 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2840 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2841 ImplicitParamDecl::Other);
2842 Args.push_back(&Dst);
2844 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2845 CGM.getContext().VoidTy, Args);
2846 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2847 std::string Name = getName({"__kmpc_global_dtor_", ""});
2848 llvm::Function *Fn =
2849 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2850 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2851 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2853 // Create a scope with an artificial location for the body of this function.
2854 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2855 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2856 DtorCGF.GetAddrOfLocalVar(&Dst),
2857 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2858 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2859 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2860 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2861 DtorCGF.FinishFunction();
2864 // Do not emit init function if it is not required.
2868 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2869 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2872 // Copying constructor for the threadprivate variable.
2873 // Must be NULL - reserved by runtime, but currently it requires that this
2874 // parameter is always NULL. Otherwise it fires assertion.
2875 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2876 if (Ctor == nullptr) {
2877 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2880 Ctor = llvm::Constant::getNullValue(CtorTy);
2882 if (Dtor == nullptr) {
2883 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2886 Dtor = llvm::Constant::getNullValue(DtorTy);
2889 auto *InitFunctionTy =
2890 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2891 std::string Name = getName({"__omp_threadprivate_init_", ""});
2892 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2893 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2894 CodeGenFunction InitCGF(CGM);
2895 FunctionArgList ArgList;
2896 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2897 CGM.getTypes().arrangeNullaryFunction(), ArgList,
2899 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2900 InitCGF.FinishFunction();
2901 return InitFunction;
2903 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2908 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2909 llvm::GlobalVariable *Addr,
2911 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
2912 !CGM.getLangOpts().OpenMPIsDevice)
2914 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2915 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2916 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2917 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2918 HasRequiresUnifiedSharedMemory))
2919 return CGM.getLangOpts().OpenMPIsDevice;
2920 VD = VD->getDefinition(CGM.getContext());
2921 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2922 return CGM.getLangOpts().OpenMPIsDevice;
2924 QualType ASTTy = VD->getType();
2926 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2927 // Produce the unique prefix to identify the new target regions. We use
2928 // the source location of the variable declaration which we know to not
2929 // conflict with any target region.
2933 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2934 SmallString<128> Buffer, Out;
2936 llvm::raw_svector_ostream OS(Buffer);
2937 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2938 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2941 const Expr *Init = VD->getAnyInitializer();
2942 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2943 llvm::Constant *Ctor;
2945 if (CGM.getLangOpts().OpenMPIsDevice) {
2946 // Generate function that re-emits the declaration's initializer into
2947 // the threadprivate copy of the variable VD
2948 CodeGenFunction CtorCGF(CGM);
2950 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2951 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2952 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2953 FTy, Twine(Buffer, "_ctor"), FI, Loc);
2954 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2955 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2956 FunctionArgList(), Loc, Loc);
2957 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2958 CtorCGF.EmitAnyExprToMem(Init,
2959 Address(Addr, CGM.getContext().getDeclAlign(VD)),
2960 Init->getType().getQualifiers(),
2961 /*IsInitializer=*/true);
2962 CtorCGF.FinishFunction();
2964 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2965 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2967 Ctor = new llvm::GlobalVariable(
2968 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2969 llvm::GlobalValue::PrivateLinkage,
2970 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2974 // Register the information for the entry associated with the constructor.
2976 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2977 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2978 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2980 if (VD->getType().isDestructedType() != QualType::DK_none) {
2981 llvm::Constant *Dtor;
2983 if (CGM.getLangOpts().OpenMPIsDevice) {
2984 // Generate function that emits destructor call for the threadprivate
2985 // copy of the variable VD
2986 CodeGenFunction DtorCGF(CGM);
2988 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2989 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2990 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2991 FTy, Twine(Buffer, "_dtor"), FI, Loc);
2992 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2993 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2994 FunctionArgList(), Loc, Loc);
2995 // Create a scope with an artificial location for the body of this
2997 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2998 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2999 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
3000 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
3001 DtorCGF.FinishFunction();
3003 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
3004 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
3006 Dtor = new llvm::GlobalVariable(
3007 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
3008 llvm::GlobalValue::PrivateLinkage,
3009 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
3012 // Register the information for the entry associated with the destructor.
3014 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
3015 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
3016 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
3018 return CGM.getLangOpts().OpenMPIsDevice;
3021 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
3024 std::string Suffix = getName({"artificial", ""});
3025 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
3026 llvm::Value *GAddr =
3027 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
3028 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
3029 CGM.getTarget().isTLSSupported()) {
3030 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
3031 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
3033 std::string CacheSuffix = getName({"cache", ""});
3034 llvm::Value *Args[] = {
3035 emitUpdateLocation(CGF, SourceLocation()),
3036 getThreadID(CGF, SourceLocation()),
3037 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
3038 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
3039 /*isSigned=*/false),
3040 getOrCreateInternalVariable(
3041 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
3043 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3044 CGF.EmitRuntimeCall(
3045 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
3046 VarLVType->getPointerTo(/*AddrSpace=*/0)),
3047 CGM.getContext().getTypeAlignInChars(VarType));
3050 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
3051 const RegionCodeGenTy &ThenGen,
3052 const RegionCodeGenTy &ElseGen) {
3053 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
3055 // If the condition constant folds and can be elided, try to avoid emitting
3056 // the condition and the dead arm of the if/else.
3058 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
3066 // Otherwise, the condition did not fold, or we couldn't elide it. Just
3067 // emit the conditional branch.
3068 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
3069 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
3070 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
3071 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
3073 // Emit the 'then' code.
3074 CGF.EmitBlock(ThenBlock);
3076 CGF.EmitBranch(ContBlock);
3077 // Emit the 'else' code if present.
3078 // There is no need to emit line number for unconditional branch.
3079 (void)ApplyDebugLocation::CreateEmpty(CGF);
3080 CGF.EmitBlock(ElseBlock);
3082 // There is no need to emit line number for unconditional branch.
3083 (void)ApplyDebugLocation::CreateEmpty(CGF);
3084 CGF.EmitBranch(ContBlock);
3085 // Emit the continuation block for code after the if.
3086 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
3089 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
3090 llvm::Function *OutlinedFn,
3091 ArrayRef<llvm::Value *> CapturedVars,
3092 const Expr *IfCond) {
3093 if (!CGF.HaveInsertPoint())
3095 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
3096 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
3097 PrePostActionTy &) {
3098 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
3099 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3100 llvm::Value *Args[] = {
3102 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
3103 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
3104 llvm::SmallVector<llvm::Value *, 16> RealArgs;
3105 RealArgs.append(std::begin(Args), std::end(Args));
3106 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
3108 llvm::FunctionCallee RTLFn =
3109 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
3110 CGF.EmitRuntimeCall(RTLFn, RealArgs);
3112 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
3113 PrePostActionTy &) {
3114 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
3115 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
3117 // __kmpc_serialized_parallel(&Loc, GTid);
3118 llvm::Value *Args[] = {RTLoc, ThreadID};
3119 CGF.EmitRuntimeCall(
3120 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
3122 // OutlinedFn(>id, &zero_bound, CapturedStruct);
3123 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
3124 Address ZeroAddrBound =
3125 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
3126 /*Name=*/".bound.zero.addr");
3127 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
3128 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
3129 // ThreadId for serialized parallels is 0.
3130 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
3131 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
3132 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
3133 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
3135 // __kmpc_end_serialized_parallel(&Loc, GTid);
3136 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
3137 CGF.EmitRuntimeCall(
3138 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
3142 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
3144 RegionCodeGenTy ThenRCG(ThenGen);
3149 // If we're inside an (outlined) parallel region, use the region info's
3150 // thread-ID variable (it is passed in a first argument of the outlined function
3151 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3152 // regular serial code region, get thread ID by calling kmp_int32
3153 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3154 // return the address of that temp.
3155 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3156 SourceLocation Loc) {
3157 if (auto *OMPRegionInfo =
3158 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3159 if (OMPRegionInfo->getThreadIDVariable())
3160 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
3162 llvm::Value *ThreadID = getThreadID(CGF, Loc);
3164 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3165 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3166 CGF.EmitStoreOfScalar(ThreadID,
3167 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3169 return ThreadIDTemp;
3172 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3173 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3174 SmallString<256> Buffer;
3175 llvm::raw_svector_ostream Out(Buffer);
3177 StringRef RuntimeName = Out.str();
3178 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3180 assert(Elem.second->getType()->getPointerElementType() == Ty &&
3181 "OMP internal variable has different type than requested");
3182 return &*Elem.second;
3185 return Elem.second = new llvm::GlobalVariable(
3186 CGM.getModule(), Ty, /*IsConstant*/ false,
3187 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3188 Elem.first(), /*InsertBefore=*/nullptr,
3189 llvm::GlobalValue::NotThreadLocal, AddressSpace);
3192 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3193 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3194 std::string Name = getName({Prefix, "var"});
3195 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3199 /// Common pre(post)-action for different OpenMP constructs.
3200 class CommonActionTy final : public PrePostActionTy {
3201 llvm::FunctionCallee EnterCallee;
3202 ArrayRef<llvm::Value *> EnterArgs;
3203 llvm::FunctionCallee ExitCallee;
3204 ArrayRef<llvm::Value *> ExitArgs;
3206 llvm::BasicBlock *ContBlock = nullptr;
3209 CommonActionTy(llvm::FunctionCallee EnterCallee,
3210 ArrayRef<llvm::Value *> EnterArgs,
3211 llvm::FunctionCallee ExitCallee,
3212 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3213 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3214 ExitArgs(ExitArgs), Conditional(Conditional) {}
3215 void Enter(CodeGenFunction &CGF) override {
3216 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3218 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3219 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3220 ContBlock = CGF.createBasicBlock("omp_if.end");
3221 // Generate the branch (If-stmt)
3222 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3223 CGF.EmitBlock(ThenBlock);
3226 void Done(CodeGenFunction &CGF) {
3227 // Emit the rest of blocks/branches
3228 CGF.EmitBranch(ContBlock);
3229 CGF.EmitBlock(ContBlock, true);
3231 void Exit(CodeGenFunction &CGF) override {
3232 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3235 } // anonymous namespace
3237 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3238 StringRef CriticalName,
3239 const RegionCodeGenTy &CriticalOpGen,
3240 SourceLocation Loc, const Expr *Hint) {
3241 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3243 // __kmpc_end_critical(ident_t *, gtid, Lock);
3244 // Prepare arguments and build a call to __kmpc_critical
3245 if (!CGF.HaveInsertPoint())
3247 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3248 getCriticalRegionLock(CriticalName)};
3249 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3252 EnterArgs.push_back(CGF.Builder.CreateIntCast(
3253 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3255 CommonActionTy Action(
3256 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3257 : OMPRTL__kmpc_critical),
3258 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3259 CriticalOpGen.setAction(Action);
3260 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3263 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3264 const RegionCodeGenTy &MasterOpGen,
3265 SourceLocation Loc) {
3266 if (!CGF.HaveInsertPoint())
3268 // if(__kmpc_master(ident_t *, gtid)) {
3270 // __kmpc_end_master(ident_t *, gtid);
3272 // Prepare arguments and build a call to __kmpc_master
3273 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3274 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3275 createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3276 /*Conditional=*/true);
3277 MasterOpGen.setAction(Action);
3278 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3282 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3283 SourceLocation Loc) {
3284 if (!CGF.HaveInsertPoint())
3286 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3287 llvm::Value *Args[] = {
3288 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3289 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3290 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3291 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3292 Region->emitUntiedSwitch(CGF);
3295 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3296 const RegionCodeGenTy &TaskgroupOpGen,
3297 SourceLocation Loc) {
3298 if (!CGF.HaveInsertPoint())
3300 // __kmpc_taskgroup(ident_t *, gtid);
3301 // TaskgroupOpGen();
3302 // __kmpc_end_taskgroup(ident_t *, gtid);
3303 // Prepare arguments and build a call to __kmpc_taskgroup
3304 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3305 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3306 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3308 TaskgroupOpGen.setAction(Action);
3309 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3312 /// Given an array of pointers to variables, project the address of a
3314 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3315 unsigned Index, const VarDecl *Var) {
3316 // Pull out the pointer to the variable.
3317 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3318 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3320 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3321 Addr = CGF.Builder.CreateElementBitCast(
3322 Addr, CGF.ConvertTypeForMem(Var->getType()));
3326 static llvm::Value *emitCopyprivateCopyFunction(
3327 CodeGenModule &CGM, llvm::Type *ArgsType,
3328 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3329 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3330 SourceLocation Loc) {
3331 ASTContext &C = CGM.getContext();
3332 // void copy_func(void *LHSArg, void *RHSArg);
3333 FunctionArgList Args;
3334 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3335 ImplicitParamDecl::Other);
3336 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3337 ImplicitParamDecl::Other);
3338 Args.push_back(&LHSArg);
3339 Args.push_back(&RHSArg);
3341 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3343 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3344 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3345 llvm::GlobalValue::InternalLinkage, Name,
3347 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3348 Fn->setDoesNotRecurse();
3349 CodeGenFunction CGF(CGM);
3350 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3351 // Dest = (void*[n])(LHSArg);
3352 // Src = (void*[n])(RHSArg);
3353 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3354 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3355 ArgsType), CGF.getPointerAlign());
3356 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3357 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3358 ArgsType), CGF.getPointerAlign());
3359 // *(Type0*)Dst[0] = *(Type0*)Src[0];
3360 // *(Type1*)Dst[1] = *(Type1*)Src[1];
3362 // *(Typen*)Dst[n] = *(Typen*)Src[n];
3363 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3364 const auto *DestVar =
3365 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3366 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3368 const auto *SrcVar =
3369 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3370 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3372 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3373 QualType Type = VD->getType();
3374 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3376 CGF.FinishFunction();
3380 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3381 const RegionCodeGenTy &SingleOpGen,
3383 ArrayRef<const Expr *> CopyprivateVars,
3384 ArrayRef<const Expr *> SrcExprs,
3385 ArrayRef<const Expr *> DstExprs,
3386 ArrayRef<const Expr *> AssignmentOps) {
3387 if (!CGF.HaveInsertPoint())
3389 assert(CopyprivateVars.size() == SrcExprs.size() &&
3390 CopyprivateVars.size() == DstExprs.size() &&
3391 CopyprivateVars.size() == AssignmentOps.size());
3392 ASTContext &C = CGM.getContext();
3393 // int32 did_it = 0;
3394 // if(__kmpc_single(ident_t *, gtid)) {
3396 // __kmpc_end_single(ident_t *, gtid);
3399 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3400 // <copy_func>, did_it);
3402 Address DidIt = Address::invalid();
3403 if (!CopyprivateVars.empty()) {
3404 // int32 did_it = 0;
3405 QualType KmpInt32Ty =
3406 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3407 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3408 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3410 // Prepare arguments and build a call to __kmpc_single
3411 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3412 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3413 createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3414 /*Conditional=*/true);
3415 SingleOpGen.setAction(Action);
3416 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3417 if (DidIt.isValid()) {
3419 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3422 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3423 // <copy_func>, did_it);
3424 if (DidIt.isValid()) {
3425 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3426 QualType CopyprivateArrayTy = C.getConstantArrayType(
3427 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
3428 /*IndexTypeQuals=*/0);
3429 // Create a list of all private variables for copyprivate.
3430 Address CopyprivateList =
3431 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3432 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3433 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3434 CGF.Builder.CreateStore(
3435 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3436 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
3440 // Build function that copies private values from single region to all other
3441 // threads in the corresponding parallel region.
3442 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3443 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3444 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3445 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3447 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3449 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3450 llvm::Value *Args[] = {
3451 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3452 getThreadID(CGF, Loc), // i32 <gtid>
3453 BufSize, // size_t <buf_size>
3454 CL.getPointer(), // void *<copyprivate list>
3455 CpyFn, // void (*) (void *, void *) <copy_func>
3456 DidItVal // i32 did_it
3458 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3462 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3463 const RegionCodeGenTy &OrderedOpGen,
3464 SourceLocation Loc, bool IsThreads) {
3465 if (!CGF.HaveInsertPoint())
3467 // __kmpc_ordered(ident_t *, gtid);
3469 // __kmpc_end_ordered(ident_t *, gtid);
3470 // Prepare arguments and build a call to __kmpc_ordered
3472 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3473 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3474 createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3476 OrderedOpGen.setAction(Action);
3477 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3480 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3483 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3485 if (Kind == OMPD_for)
3486 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3487 else if (Kind == OMPD_sections)
3488 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3489 else if (Kind == OMPD_single)
3490 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3491 else if (Kind == OMPD_barrier)
3492 Flags = OMP_IDENT_BARRIER_EXPL;
3494 Flags = OMP_IDENT_BARRIER_IMPL;
3498 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3499 CodeGenFunction &CGF, const OMPLoopDirective &S,
3500 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3501 // Check if the loop directive is actually a doacross loop directive. In this
3502 // case choose static, 1 schedule.
3504 S.getClausesOfKind<OMPOrderedClause>(),
3505 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3506 ScheduleKind = OMPC_SCHEDULE_static;
3507 // Chunk size is 1 in this case.
3508 llvm::APInt ChunkSize(32, 1);
3509 ChunkExpr = IntegerLiteral::Create(
3510 CGF.getContext(), ChunkSize,
3511 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3516 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3517 OpenMPDirectiveKind Kind, bool EmitChecks,
3518 bool ForceSimpleCall) {
3519 // Check if we should use the OMPBuilder
3520 auto *OMPRegionInfo =
3521 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
3522 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
3524 CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
3525 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
3529 if (!CGF.HaveInsertPoint())
3531 // Build call __kmpc_cancel_barrier(loc, thread_id);
3532 // Build call __kmpc_barrier(loc, thread_id);
3533 unsigned Flags = getDefaultFlagsForBarriers(Kind);
3534 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3536 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3537 getThreadID(CGF, Loc)};
3538 if (OMPRegionInfo) {
3539 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3540 llvm::Value *Result = CGF.EmitRuntimeCall(
3541 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3543 // if (__kmpc_cancel_barrier()) {
3544 // exit from construct;
3546 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3547 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3548 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3549 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3550 CGF.EmitBlock(ExitBB);
3551 // exit from construct;
3552 CodeGenFunction::JumpDest CancelDestination =
3553 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3554 CGF.EmitBranchThroughCleanup(CancelDestination);
3555 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3560 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3563 /// Map the OpenMP loop schedule to the runtime enumeration.
3564 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3565 bool Chunked, bool Ordered) {
3566 switch (ScheduleKind) {
3567 case OMPC_SCHEDULE_static:
3568 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3569 : (Ordered ? OMP_ord_static : OMP_sch_static);
3570 case OMPC_SCHEDULE_dynamic:
3571 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3572 case OMPC_SCHEDULE_guided:
3573 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3574 case OMPC_SCHEDULE_runtime:
3575 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3576 case OMPC_SCHEDULE_auto:
3577 return Ordered ? OMP_ord_auto : OMP_sch_auto;
3578 case OMPC_SCHEDULE_unknown:
3579 assert(!Chunked && "chunk was specified but schedule kind not known");
3580 return Ordered ? OMP_ord_static : OMP_sch_static;
3582 llvm_unreachable("Unexpected runtime schedule");
3585 /// Map the OpenMP distribute schedule to the runtime enumeration.
3586 static OpenMPSchedType
3587 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3588 // only static is allowed for dist_schedule
3589 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3592 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3593 bool Chunked) const {
3594 OpenMPSchedType Schedule =
3595 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3596 return Schedule == OMP_sch_static;
3599 bool CGOpenMPRuntime::isStaticNonchunked(
3600 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3601 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3602 return Schedule == OMP_dist_sch_static;
3605 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3606 bool Chunked) const {
3607 OpenMPSchedType Schedule =
3608 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3609 return Schedule == OMP_sch_static_chunked;
3612 bool CGOpenMPRuntime::isStaticChunked(
3613 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3614 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3615 return Schedule == OMP_dist_sch_static_chunked;
3618 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3619 OpenMPSchedType Schedule =
3620 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3621 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3622 return Schedule != OMP_sch_static;
3625 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
3626 OpenMPScheduleClauseModifier M1,
3627 OpenMPScheduleClauseModifier M2) {
3630 case OMPC_SCHEDULE_MODIFIER_monotonic:
3631 Modifier = OMP_sch_modifier_monotonic;
3633 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3634 Modifier = OMP_sch_modifier_nonmonotonic;
3636 case OMPC_SCHEDULE_MODIFIER_simd:
3637 if (Schedule == OMP_sch_static_chunked)
3638 Schedule = OMP_sch_static_balanced_chunked;
3640 case OMPC_SCHEDULE_MODIFIER_last:
3641 case OMPC_SCHEDULE_MODIFIER_unknown:
3645 case OMPC_SCHEDULE_MODIFIER_monotonic:
3646 Modifier = OMP_sch_modifier_monotonic;
3648 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3649 Modifier = OMP_sch_modifier_nonmonotonic;
3651 case OMPC_SCHEDULE_MODIFIER_simd:
3652 if (Schedule == OMP_sch_static_chunked)
3653 Schedule = OMP_sch_static_balanced_chunked;
3655 case OMPC_SCHEDULE_MODIFIER_last:
3656 case OMPC_SCHEDULE_MODIFIER_unknown:
3659 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
3660 // If the static schedule kind is specified or if the ordered clause is
3661 // specified, and if the nonmonotonic modifier is not specified, the effect is
3662 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
3663 // modifier is specified, the effect is as if the nonmonotonic modifier is
3665 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
3666 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
3667 Schedule == OMP_sch_static_balanced_chunked ||
3668 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
3669 Schedule == OMP_dist_sch_static_chunked ||
3670 Schedule == OMP_dist_sch_static))
3671 Modifier = OMP_sch_modifier_nonmonotonic;
3673 return Schedule | Modifier;
3676 void CGOpenMPRuntime::emitForDispatchInit(
3677 CodeGenFunction &CGF, SourceLocation Loc,
3678 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3679 bool Ordered, const DispatchRTInput &DispatchValues) {
3680 if (!CGF.HaveInsertPoint())
3682 OpenMPSchedType Schedule = getRuntimeSchedule(
3683 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3685 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3686 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3687 Schedule != OMP_sch_static_balanced_chunked));
3688 // Call __kmpc_dispatch_init(
3689 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3690 // kmp_int[32|64] lower, kmp_int[32|64] upper,
3691 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3693 // If the Chunk was not specified in the clause - use default value 1.
3694 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3695 : CGF.Builder.getIntN(IVSize, 1);
3696 llvm::Value *Args[] = {
3697 emitUpdateLocation(CGF, Loc),
3698 getThreadID(CGF, Loc),
3699 CGF.Builder.getInt32(addMonoNonMonoModifier(
3700 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3701 DispatchValues.LB, // Lower
3702 DispatchValues.UB, // Upper
3703 CGF.Builder.getIntN(IVSize, 1), // Stride
3706 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3709 static void emitForStaticInitCall(
3710 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3711 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3712 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3713 const CGOpenMPRuntime::StaticRTInput &Values) {
3714 if (!CGF.HaveInsertPoint())
3717 assert(!Values.Ordered);
3718 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3719 Schedule == OMP_sch_static_balanced_chunked ||
3720 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3721 Schedule == OMP_dist_sch_static ||
3722 Schedule == OMP_dist_sch_static_chunked);
3724 // Call __kmpc_for_static_init(
3725 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3726 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3727 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3728 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3729 llvm::Value *Chunk = Values.Chunk;
3730 if (Chunk == nullptr) {
3731 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3732 Schedule == OMP_dist_sch_static) &&
3733 "expected static non-chunked schedule");
3734 // If the Chunk was not specified in the clause - use default value 1.
3735 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3737 assert((Schedule == OMP_sch_static_chunked ||
3738 Schedule == OMP_sch_static_balanced_chunked ||
3739 Schedule == OMP_ord_static_chunked ||
3740 Schedule == OMP_dist_sch_static_chunked) &&
3741 "expected static chunked schedule");
3743 llvm::Value *Args[] = {
3746 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
3747 M2)), // Schedule type
3748 Values.IL.getPointer(), // &isLastIter
3749 Values.LB.getPointer(), // &LB
3750 Values.UB.getPointer(), // &UB
3751 Values.ST.getPointer(), // &Stride
3752 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3755 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3758 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3760 OpenMPDirectiveKind DKind,
3761 const OpenMPScheduleTy &ScheduleKind,
3762 const StaticRTInput &Values) {
3763 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3764 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3765 assert(isOpenMPWorksharingDirective(DKind) &&
3766 "Expected loop-based or sections-based directive.");
3767 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3768 isOpenMPLoopDirective(DKind)
3769 ? OMP_IDENT_WORK_LOOP
3770 : OMP_IDENT_WORK_SECTIONS);
3771 llvm::Value *ThreadId = getThreadID(CGF, Loc);
3772 llvm::FunctionCallee StaticInitFunction =
3773 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3774 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3775 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3778 void CGOpenMPRuntime::emitDistributeStaticInit(
3779 CodeGenFunction &CGF, SourceLocation Loc,
3780 OpenMPDistScheduleClauseKind SchedKind,
3781 const CGOpenMPRuntime::StaticRTInput &Values) {
3782 OpenMPSchedType ScheduleNum =
3783 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3784 llvm::Value *UpdatedLocation =
3785 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3786 llvm::Value *ThreadId = getThreadID(CGF, Loc);
3787 llvm::FunctionCallee StaticInitFunction =
3788 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3789 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3790 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3791 OMPC_SCHEDULE_MODIFIER_unknown, Values);
3794 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3796 OpenMPDirectiveKind DKind) {
3797 if (!CGF.HaveInsertPoint())
3799 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3800 llvm::Value *Args[] = {
3801 emitUpdateLocation(CGF, Loc,
3802 isOpenMPDistributeDirective(DKind)
3803 ? OMP_IDENT_WORK_DISTRIBUTE
3804 : isOpenMPLoopDirective(DKind)
3805 ? OMP_IDENT_WORK_LOOP
3806 : OMP_IDENT_WORK_SECTIONS),
3807 getThreadID(CGF, Loc)};
3808 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3812 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3816 if (!CGF.HaveInsertPoint())
3818 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3819 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3820 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3823 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3824 SourceLocation Loc, unsigned IVSize,
3825 bool IVSigned, Address IL,
3826 Address LB, Address UB,
3828 // Call __kmpc_dispatch_next(
3829 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3830 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3831 // kmp_int[32|64] *p_stride);
3832 llvm::Value *Args[] = {
3833 emitUpdateLocation(CGF, Loc),
3834 getThreadID(CGF, Loc),
3835 IL.getPointer(), // &isLastIter
3836 LB.getPointer(), // &Lower
3837 UB.getPointer(), // &Upper
3838 ST.getPointer() // &Stride
3841 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3842 return CGF.EmitScalarConversion(
3843 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3844 CGF.getContext().BoolTy, Loc);
3847 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3848 llvm::Value *NumThreads,
3849 SourceLocation Loc) {
3850 if (!CGF.HaveInsertPoint())
3852 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3853 llvm::Value *Args[] = {
3854 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3855 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3856 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3860 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3861 ProcBindKind ProcBind,
3862 SourceLocation Loc) {
3863 if (!CGF.HaveInsertPoint())
3865 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
3866 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3867 llvm::Value *Args[] = {
3868 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3869 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
3870 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3873 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3874 SourceLocation Loc) {
3875 if (!CGF.HaveInsertPoint())
3877 // Build call void __kmpc_flush(ident_t *loc)
3878 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3879 emitUpdateLocation(CGF, Loc));
3883 /// Indexes of fields for type kmp_task_t.
3884 enum KmpTaskTFields {
3885 /// List of shared variables.
3889 /// Partition id for the untied tasks.
3891 /// Function with call of destructors for private variables.
3895 /// (Taskloops only) Lower bound.
3897 /// (Taskloops only) Upper bound.
3899 /// (Taskloops only) Stride.
3901 /// (Taskloops only) Is last iteration flag.
3903 /// (Taskloops only) Reduction data.
3906 } // anonymous namespace
3908 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3909 return OffloadEntriesTargetRegion.empty() &&
3910 OffloadEntriesDeviceGlobalVar.empty();
3913 /// Initialize target region entry.
3914 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3915 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3916 StringRef ParentName, unsigned LineNum,
3918 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3919 "only required for the device "
3920 "code generation.");
3921 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3922 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3923 OMPTargetRegionEntryTargetRegion);
3924 ++OffloadingEntriesNum;
3927 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3928 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3929 StringRef ParentName, unsigned LineNum,
3930 llvm::Constant *Addr, llvm::Constant *ID,
3931 OMPTargetRegionEntryKind Flags) {
3932 // If we are emitting code for a target, the entry is already initialized,
3933 // only has to be registered.
3934 if (CGM.getLangOpts().OpenMPIsDevice) {
3935 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3936 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3937 DiagnosticsEngine::Error,
3938 "Unable to find target region on line '%0' in the device code.");
3939 CGM.getDiags().Report(DiagID) << LineNum;
3943 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3944 assert(Entry.isValid() && "Entry not initialized!");
3945 Entry.setAddress(Addr);
3947 Entry.setFlags(Flags);
3949 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3950 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3951 ++OffloadingEntriesNum;
3955 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3956 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3957 unsigned LineNum) const {
3958 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3959 if (PerDevice == OffloadEntriesTargetRegion.end())
3961 auto PerFile = PerDevice->second.find(FileID);
3962 if (PerFile == PerDevice->second.end())
3964 auto PerParentName = PerFile->second.find(ParentName);
3965 if (PerParentName == PerFile->second.end())
3967 auto PerLine = PerParentName->second.find(LineNum);
3968 if (PerLine == PerParentName->second.end())
3970 // Fail if this entry is already registered.
3971 if (PerLine->second.getAddress() || PerLine->second.getID())
3976 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3977 const OffloadTargetRegionEntryInfoActTy &Action) {
3978 // Scan all target region entries and perform the provided action.
3979 for (const auto &D : OffloadEntriesTargetRegion)
3980 for (const auto &F : D.second)
3981 for (const auto &P : F.second)
3982 for (const auto &L : P.second)
3983 Action(D.first, F.first, P.first(), L.first, L.second);
3986 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3987 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3988 OMPTargetGlobalVarEntryKind Flags,
3990 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3991 "only required for the device "
3992 "code generation.");
3993 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3994 ++OffloadingEntriesNum;
3997 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3998 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
4000 OMPTargetGlobalVarEntryKind Flags,
4001 llvm::GlobalValue::LinkageTypes Linkage) {
4002 if (CGM.getLangOpts().OpenMPIsDevice) {
4003 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4004 assert(Entry.isValid() && Entry.getFlags() == Flags &&
4005 "Entry not initialized!");
4006 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4007 "Resetting with the new address.");
4008 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
4009 if (Entry.getVarSize().isZero()) {
4010 Entry.setVarSize(VarSize);
4011 Entry.setLinkage(Linkage);
4015 Entry.setVarSize(VarSize);
4016 Entry.setLinkage(Linkage);
4017 Entry.setAddress(Addr);
4019 if (hasDeviceGlobalVarEntryInfo(VarName)) {
4020 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
4021 assert(Entry.isValid() && Entry.getFlags() == Flags &&
4022 "Entry not initialized!");
4023 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
4024 "Resetting with the new address.");
4025 if (Entry.getVarSize().isZero()) {
4026 Entry.setVarSize(VarSize);
4027 Entry.setLinkage(Linkage);
4031 OffloadEntriesDeviceGlobalVar.try_emplace(
4032 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
4033 ++OffloadingEntriesNum;
4037 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
4038 actOnDeviceGlobalVarEntriesInfo(
4039 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
4040 // Scan all target region entries and perform the provided action.
4041 for (const auto &E : OffloadEntriesDeviceGlobalVar)
4042 Action(E.getKey(), E.getValue());
4045 void CGOpenMPRuntime::createOffloadEntry(
4046 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4047 llvm::GlobalValue::LinkageTypes Linkage) {
4048 StringRef Name = Addr->getName();
4049 llvm::Module &M = CGM.getModule();
4050 llvm::LLVMContext &C = M.getContext();
4052 // Create constant string with the name.
4053 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4055 std::string StringName = getName({"omp_offloading", "entry_name"});
4056 auto *Str = new llvm::GlobalVariable(
4057 M, StrPtrInit->getType(), /*isConstant=*/true,
4058 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4059 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4061 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4062 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4063 llvm::ConstantInt::get(CGM.SizeTy, Size),
4064 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4065 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4066 std::string EntryName = getName({"omp_offloading", "entry", ""});
4067 llvm::GlobalVariable *Entry = createGlobalStruct(
4068 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4069 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4071 // The entry has to be created in the section the linker expects it to be.
4072 Entry->setSection("omp_offloading_entries");
4075 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4076 // Emit the offloading entries and metadata so that the device codegen side
4077 // can easily figure out what to emit. The produced metadata looks like
4080 // !omp_offload.info = !{!1, ...}
4082 // Right now we only generate metadata for function that contain target
4085 // If we are in simd mode or there are no entries, we don't need to do
4087 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
4090 llvm::Module &M = CGM.getModule();
4091 llvm::LLVMContext &C = M.getContext();
4092 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
4093 SourceLocation, StringRef>,
4095 OrderedEntries(OffloadEntriesInfoManager.size());
4096 llvm::SmallVector<StringRef, 16> ParentFunctions(
4097 OffloadEntriesInfoManager.size());
4099 // Auxiliary methods to create metadata values and strings.
4100 auto &&GetMDInt = [this](unsigned V) {
4101 return llvm::ConstantAsMetadata::get(
4102 llvm::ConstantInt::get(CGM.Int32Ty, V));
4105 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4107 // Create the offloading info metadata node.
4108 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4110 // Create function that emits metadata for each target region entry;
4111 auto &&TargetRegionMetadataEmitter =
4112 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
4114 unsigned DeviceID, unsigned FileID, StringRef ParentName,
4116 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4117 // Generate metadata for target regions. Each entry of this metadata
4119 // - Entry 0 -> Kind of this type of metadata (0).
4120 // - Entry 1 -> Device ID of the file where the entry was identified.
4121 // - Entry 2 -> File ID of the file where the entry was identified.
4122 // - Entry 3 -> Mangled name of the function where the entry was
4124 // - Entry 4 -> Line in the file where the entry was identified.
4125 // - Entry 5 -> Order the entry was created.
4126 // The first element of the metadata node is the kind.
4127 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4128 GetMDInt(FileID), GetMDString(ParentName),
4129 GetMDInt(Line), GetMDInt(E.getOrder())};
4132 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
4133 E = CGM.getContext().getSourceManager().fileinfo_end();
4135 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
4136 I->getFirst()->getUniqueID().getFile() == FileID) {
4137 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
4138 I->getFirst(), Line, 1);
4142 // Save this entry in the right position of the ordered entries array.
4143 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
4144 ParentFunctions[E.getOrder()] = ParentName;
4146 // Add metadata to the named metadata node.
4147 MD->addOperand(llvm::MDNode::get(C, Ops));
4150 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4151 TargetRegionMetadataEmitter);
4153 // Create function that emits metadata for each device global variable entry;
4154 auto &&DeviceGlobalVarMetadataEmitter =
4155 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4156 MD](StringRef MangledName,
4157 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4159 // Generate metadata for global variables. Each entry of this metadata
4161 // - Entry 0 -> Kind of this type of metadata (1).
4162 // - Entry 1 -> Mangled name of the variable.
4163 // - Entry 2 -> Declare target kind.
4164 // - Entry 3 -> Order the entry was created.
4165 // The first element of the metadata node is the kind.
4166 llvm::Metadata *Ops[] = {
4167 GetMDInt(E.getKind()), GetMDString(MangledName),
4168 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4170 // Save this entry in the right position of the ordered entries array.
4171 OrderedEntries[E.getOrder()] =
4172 std::make_tuple(&E, SourceLocation(), MangledName);
4174 // Add metadata to the named metadata node.
4175 MD->addOperand(llvm::MDNode::get(C, Ops));
4178 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4179 DeviceGlobalVarMetadataEmitter);
4181 for (const auto &E : OrderedEntries) {
4182 assert(std::get<0>(E) && "All ordered entries must exist!");
4183 if (const auto *CE =
4184 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4186 if (!CE->getID() || !CE->getAddress()) {
4187 // Do not blame the entry if the parent funtion is not emitted.
4188 StringRef FnName = ParentFunctions[CE->getOrder()];
4189 if (!CGM.GetGlobalValue(FnName))
4191 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4192 DiagnosticsEngine::Error,
4193 "Offloading entry for target region in %0 is incorrect: either the "
4194 "address or the ID is invalid.");
4195 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
4198 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4199 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4200 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
4201 OffloadEntryInfoDeviceGlobalVar>(
4203 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4204 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4207 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4208 if (CGM.getLangOpts().OpenMPIsDevice &&
4209 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4211 if (!CE->getAddress()) {
4212 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4213 DiagnosticsEngine::Error, "Offloading entry for declare target "
4214 "variable %0 is incorrect: the "
4215 "address is invalid.");
4216 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
4219 // The vaiable has no definition - no need to add the entry.
4220 if (CE->getVarSize().isZero())
4224 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4225 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4226 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4227 "Declaret target link address is set.");
4228 if (CGM.getLangOpts().OpenMPIsDevice)
4230 if (!CE->getAddress()) {
4231 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4232 DiagnosticsEngine::Error,
4233 "Offloading entry for declare target variable is incorrect: the "
4234 "address is invalid.");
4235 CGM.getDiags().Report(DiagID);
4240 createOffloadEntry(CE->getAddress(), CE->getAddress(),
4241 CE->getVarSize().getQuantity(), Flags,
4244 llvm_unreachable("Unsupported entry kind.");
4249 /// Loads all the offload entries information from the host IR
4251 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4252 // If we are in target mode, load the metadata from the host IR. This code has
4253 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
4255 if (!CGM.getLangOpts().OpenMPIsDevice)
4258 if (CGM.getLangOpts().OMPHostIRFile.empty())
4261 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4262 if (auto EC = Buf.getError()) {
4263 CGM.getDiags().Report(diag::err_cannot_open_file)
4264 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4268 llvm::LLVMContext C;
4269 auto ME = expectedToErrorOrAndEmitErrors(
4270 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4272 if (auto EC = ME.getError()) {
4273 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4274 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4275 CGM.getDiags().Report(DiagID)
4276 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4280 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4284 for (llvm::MDNode *MN : MD->operands()) {
4285 auto &&GetMDInt = [MN](unsigned Idx) {
4286 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4287 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4290 auto &&GetMDString = [MN](unsigned Idx) {
4291 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4292 return V->getString();
4295 switch (GetMDInt(0)) {
4297 llvm_unreachable("Unexpected metadata!");
4299 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4300 OffloadingEntryInfoTargetRegion:
4301 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4302 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4303 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4304 /*Order=*/GetMDInt(5));
4306 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4307 OffloadingEntryInfoDeviceGlobalVar:
4308 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4309 /*MangledName=*/GetMDString(1),
4310 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4311 /*Flags=*/GetMDInt(2)),
4312 /*Order=*/GetMDInt(3));
4318 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4319 if (!KmpRoutineEntryPtrTy) {
4320 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4321 ASTContext &C = CGM.getContext();
4322 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4323 FunctionProtoType::ExtProtoInfo EPI;
4324 KmpRoutineEntryPtrQTy = C.getPointerType(
4325 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4326 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4330 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4331 // Make sure the type of the entry is already created. This is the type we
4333 // struct __tgt_offload_entry{
4334 // void *addr; // Pointer to the offload entry info.
4335 // // (function or global)
4336 // char *name; // Name of the function or global.
4337 // size_t size; // Size of the entry info (0 if it a function).
4338 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4339 // int32_t reserved; // Reserved, to use by the runtime library.
4341 if (TgtOffloadEntryQTy.isNull()) {
4342 ASTContext &C = CGM.getContext();
4343 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4344 RD->startDefinition();
4345 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4346 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4347 addFieldToRecordDecl(C, RD, C.getSizeType());
4348 addFieldToRecordDecl(
4349 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4350 addFieldToRecordDecl(
4351 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4352 RD->completeDefinition();
4353 RD->addAttr(PackedAttr::CreateImplicit(C));
4354 TgtOffloadEntryQTy = C.getRecordType(RD);
4356 return TgtOffloadEntryQTy;
4360 struct PrivateHelpersTy {
4361 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4362 const VarDecl *PrivateElemInit)
4363 : Original(Original), PrivateCopy(PrivateCopy),
4364 PrivateElemInit(PrivateElemInit) {}
4365 const VarDecl *Original;
4366 const VarDecl *PrivateCopy;
4367 const VarDecl *PrivateElemInit;
4369 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4370 } // anonymous namespace
4373 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4374 if (!Privates.empty()) {
4375 ASTContext &C = CGM.getContext();
4376 // Build struct .kmp_privates_t. {
4377 // /* private vars */
4379 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4380 RD->startDefinition();
4381 for (const auto &Pair : Privates) {
4382 const VarDecl *VD = Pair.second.Original;
4383 QualType Type = VD->getType().getNonReferenceType();
4384 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4385 if (VD->hasAttrs()) {
4386 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4387 E(VD->getAttrs().end());
4392 RD->completeDefinition();
4399 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4400 QualType KmpInt32Ty,
4401 QualType KmpRoutineEntryPointerQTy) {
4402 ASTContext &C = CGM.getContext();
4403 // Build struct kmp_task_t {
4405 // kmp_routine_entry_t routine;
4406 // kmp_int32 part_id;
4407 // kmp_cmplrdata_t data1;
4408 // kmp_cmplrdata_t data2;
4409 // For taskloops additional fields:
4414 // void * reductions;
4416 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4417 UD->startDefinition();
4418 addFieldToRecordDecl(C, UD, KmpInt32Ty);
4419 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4420 UD->completeDefinition();
4421 QualType KmpCmplrdataTy = C.getRecordType(UD);
4422 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4423 RD->startDefinition();
4424 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4425 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4426 addFieldToRecordDecl(C, RD, KmpInt32Ty);
4427 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4428 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4429 if (isOpenMPTaskLoopDirective(Kind)) {
4430 QualType KmpUInt64Ty =
4431 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4432 QualType KmpInt64Ty =
4433 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4434 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4435 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4436 addFieldToRecordDecl(C, RD, KmpInt64Ty);
4437 addFieldToRecordDecl(C, RD, KmpInt32Ty);
4438 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4440 RD->completeDefinition();
4445 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4446 ArrayRef<PrivateDataTy> Privates) {
4447 ASTContext &C = CGM.getContext();
4448 // Build struct kmp_task_t_with_privates {
4449 // kmp_task_t task_data;
4450 // .kmp_privates_t. privates;
4452 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4453 RD->startDefinition();
4454 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4455 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4456 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4457 RD->completeDefinition();
4461 /// Emit a proxy function which accepts kmp_task_t as the second
4464 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4465 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4467 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4468 /// tt->reductions, tt->shareds);
4472 static llvm::Function *
4473 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4474 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4475 QualType KmpTaskTWithPrivatesPtrQTy,
4476 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4477 QualType SharedsPtrTy, llvm::Function *TaskFunction,
4478 llvm::Value *TaskPrivatesMap) {
4479 ASTContext &C = CGM.getContext();
4480 FunctionArgList Args;
4481 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4482 ImplicitParamDecl::Other);
4483 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4484 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4485 ImplicitParamDecl::Other);
4486 Args.push_back(&GtidArg);
4487 Args.push_back(&TaskTypeArg);
4488 const auto &TaskEntryFnInfo =
4489 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4490 llvm::FunctionType *TaskEntryTy =
4491 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4492 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4493 auto *TaskEntry = llvm::Function::Create(
4494 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4495 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4496 TaskEntry->setDoesNotRecurse();
4497 CodeGenFunction CGF(CGM);
4498 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4501 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4504 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4505 // tt->task_data.shareds);
4506 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4507 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4508 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4509 CGF.GetAddrOfLocalVar(&TaskTypeArg),
4510 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4511 const auto *KmpTaskTWithPrivatesQTyRD =
4512 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4514 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4515 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4516 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4517 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4518 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
4520 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4521 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4522 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4523 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4524 CGF.ConvertTypeForMem(SharedsPtrTy));
4526 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4527 llvm::Value *PrivatesParam;
4528 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4529 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4530 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4531 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
4533 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4536 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4539 .CreatePointerBitCastOrAddrSpaceCast(
4540 TDBase.getAddress(CGF), CGF.VoidPtrTy)
4542 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4543 std::end(CommonArgs));
4544 if (isOpenMPTaskLoopDirective(Kind)) {
4545 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4546 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4547 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4548 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4549 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4550 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4551 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4552 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4553 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4554 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4555 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4556 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4557 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4558 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4559 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4560 CallArgs.push_back(LBParam);
4561 CallArgs.push_back(UBParam);
4562 CallArgs.push_back(StParam);
4563 CallArgs.push_back(LIParam);
4564 CallArgs.push_back(RParam);
4566 CallArgs.push_back(SharedsParam);
4568 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4570 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4571 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4572 CGF.FinishFunction();
4576 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4578 QualType KmpInt32Ty,
4579 QualType KmpTaskTWithPrivatesPtrQTy,
4580 QualType KmpTaskTWithPrivatesQTy) {
4581 ASTContext &C = CGM.getContext();
4582 FunctionArgList Args;
4583 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4584 ImplicitParamDecl::Other);
4585 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4586 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4587 ImplicitParamDecl::Other);
4588 Args.push_back(&GtidArg);
4589 Args.push_back(&TaskTypeArg);
4590 const auto &DestructorFnInfo =
4591 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4592 llvm::FunctionType *DestructorFnTy =
4593 CGM.getTypes().GetFunctionType(DestructorFnInfo);
4595 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4596 auto *DestructorFn =
4597 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4598 Name, &CGM.getModule());
4599 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4601 DestructorFn->setDoesNotRecurse();
4602 CodeGenFunction CGF(CGM);
4603 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4606 LValue Base = CGF.EmitLoadOfPointerLValue(
4607 CGF.GetAddrOfLocalVar(&TaskTypeArg),
4608 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4609 const auto *KmpTaskTWithPrivatesQTyRD =
4610 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4611 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4612 Base = CGF.EmitLValueForField(Base, *FI);
4613 for (const auto *Field :
4614 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4615 if (QualType::DestructionKind DtorKind =
4616 Field->getType().isDestructedType()) {
4617 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4618 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
4621 CGF.FinishFunction();
4622 return DestructorFn;
4625 /// Emit a privates mapping function for correct handling of private and
4626 /// firstprivate variables.
4628 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4629 /// **noalias priv1,..., <tyn> **noalias privn) {
4630 /// *priv1 = &.privates.priv1;
4632 /// *privn = &.privates.privn;
4635 static llvm::Value *
4636 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4637 ArrayRef<const Expr *> PrivateVars,
4638 ArrayRef<const Expr *> FirstprivateVars,
4639 ArrayRef<const Expr *> LastprivateVars,
4640 QualType PrivatesQTy,
4641 ArrayRef<PrivateDataTy> Privates) {
4642 ASTContext &C = CGM.getContext();
4643 FunctionArgList Args;
4644 ImplicitParamDecl TaskPrivatesArg(
4645 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4646 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4647 ImplicitParamDecl::Other);
4648 Args.push_back(&TaskPrivatesArg);
4649 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4650 unsigned Counter = 1;
4651 for (const Expr *E : PrivateVars) {
4652 Args.push_back(ImplicitParamDecl::Create(
4653 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4654 C.getPointerType(C.getPointerType(E->getType()))
4657 ImplicitParamDecl::Other));
4658 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4659 PrivateVarsPos[VD] = Counter;
4662 for (const Expr *E : FirstprivateVars) {
4663 Args.push_back(ImplicitParamDecl::Create(
4664 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4665 C.getPointerType(C.getPointerType(E->getType()))
4668 ImplicitParamDecl::Other));
4669 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4670 PrivateVarsPos[VD] = Counter;
4673 for (const Expr *E : LastprivateVars) {
4674 Args.push_back(ImplicitParamDecl::Create(
4675 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4676 C.getPointerType(C.getPointerType(E->getType()))
4679 ImplicitParamDecl::Other));
4680 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4681 PrivateVarsPos[VD] = Counter;
4684 const auto &TaskPrivatesMapFnInfo =
4685 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4686 llvm::FunctionType *TaskPrivatesMapTy =
4687 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4689 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4690 auto *TaskPrivatesMap = llvm::Function::Create(
4691 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4693 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4694 TaskPrivatesMapFnInfo);
4695 if (CGM.getLangOpts().Optimize) {
4696 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4697 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4698 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4700 CodeGenFunction CGF(CGM);
4701 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4702 TaskPrivatesMapFnInfo, Args, Loc, Loc);
4704 // *privi = &.privates.privi;
4705 LValue Base = CGF.EmitLoadOfPointerLValue(
4706 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4707 TaskPrivatesArg.getType()->castAs<PointerType>());
4708 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4710 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4711 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4712 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4714 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4715 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4716 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
4717 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
4720 CGF.FinishFunction();
4721 return TaskPrivatesMap;
4724 /// Emit initialization for private variables in task-based directives.
4725 static void emitPrivatesInit(CodeGenFunction &CGF,
4726 const OMPExecutableDirective &D,
4727 Address KmpTaskSharedsPtr, LValue TDBase,
4728 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4729 QualType SharedsTy, QualType SharedsPtrTy,
4730 const OMPTaskDataTy &Data,
4731 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4732 ASTContext &C = CGF.getContext();
4733 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4734 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4735 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4738 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4739 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4742 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4743 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4744 // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4745 // PointersArray and SizesArray. The original variables for these arrays are
4746 // not captured and we get their addresses explicitly.
4747 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4748 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4749 SrcBase = CGF.MakeAddrLValue(
4750 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4751 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4754 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4755 for (const PrivateDataTy &Pair : Privates) {
4756 const VarDecl *VD = Pair.second.PrivateCopy;
4757 const Expr *Init = VD->getAnyInitializer();
4758 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4759 !CGF.isTrivialInitializer(Init)))) {
4760 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4761 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4762 const VarDecl *OriginalVD = Pair.second.Original;
4763 // Check if the variable is the target-based BasePointersArray,
4764 // PointersArray or SizesArray.
4765 LValue SharedRefLValue;
4766 QualType Type = PrivateLValue.getType();
4767 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4768 if (IsTargetTask && !SharedField) {
4769 assert(isa<ImplicitParamDecl>(OriginalVD) &&
4770 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4771 cast<CapturedDecl>(OriginalVD->getDeclContext())
4772 ->getNumParams() == 0 &&
4773 isa<TranslationUnitDecl>(
4774 cast<CapturedDecl>(OriginalVD->getDeclContext())
4775 ->getDeclContext()) &&
4776 "Expected artificial target data variable.");
4778 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4780 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4781 SharedRefLValue = CGF.MakeAddrLValue(
4782 Address(SharedRefLValue.getPointer(CGF),
4783 C.getDeclAlign(OriginalVD)),
4784 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4785 SharedRefLValue.getTBAAInfo());
4787 if (Type->isArrayType()) {
4788 // Initialize firstprivate array.
4789 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4790 // Perform simple memcpy.
4791 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4793 // Initialize firstprivate array using element-by-element
4795 CGF.EmitOMPAggregateAssign(
4796 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
4798 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4799 Address SrcElement) {
4800 // Clean up any temporaries needed by the initialization.
4801 CodeGenFunction::OMPPrivateScope InitScope(CGF);
4802 InitScope.addPrivate(
4803 Elem, [SrcElement]() -> Address { return SrcElement; });
4804 (void)InitScope.Privatize();
4805 // Emit initialization for single element.
4806 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4807 CGF, &CapturesInfo);
4808 CGF.EmitAnyExprToMem(Init, DestElement,
4809 Init->getType().getQualifiers(),
4810 /*IsInitializer=*/false);
4814 CodeGenFunction::OMPPrivateScope InitScope(CGF);
4815 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
4816 return SharedRefLValue.getAddress(CGF);
4818 (void)InitScope.Privatize();
4819 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4820 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4821 /*capturedByInit=*/false);
4824 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4831 /// Check if duplication function is required for taskloops.
4832 static bool checkInitIsRequired(CodeGenFunction &CGF,
4833 ArrayRef<PrivateDataTy> Privates) {
4834 bool InitRequired = false;
4835 for (const PrivateDataTy &Pair : Privates) {
4836 const VarDecl *VD = Pair.second.PrivateCopy;
4837 const Expr *Init = VD->getAnyInitializer();
4838 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4839 !CGF.isTrivialInitializer(Init));
4843 return InitRequired;
4847 /// Emit task_dup function (for initialization of
4848 /// private/firstprivate/lastprivate vars and last_iter flag)
4850 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4852 /// // setup lastprivate flag
4853 /// task_dst->last = lastpriv;
4854 /// // could be constructor calls here...
4857 static llvm::Value *
4858 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4859 const OMPExecutableDirective &D,
4860 QualType KmpTaskTWithPrivatesPtrQTy,
4861 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4862 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4863 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4864 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4865 ASTContext &C = CGM.getContext();
4866 FunctionArgList Args;
4867 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4868 KmpTaskTWithPrivatesPtrQTy,
4869 ImplicitParamDecl::Other);
4870 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4871 KmpTaskTWithPrivatesPtrQTy,
4872 ImplicitParamDecl::Other);
4873 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4874 ImplicitParamDecl::Other);
4875 Args.push_back(&DstArg);
4876 Args.push_back(&SrcArg);
4877 Args.push_back(&LastprivArg);
4878 const auto &TaskDupFnInfo =
4879 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4880 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4881 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4882 auto *TaskDup = llvm::Function::Create(
4883 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4884 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4885 TaskDup->setDoesNotRecurse();
4886 CodeGenFunction CGF(CGM);
4887 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4890 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4891 CGF.GetAddrOfLocalVar(&DstArg),
4892 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4893 // task_dst->liter = lastpriv;
4895 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4896 LValue Base = CGF.EmitLValueForField(
4897 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4898 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4899 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4900 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4901 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4904 // Emit initial values for private copies (if any).
4905 assert(!Privates.empty());
4906 Address KmpTaskSharedsPtr = Address::invalid();
4907 if (!Data.FirstprivateVars.empty()) {
4908 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4909 CGF.GetAddrOfLocalVar(&SrcArg),
4910 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4911 LValue Base = CGF.EmitLValueForField(
4912 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4913 KmpTaskSharedsPtr = Address(
4914 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4915 Base, *std::next(KmpTaskTQTyRD->field_begin(),
4918 CGF.getNaturalTypeAlignment(SharedsTy));
4920 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4921 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4922 CGF.FinishFunction();
4926 /// Checks if destructor function is required to be generated.
4927 /// \return true if cleanups are required, false otherwise.
4929 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4930 bool NeedsCleanup = false;
4931 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4932 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4933 for (const FieldDecl *FD : PrivateRD->fields()) {
4934 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4938 return NeedsCleanup;
4941 CGOpenMPRuntime::TaskResultTy
4942 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4943 const OMPExecutableDirective &D,
4944 llvm::Function *TaskFunction, QualType SharedsTy,
4945 Address Shareds, const OMPTaskDataTy &Data) {
4946 ASTContext &C = CGM.getContext();
4947 llvm::SmallVector<PrivateDataTy, 4> Privates;
4948 // Aggregate privates and sort them by the alignment.
4949 auto I = Data.PrivateCopies.begin();
4950 for (const Expr *E : Data.PrivateVars) {
4951 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4952 Privates.emplace_back(
4954 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4955 /*PrivateElemInit=*/nullptr));
4958 I = Data.FirstprivateCopies.begin();
4959 auto IElemInitRef = Data.FirstprivateInits.begin();
4960 for (const Expr *E : Data.FirstprivateVars) {
4961 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4962 Privates.emplace_back(
4965 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4966 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4970 I = Data.LastprivateCopies.begin();
4971 for (const Expr *E : Data.LastprivateVars) {
4972 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4973 Privates.emplace_back(
4975 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4976 /*PrivateElemInit=*/nullptr));
4979 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
4980 return L.first > R.first;
4982 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4983 // Build type kmp_routine_entry_t (if not built yet).
4984 emitKmpRoutineEntryT(KmpInt32Ty);
4985 // Build type kmp_task_t (if not built yet).
4986 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4987 if (SavedKmpTaskloopTQTy.isNull()) {
4988 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4989 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4991 KmpTaskTQTy = SavedKmpTaskloopTQTy;
4993 assert((D.getDirectiveKind() == OMPD_task ||
4994 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4995 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4996 "Expected taskloop, task or target directive");
4997 if (SavedKmpTaskTQTy.isNull()) {
4998 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4999 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5001 KmpTaskTQTy = SavedKmpTaskTQTy;
5003 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5004 // Build particular struct kmp_task_t for the given task.
5005 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5006 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
5007 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5008 QualType KmpTaskTWithPrivatesPtrQTy =
5009 C.getPointerType(KmpTaskTWithPrivatesQTy);
5010 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5011 llvm::Type *KmpTaskTWithPrivatesPtrTy =
5012 KmpTaskTWithPrivatesTy->getPointerTo();
5013 llvm::Value *KmpTaskTWithPrivatesTySize =
5014 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5015 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5017 // Emit initial values for private copies (if any).
5018 llvm::Value *TaskPrivatesMap = nullptr;
5019 llvm::Type *TaskPrivatesMapTy =
5020 std::next(TaskFunction->arg_begin(), 3)->getType();
5021 if (!Privates.empty()) {
5022 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5023 TaskPrivatesMap = emitTaskPrivateMappingFunction(
5024 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5025 FI->getType(), Privates);
5026 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5027 TaskPrivatesMap, TaskPrivatesMapTy);
5029 TaskPrivatesMap = llvm::ConstantPointerNull::get(
5030 cast<llvm::PointerType>(TaskPrivatesMapTy));
5032 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5034 llvm::Function *TaskEntry = emitProxyTaskFunction(
5035 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5036 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5039 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5040 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5041 // kmp_routine_entry_t *task_entry);
5042 // Task flags. Format is taken from
5043 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5044 // description of kmp_tasking_flags struct.
5048 DestructorsFlag = 0x8,
5051 unsigned Flags = Data.Tied ? TiedFlag : 0;
5052 bool NeedsCleanup = false;
5053 if (!Privates.empty()) {
5054 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5056 Flags = Flags | DestructorsFlag;
5058 if (Data.Priority.getInt())
5059 Flags = Flags | PriorityFlag;
5060 llvm::Value *TaskFlags =
5061 Data.Final.getPointer()
5062 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5063 CGF.Builder.getInt32(FinalFlag),
5064 CGF.Builder.getInt32(/*C=*/0))
5065 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5066 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5067 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5068 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5069 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5070 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5071 TaskEntry, KmpRoutineEntryPtrTy)};
5072 llvm::Value *NewTask;
5073 if (D.hasClausesOfKind<OMPNowaitClause>()) {
5074 // Check if we have any device clause associated with the directive.
5075 const Expr *Device = nullptr;
5076 if (auto *C = D.getSingleClause<OMPDeviceClause>())
5077 Device = C->getDevice();
5078 // Emit device ID if any otherwise use default value.
5079 llvm::Value *DeviceID;
5081 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5082 CGF.Int64Ty, /*isSigned=*/true);
5084 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5085 AllocArgs.push_back(DeviceID);
5086 NewTask = CGF.EmitRuntimeCall(
5087 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5089 NewTask = CGF.EmitRuntimeCall(
5090 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5092 llvm::Value *NewTaskNewTaskTTy =
5093 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5094 NewTask, KmpTaskTWithPrivatesPtrTy);
5095 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5096 KmpTaskTWithPrivatesQTy);
5098 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5099 // Fill the data in the resulting kmp_task_t record.
5100 // Copy shareds if there are any.
5101 Address KmpTaskSharedsPtr = Address::invalid();
5102 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5104 Address(CGF.EmitLoadOfScalar(
5105 CGF.EmitLValueForField(
5106 TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5109 CGF.getNaturalTypeAlignment(SharedsTy));
5110 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5111 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5112 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5114 // Emit initial values for private copies (if any).
5115 TaskResultTy Result;
5116 if (!Privates.empty()) {
5117 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5118 SharedsTy, SharedsPtrTy, Data, Privates,
5120 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5121 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5122 Result.TaskDupFn = emitTaskDupFunction(
5123 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5124 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5125 /*WithLastIter=*/!Data.LastprivateVars.empty());
5128 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5129 enum { Priority = 0, Destructors = 1 };
5130 // Provide pointer to function with destructors for privates.
5131 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5132 const RecordDecl *KmpCmplrdataUD =
5133 (*FI)->getType()->getAsUnionType()->getDecl();
5135 llvm::Value *DestructorFn = emitDestructorsFunction(
5136 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5137 KmpTaskTWithPrivatesQTy);
5138 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5139 LValue DestructorsLV = CGF.EmitLValueForField(
5140 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5141 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5142 DestructorFn, KmpRoutineEntryPtrTy),
5146 if (Data.Priority.getInt()) {
5147 LValue Data2LV = CGF.EmitLValueForField(
5148 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5149 LValue PriorityLV = CGF.EmitLValueForField(
5150 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5151 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5153 Result.NewTask = NewTask;
5154 Result.TaskEntry = TaskEntry;
5155 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5156 Result.TDBase = TDBase;
5157 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5161 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5162 const OMPExecutableDirective &D,
5163 llvm::Function *TaskFunction,
5164 QualType SharedsTy, Address Shareds,
5166 const OMPTaskDataTy &Data) {
5167 if (!CGF.HaveInsertPoint())
5170 TaskResultTy Result =
5171 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5172 llvm::Value *NewTask = Result.NewTask;
5173 llvm::Function *TaskEntry = Result.TaskEntry;
5174 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5175 LValue TDBase = Result.TDBase;
5176 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5177 ASTContext &C = CGM.getContext();
5178 // Process list of dependences.
5179 Address DependenciesArray = Address::invalid();
5180 unsigned NumDependencies = Data.Dependences.size();
5181 if (NumDependencies) {
5182 // Dependence kind for RTL.
5183 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
5184 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5185 RecordDecl *KmpDependInfoRD;
5187 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5188 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5189 if (KmpDependInfoTy.isNull()) {
5190 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5191 KmpDependInfoRD->startDefinition();
5192 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5193 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5194 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5195 KmpDependInfoRD->completeDefinition();
5196 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5198 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5200 // Define type kmp_depend_info[<Dependences.size()>];
5201 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5202 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5203 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5204 // kmp_depend_info[<Dependences.size()>] deps;
5206 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5207 for (unsigned I = 0; I < NumDependencies; ++I) {
5208 const Expr *E = Data.Dependences[I].second;
5209 LValue Addr = CGF.EmitLValue(E);
5211 QualType Ty = E->getType();
5212 if (const auto *ASE =
5213 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5215 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5216 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
5217 UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
5218 llvm::Value *LowIntPtr =
5219 CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
5220 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5221 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5223 Size = CGF.getTypeSize(Ty);
5225 LValue Base = CGF.MakeAddrLValue(
5226 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5228 // deps[i].base_addr = &<Dependences[i].second>;
5229 LValue BaseAddrLVal = CGF.EmitLValueForField(
5230 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5231 CGF.EmitStoreOfScalar(
5232 CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
5234 // deps[i].len = sizeof(<Dependences[i].second>);
5235 LValue LenLVal = CGF.EmitLValueForField(
5236 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5237 CGF.EmitStoreOfScalar(Size, LenLVal);
5238 // deps[i].flags = <Dependences[i].first>;
5239 RTLDependenceKindTy DepKind;
5240 switch (Data.Dependences[I].first) {
5241 case OMPC_DEPEND_in:
5244 // Out and InOut dependencies must use the same code.
5245 case OMPC_DEPEND_out:
5246 case OMPC_DEPEND_inout:
5249 case OMPC_DEPEND_mutexinoutset:
5250 DepKind = DepMutexInOutSet;
5252 case OMPC_DEPEND_source:
5253 case OMPC_DEPEND_sink:
5254 case OMPC_DEPEND_unknown:
5255 llvm_unreachable("Unknown task dependence type");
5257 LValue FlagsLVal = CGF.EmitLValueForField(
5258 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5259 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5262 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5263 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5266 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5268 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5269 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5270 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5271 // list is not empty
5272 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5273 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5274 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5275 llvm::Value *DepTaskArgs[7];
5276 if (NumDependencies) {
5277 DepTaskArgs[0] = UpLoc;
5278 DepTaskArgs[1] = ThreadID;
5279 DepTaskArgs[2] = NewTask;
5280 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5281 DepTaskArgs[4] = DependenciesArray.getPointer();
5282 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5283 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5285 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5287 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5289 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5290 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5291 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5293 if (NumDependencies) {
5294 CGF.EmitRuntimeCall(
5295 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5297 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5300 // Check if parent region is untied and build return for untied task;
5302 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5303 Region->emitUntiedSwitch(CGF);
5306 llvm::Value *DepWaitTaskArgs[6];
5307 if (NumDependencies) {
5308 DepWaitTaskArgs[0] = UpLoc;
5309 DepWaitTaskArgs[1] = ThreadID;
5310 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5311 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5312 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5313 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5315 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5316 NumDependencies, &DepWaitTaskArgs,
5317 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5318 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5319 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5320 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5321 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5322 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5324 if (NumDependencies)
5325 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5327 // Call proxy_task_entry(gtid, new_task);
5328 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5329 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5331 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5332 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5336 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5337 // kmp_task_t *new_task);
5338 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5339 // kmp_task_t *new_task);
5340 RegionCodeGenTy RCG(CodeGen);
5341 CommonActionTy Action(
5342 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5343 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5344 RCG.setAction(Action);
5349 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5351 RegionCodeGenTy ThenRCG(ThenCodeGen);
5356 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5357 const OMPLoopDirective &D,
5358 llvm::Function *TaskFunction,
5359 QualType SharedsTy, Address Shareds,
5361 const OMPTaskDataTy &Data) {
5362 if (!CGF.HaveInsertPoint())
5364 TaskResultTy Result =
5365 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5366 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5368 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5369 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5370 // sched, kmp_uint64 grainsize, void *task_dup);
5371 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5372 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5375 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5378 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5381 LValue LBLVal = CGF.EmitLValueForField(
5383 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5385 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5386 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5388 /*IsInitializer=*/true);
5389 LValue UBLVal = CGF.EmitLValueForField(
5391 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5393 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5394 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5396 /*IsInitializer=*/true);
5397 LValue StLVal = CGF.EmitLValueForField(
5399 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5401 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5402 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5404 /*IsInitializer=*/true);
5405 // Store reductions address.
5406 LValue RedLVal = CGF.EmitLValueForField(
5408 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5409 if (Data.Reductions) {
5410 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5412 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5413 CGF.getContext().VoidPtrTy);
5415 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5416 llvm::Value *TaskArgs[] = {
5421 LBLVal.getPointer(CGF),
5422 UBLVal.getPointer(CGF),
5423 CGF.EmitLoadOfScalar(StLVal, Loc),
5424 llvm::ConstantInt::getSigned(
5425 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5426 llvm::ConstantInt::getSigned(
5427 CGF.IntTy, Data.Schedule.getPointer()
5428 ? Data.Schedule.getInt() ? NumTasks : Grainsize
5430 Data.Schedule.getPointer()
5431 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5433 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5434 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5435 Result.TaskDupFn, CGF.VoidPtrTy)
5436 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5437 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5440 /// Emit reduction operation for each element of array (required for
5441 /// array sections) LHS op = RHS.
5442 /// \param Type Type of array.
5443 /// \param LHSVar Variable on the left side of the reduction operation
5444 /// (references element of array in original variable).
5445 /// \param RHSVar Variable on the right side of the reduction operation
5446 /// (references element of array in original variable).
5447 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5449 static void EmitOMPAggregateReduction(
5450 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5451 const VarDecl *RHSVar,
5452 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5453 const Expr *, const Expr *)> &RedOpGen,
5454 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5455 const Expr *UpExpr = nullptr) {
5456 // Perform element-by-element initialization.
5458 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5459 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5461 // Drill down to the base element type on both arrays.
5462 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5463 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5465 llvm::Value *RHSBegin = RHSAddr.getPointer();
5466 llvm::Value *LHSBegin = LHSAddr.getPointer();
5467 // Cast from pointer to array type to pointer to single element.
5468 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5469 // The basic structure here is a while-do loop.
5470 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5471 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5472 llvm::Value *IsEmpty =
5473 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5474 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5476 // Enter the loop body, making that address the current address.
5477 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5478 CGF.EmitBlock(BodyBB);
5480 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5482 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5483 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5484 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5485 Address RHSElementCurrent =
5486 Address(RHSElementPHI,
5487 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5489 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5490 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5491 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5492 Address LHSElementCurrent =
5493 Address(LHSElementPHI,
5494 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5497 CodeGenFunction::OMPPrivateScope Scope(CGF);
5498 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5499 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5501 RedOpGen(CGF, XExpr, EExpr, UpExpr);
5502 Scope.ForceCleanup();
5504 // Shift the address forward by one element.
5505 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5506 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5507 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5508 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5509 // Check whether we've reached the end.
5511 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5512 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5513 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5514 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5517 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5520 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5521 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5522 /// UDR combiner function.
5523 static void emitReductionCombiner(CodeGenFunction &CGF,
5524 const Expr *ReductionOp) {
5525 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5526 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5527 if (const auto *DRE =
5528 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5529 if (const auto *DRD =
5530 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5531 std::pair<llvm::Function *, llvm::Function *> Reduction =
5532 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5533 RValue Func = RValue::get(Reduction.first);
5534 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5535 CGF.EmitIgnoredExpr(ReductionOp);
5538 CGF.EmitIgnoredExpr(ReductionOp);
5541 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5542 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5543 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5544 ArrayRef<const Expr *> ReductionOps) {
5545 ASTContext &C = CGM.getContext();
5547 // void reduction_func(void *LHSArg, void *RHSArg);
5548 FunctionArgList Args;
5549 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5550 ImplicitParamDecl::Other);
5551 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5552 ImplicitParamDecl::Other);
5553 Args.push_back(&LHSArg);
5554 Args.push_back(&RHSArg);
5556 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5557 std::string Name = getName({"omp", "reduction", "reduction_func"});
5558 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5559 llvm::GlobalValue::InternalLinkage, Name,
5561 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5562 Fn->setDoesNotRecurse();
5563 CodeGenFunction CGF(CGM);
5564 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5566 // Dst = (void*[n])(LHSArg);
5567 // Src = (void*[n])(RHSArg);
5568 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5569 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5570 ArgsType), CGF.getPointerAlign());
5571 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5572 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5573 ArgsType), CGF.getPointerAlign());
5576 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5578 CodeGenFunction::OMPPrivateScope Scope(CGF);
5579 auto IPriv = Privates.begin();
5581 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5582 const auto *RHSVar =
5583 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5584 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5585 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5587 const auto *LHSVar =
5588 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5589 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5590 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5592 QualType PrivTy = (*IPriv)->getType();
5593 if (PrivTy->isVariablyModifiedType()) {
5594 // Get array size and emit VLA type.
5596 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5597 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5598 const VariableArrayType *VLA =
5599 CGF.getContext().getAsVariableArrayType(PrivTy);
5600 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5601 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5602 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5603 CGF.EmitVariablyModifiedType(PrivTy);
5607 IPriv = Privates.begin();
5608 auto ILHS = LHSExprs.begin();
5609 auto IRHS = RHSExprs.begin();
5610 for (const Expr *E : ReductionOps) {
5611 if ((*IPriv)->getType()->isArrayType()) {
5612 // Emit reduction for array section.
5613 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5614 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5615 EmitOMPAggregateReduction(
5616 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5617 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5618 emitReductionCombiner(CGF, E);
5621 // Emit reduction for array subscript or single variable.
5622 emitReductionCombiner(CGF, E);
5628 Scope.ForceCleanup();
5629 CGF.FinishFunction();
5633 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5634 const Expr *ReductionOp,
5635 const Expr *PrivateRef,
5636 const DeclRefExpr *LHS,
5637 const DeclRefExpr *RHS) {
5638 if (PrivateRef->getType()->isArrayType()) {
5639 // Emit reduction for array section.
5640 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5641 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5642 EmitOMPAggregateReduction(
5643 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5644 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5645 emitReductionCombiner(CGF, ReductionOp);
5648 // Emit reduction for array subscript or single variable.
5649 emitReductionCombiner(CGF, ReductionOp);
5653 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5654 ArrayRef<const Expr *> Privates,
5655 ArrayRef<const Expr *> LHSExprs,
5656 ArrayRef<const Expr *> RHSExprs,
5657 ArrayRef<const Expr *> ReductionOps,
5658 ReductionOptionsTy Options) {
5659 if (!CGF.HaveInsertPoint())
5662 bool WithNowait = Options.WithNowait;
5663 bool SimpleReduction = Options.SimpleReduction;
5665 // Next code should be emitted for reduction:
5667 // static kmp_critical_name lock = { 0 };
5669 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5670 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5672 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5673 // *(Type<n>-1*)rhs[<n>-1]);
5677 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5678 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5679 // RedList, reduce_func, &<lock>)) {
5682 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5684 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5688 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5690 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5695 // if SimpleReduction is true, only the next code is generated:
5697 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5700 ASTContext &C = CGM.getContext();
5702 if (SimpleReduction) {
5703 CodeGenFunction::RunCleanupsScope Scope(CGF);
5704 auto IPriv = Privates.begin();
5705 auto ILHS = LHSExprs.begin();
5706 auto IRHS = RHSExprs.begin();
5707 for (const Expr *E : ReductionOps) {
5708 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5709 cast<DeclRefExpr>(*IRHS));
5717 // 1. Build a list of reduction variables.
5718 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5719 auto Size = RHSExprs.size();
5720 for (const Expr *E : Privates) {
5721 if (E->getType()->isVariablyModifiedType())
5722 // Reserve place for array size.
5725 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5726 QualType ReductionArrayTy =
5727 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5728 /*IndexTypeQuals=*/0);
5729 Address ReductionList =
5730 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5731 auto IPriv = Privates.begin();
5733 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5734 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5735 CGF.Builder.CreateStore(
5736 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5737 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5739 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5740 // Store array size.
5742 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5743 llvm::Value *Size = CGF.Builder.CreateIntCast(
5745 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5747 CGF.SizeTy, /*isSigned=*/false);
5748 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5753 // 2. Emit reduce_func().
5754 llvm::Function *ReductionFn = emitReductionFunction(
5755 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5756 LHSExprs, RHSExprs, ReductionOps);
5758 // 3. Create static kmp_critical_name lock = { 0 };
5759 std::string Name = getName({"reduction"});
5760 llvm::Value *Lock = getCriticalRegionLock(Name);
5762 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5763 // RedList, reduce_func, &<lock>);
5764 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5765 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5766 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5767 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5768 ReductionList.getPointer(), CGF.VoidPtrTy);
5769 llvm::Value *Args[] = {
5770 IdentTLoc, // ident_t *<loc>
5771 ThreadId, // i32 <gtid>
5772 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5773 ReductionArrayTySize, // size_type sizeof(RedList)
5774 RL, // void *RedList
5775 ReductionFn, // void (*) (void *, void *) <reduce_func>
5776 Lock // kmp_critical_name *&<lock>
5778 llvm::Value *Res = CGF.EmitRuntimeCall(
5779 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5780 : OMPRTL__kmpc_reduce),
5783 // 5. Build switch(res)
5784 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5785 llvm::SwitchInst *SwInst =
5786 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5790 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5792 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5794 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5795 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5796 CGF.EmitBlock(Case1BB);
5798 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5799 llvm::Value *EndArgs[] = {
5800 IdentTLoc, // ident_t *<loc>
5801 ThreadId, // i32 <gtid>
5802 Lock // kmp_critical_name *&<lock>
5804 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5805 CodeGenFunction &CGF, PrePostActionTy &Action) {
5806 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5807 auto IPriv = Privates.begin();
5808 auto ILHS = LHSExprs.begin();
5809 auto IRHS = RHSExprs.begin();
5810 for (const Expr *E : ReductionOps) {
5811 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5812 cast<DeclRefExpr>(*IRHS));
5818 RegionCodeGenTy RCG(CodeGen);
5819 CommonActionTy Action(
5820 nullptr, llvm::None,
5821 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5822 : OMPRTL__kmpc_end_reduce),
5824 RCG.setAction(Action);
5827 CGF.EmitBranch(DefaultBB);
5831 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5834 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5835 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5836 CGF.EmitBlock(Case2BB);
5838 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5839 CodeGenFunction &CGF, PrePostActionTy &Action) {
5840 auto ILHS = LHSExprs.begin();
5841 auto IRHS = RHSExprs.begin();
5842 auto IPriv = Privates.begin();
5843 for (const Expr *E : ReductionOps) {
5844 const Expr *XExpr = nullptr;
5845 const Expr *EExpr = nullptr;
5846 const Expr *UpExpr = nullptr;
5847 BinaryOperatorKind BO = BO_Comma;
5848 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5849 if (BO->getOpcode() == BO_Assign) {
5850 XExpr = BO->getLHS();
5851 UpExpr = BO->getRHS();
5854 // Try to emit update expression as a simple atomic.
5855 const Expr *RHSExpr = UpExpr;
5857 // Analyze RHS part of the whole expression.
5858 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5859 RHSExpr->IgnoreParenImpCasts())) {
5860 // If this is a conditional operator, analyze its condition for
5861 // min/max reduction operator.
5862 RHSExpr = ACO->getCond();
5864 if (const auto *BORHS =
5865 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5866 EExpr = BORHS->getRHS();
5867 BO = BORHS->getOpcode();
5871 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5872 auto &&AtomicRedGen = [BO, VD,
5873 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5874 const Expr *EExpr, const Expr *UpExpr) {
5875 LValue X = CGF.EmitLValue(XExpr);
5878 E = CGF.EmitAnyExpr(EExpr);
5879 CGF.EmitOMPAtomicSimpleUpdateExpr(
5880 X, E, BO, /*IsXLHSInRHSPart=*/true,
5881 llvm::AtomicOrdering::Monotonic, Loc,
5882 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5883 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5884 PrivateScope.addPrivate(
5885 VD, [&CGF, VD, XRValue, Loc]() {
5886 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5887 CGF.emitOMPSimpleStore(
5888 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5889 VD->getType().getNonReferenceType(), Loc);
5892 (void)PrivateScope.Privatize();
5893 return CGF.EmitAnyExpr(UpExpr);
5896 if ((*IPriv)->getType()->isArrayType()) {
5897 // Emit atomic reduction for array section.
5898 const auto *RHSVar =
5899 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5900 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5901 AtomicRedGen, XExpr, EExpr, UpExpr);
5903 // Emit atomic reduction for array subscript or single variable.
5904 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5907 // Emit as a critical region.
5908 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5909 const Expr *, const Expr *) {
5910 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5911 std::string Name = RT.getName({"atomic_reduction"});
5912 RT.emitCriticalRegion(
5914 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5916 emitReductionCombiner(CGF, E);
5920 if ((*IPriv)->getType()->isArrayType()) {
5921 const auto *LHSVar =
5922 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5923 const auto *RHSVar =
5924 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5925 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5928 CritRedGen(CGF, nullptr, nullptr, nullptr);
5936 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5938 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5939 llvm::Value *EndArgs[] = {
5940 IdentTLoc, // ident_t *<loc>
5941 ThreadId, // i32 <gtid>
5942 Lock // kmp_critical_name *&<lock>
5944 CommonActionTy Action(nullptr, llvm::None,
5945 createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5947 AtomicRCG.setAction(Action);
5953 CGF.EmitBranch(DefaultBB);
5954 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5957 /// Generates unique name for artificial threadprivate variables.
5958 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5959 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5961 SmallString<256> Buffer;
5962 llvm::raw_svector_ostream Out(Buffer);
5963 const clang::DeclRefExpr *DE;
5964 const VarDecl *D = ::getBaseDecl(Ref, DE);
5966 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5967 D = D->getCanonicalDecl();
5968 std::string Name = CGM.getOpenMPRuntime().getName(
5969 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5970 Out << Prefix << Name << "_"
5971 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5975 /// Emits reduction initializer function:
5977 /// void @.red_init(void* %arg) {
5978 /// %0 = bitcast void* %arg to <type>*
5979 /// store <type> <init>, <type>* %0
5983 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5985 ReductionCodeGen &RCG, unsigned N) {
5986 ASTContext &C = CGM.getContext();
5987 FunctionArgList Args;
5988 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5989 ImplicitParamDecl::Other);
5990 Args.emplace_back(&Param);
5991 const auto &FnInfo =
5992 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5993 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5994 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5995 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5996 Name, &CGM.getModule());
5997 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5998 Fn->setDoesNotRecurse();
5999 CodeGenFunction CGF(CGM);
6000 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6001 Address PrivateAddr = CGF.EmitLoadOfPointer(
6002 CGF.GetAddrOfLocalVar(&Param),
6003 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6004 llvm::Value *Size = nullptr;
6005 // If the size of the reduction item is non-constant, load it from global
6006 // threadprivate variable.
6007 if (RCG.getSizes(N).second) {
6008 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6009 CGF, CGM.getContext().getSizeType(),
6010 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6011 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6012 CGM.getContext().getSizeType(), Loc);
6014 RCG.emitAggregateType(CGF, N, Size);
6016 // If initializer uses initializer from declare reduction construct, emit a
6017 // pointer to the address of the original reduction item (reuired by reduction
6019 if (RCG.usesReductionInitializer(N)) {
6020 Address SharedAddr =
6021 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6022 CGF, CGM.getContext().VoidPtrTy,
6023 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6024 SharedAddr = CGF.EmitLoadOfPointer(
6026 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6027 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6029 SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6030 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6031 CGM.getContext().VoidPtrTy);
6033 // Emit the initializer:
6034 // %0 = bitcast void* %arg to <type>*
6035 // store <type> <init>, <type>* %0
6036 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6037 [](CodeGenFunction &) { return false; });
6038 CGF.FinishFunction();
6042 /// Emits reduction combiner function:
6044 /// void @.red_comb(void* %arg0, void* %arg1) {
6045 /// %lhs = bitcast void* %arg0 to <type>*
6046 /// %rhs = bitcast void* %arg1 to <type>*
6047 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6048 /// store <type> %2, <type>* %lhs
6052 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
6054 ReductionCodeGen &RCG, unsigned N,
6055 const Expr *ReductionOp,
6056 const Expr *LHS, const Expr *RHS,
6057 const Expr *PrivateRef) {
6058 ASTContext &C = CGM.getContext();
6059 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6060 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6061 FunctionArgList Args;
6062 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6063 C.VoidPtrTy, ImplicitParamDecl::Other);
6064 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6065 ImplicitParamDecl::Other);
6066 Args.emplace_back(&ParamInOut);
6067 Args.emplace_back(&ParamIn);
6068 const auto &FnInfo =
6069 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6070 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6071 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6072 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6073 Name, &CGM.getModule());
6074 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6075 Fn->setDoesNotRecurse();
6076 CodeGenFunction CGF(CGM);
6077 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6078 llvm::Value *Size = nullptr;
6079 // If the size of the reduction item is non-constant, load it from global
6080 // threadprivate variable.
6081 if (RCG.getSizes(N).second) {
6082 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6083 CGF, CGM.getContext().getSizeType(),
6084 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6085 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6086 CGM.getContext().getSizeType(), Loc);
6088 RCG.emitAggregateType(CGF, N, Size);
6089 // Remap lhs and rhs variables to the addresses of the function arguments.
6090 // %lhs = bitcast void* %arg0 to <type>*
6091 // %rhs = bitcast void* %arg1 to <type>*
6092 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6093 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6094 // Pull out the pointer to the variable.
6095 Address PtrAddr = CGF.EmitLoadOfPointer(
6096 CGF.GetAddrOfLocalVar(&ParamInOut),
6097 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6098 return CGF.Builder.CreateElementBitCast(
6099 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6101 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6102 // Pull out the pointer to the variable.
6103 Address PtrAddr = CGF.EmitLoadOfPointer(
6104 CGF.GetAddrOfLocalVar(&ParamIn),
6105 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6106 return CGF.Builder.CreateElementBitCast(
6107 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6109 PrivateScope.Privatize();
6110 // Emit the combiner body:
6111 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6112 // store <type> %2, <type>* %lhs
6113 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6114 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6115 cast<DeclRefExpr>(RHS));
6116 CGF.FinishFunction();
6120 /// Emits reduction finalizer function:
6122 /// void @.red_fini(void* %arg) {
6123 /// %0 = bitcast void* %arg to <type>*
6124 /// <destroy>(<type>* %0)
6128 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6130 ReductionCodeGen &RCG, unsigned N) {
6131 if (!RCG.needCleanups(N))
6133 ASTContext &C = CGM.getContext();
6134 FunctionArgList Args;
6135 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6136 ImplicitParamDecl::Other);
6137 Args.emplace_back(&Param);
6138 const auto &FnInfo =
6139 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6140 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6141 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6142 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6143 Name, &CGM.getModule());
6144 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6145 Fn->setDoesNotRecurse();
6146 CodeGenFunction CGF(CGM);
6147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6148 Address PrivateAddr = CGF.EmitLoadOfPointer(
6149 CGF.GetAddrOfLocalVar(&Param),
6150 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6151 llvm::Value *Size = nullptr;
6152 // If the size of the reduction item is non-constant, load it from global
6153 // threadprivate variable.
6154 if (RCG.getSizes(N).second) {
6155 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6156 CGF, CGM.getContext().getSizeType(),
6157 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6158 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6159 CGM.getContext().getSizeType(), Loc);
6161 RCG.emitAggregateType(CGF, N, Size);
6162 // Emit the finalizer body:
6163 // <destroy>(<type>* %0)
6164 RCG.emitCleanups(CGF, N, PrivateAddr);
6165 CGF.FinishFunction(Loc);
6169 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6170 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6171 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6172 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6175 // Build typedef struct:
6176 // kmp_task_red_input {
6177 // void *reduce_shar; // shared reduction item
6178 // size_t reduce_size; // size of data item
6179 // void *reduce_init; // data initialization routine
6180 // void *reduce_fini; // data finalization routine
6181 // void *reduce_comb; // data combiner routine
6182 // kmp_task_red_flags_t flags; // flags for additional info from compiler
6183 // } kmp_task_red_input_t;
6184 ASTContext &C = CGM.getContext();
6185 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6186 RD->startDefinition();
6187 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6188 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6189 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6190 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6191 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6192 const FieldDecl *FlagsFD = addFieldToRecordDecl(
6193 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6194 RD->completeDefinition();
6195 QualType RDType = C.getRecordType(RD);
6196 unsigned Size = Data.ReductionVars.size();
6197 llvm::APInt ArraySize(/*numBits=*/64, Size);
6198 QualType ArrayRDType = C.getConstantArrayType(
6199 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6200 // kmp_task_red_input_t .rd_input.[Size];
6201 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6202 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
6204 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6205 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6206 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6207 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6208 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6209 TaskRedInput.getPointer(), Idxs,
6210 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6212 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6213 // ElemLVal.reduce_shar = &Shareds[Cnt];
6214 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6215 RCG.emitSharedLValue(CGF, Cnt);
6216 llvm::Value *CastedShared =
6217 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6218 CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6219 RCG.emitAggregateType(CGF, Cnt);
6220 llvm::Value *SizeValInChars;
6221 llvm::Value *SizeVal;
6222 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6223 // We use delayed creation/initialization for VLAs, array sections and
6224 // custom reduction initializations. It is required because runtime does not
6225 // provide the way to pass the sizes of VLAs/array sections to
6226 // initializer/combiner/finalizer functions and does not pass the pointer to
6227 // original reduction item to the initializer. Instead threadprivate global
6228 // variables are used to store these values and use them in the functions.
6229 bool DelayedCreation = !!SizeVal;
6230 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6231 /*isSigned=*/false);
6232 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6233 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6234 // ElemLVal.reduce_init = init;
6235 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6236 llvm::Value *InitAddr =
6237 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6238 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6239 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6240 // ElemLVal.reduce_fini = fini;
6241 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6242 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6243 llvm::Value *FiniAddr = Fini
6244 ? CGF.EmitCastToVoidPtr(Fini)
6245 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6246 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6247 // ElemLVal.reduce_comb = comb;
6248 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6249 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6250 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6251 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6252 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6253 // ElemLVal.flags = 0;
6254 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6255 if (DelayedCreation) {
6256 CGF.EmitStoreOfScalar(
6257 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6260 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6261 FlagsLVal.getType());
6263 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6265 llvm::Value *Args[] = {
6266 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6268 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6269 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6271 return CGF.EmitRuntimeCall(
6272 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
6275 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6277 ReductionCodeGen &RCG,
6279 auto Sizes = RCG.getSizes(N);
6280 // Emit threadprivate global variable if the type is non-constant
6281 // (Sizes.second = nullptr).
6283 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6284 /*isSigned=*/false);
6285 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6286 CGF, CGM.getContext().getSizeType(),
6287 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6288 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6290 // Store address of the original reduction item if custom initializer is used.
6291 if (RCG.usesReductionInitializer(N)) {
6292 Address SharedAddr = getAddrOfArtificialThreadPrivate(
6293 CGF, CGM.getContext().VoidPtrTy,
6294 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6295 CGF.Builder.CreateStore(
6296 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6297 RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
6298 SharedAddr, /*IsVolatile=*/false);
6302 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6304 llvm::Value *ReductionsPtr,
6305 LValue SharedLVal) {
6306 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6308 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6312 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6313 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6315 CGF.EmitRuntimeCall(
6316 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6317 SharedLVal.getAlignment());
6320 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6321 SourceLocation Loc) {
6322 if (!CGF.HaveInsertPoint())
6324 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6326 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6327 // Ignore return result until untied tasks are supported.
6328 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6329 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6330 Region->emitUntiedSwitch(CGF);
6333 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6334 OpenMPDirectiveKind InnerKind,
6335 const RegionCodeGenTy &CodeGen,
6337 if (!CGF.HaveInsertPoint())
6339 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6340 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6351 } // anonymous namespace
6353 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6354 RTCancelKind CancelKind = CancelNoreq;
6355 if (CancelRegion == OMPD_parallel)
6356 CancelKind = CancelParallel;
6357 else if (CancelRegion == OMPD_for)
6358 CancelKind = CancelLoop;
6359 else if (CancelRegion == OMPD_sections)
6360 CancelKind = CancelSections;
6362 assert(CancelRegion == OMPD_taskgroup);
6363 CancelKind = CancelTaskgroup;
6368 void CGOpenMPRuntime::emitCancellationPointCall(
6369 CodeGenFunction &CGF, SourceLocation Loc,
6370 OpenMPDirectiveKind CancelRegion) {
6371 if (!CGF.HaveInsertPoint())
6373 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6374 // global_tid, kmp_int32 cncl_kind);
6375 if (auto *OMPRegionInfo =
6376 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6377 // For 'cancellation point taskgroup', the task region info may not have a
6378 // cancel. This may instead happen in another adjacent task.
6379 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6380 llvm::Value *Args[] = {
6381 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6382 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6383 // Ignore return result until untied tasks are supported.
6384 llvm::Value *Result = CGF.EmitRuntimeCall(
6385 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6386 // if (__kmpc_cancellationpoint()) {
6387 // exit from construct;
6389 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6390 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6391 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6392 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6393 CGF.EmitBlock(ExitBB);
6394 // exit from construct;
6395 CodeGenFunction::JumpDest CancelDest =
6396 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6397 CGF.EmitBranchThroughCleanup(CancelDest);
6398 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6403 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6405 OpenMPDirectiveKind CancelRegion) {
6406 if (!CGF.HaveInsertPoint())
6408 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6409 // kmp_int32 cncl_kind);
6410 if (auto *OMPRegionInfo =
6411 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6412 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6413 PrePostActionTy &) {
6414 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6415 llvm::Value *Args[] = {
6416 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6417 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6418 // Ignore return result until untied tasks are supported.
6419 llvm::Value *Result = CGF.EmitRuntimeCall(
6420 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6421 // if (__kmpc_cancel()) {
6422 // exit from construct;
6424 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6425 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6426 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6427 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6428 CGF.EmitBlock(ExitBB);
6429 // exit from construct;
6430 CodeGenFunction::JumpDest CancelDest =
6431 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6432 CGF.EmitBranchThroughCleanup(CancelDest);
6433 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6436 emitIfClause(CGF, IfCond, ThenGen,
6437 [](CodeGenFunction &, PrePostActionTy &) {});
6439 RegionCodeGenTy ThenRCG(ThenGen);
6445 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6446 const OMPExecutableDirective &D, StringRef ParentName,
6447 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6448 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6449 assert(!ParentName.empty() && "Invalid target region parent name!");
6450 HasEmittedTargetRegion = true;
6451 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6452 IsOffloadEntry, CodeGen);
6455 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6456 const OMPExecutableDirective &D, StringRef ParentName,
6457 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6458 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6459 // Create a unique name for the entry function using the source location
6460 // information of the current target region. The name will be something like:
6462 // __omp_offloading_DD_FFFF_PP_lBB
6464 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6465 // mangled name of the function that encloses the target region and BB is the
6466 // line number of the target region.
6471 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6473 SmallString<64> EntryFnName;
6475 llvm::raw_svector_ostream OS(EntryFnName);
6476 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6477 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6480 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6482 CodeGenFunction CGF(CGM, true);
6483 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6484 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6486 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6488 // If this target outline function is not an offload entry, we don't need to
6490 if (!IsOffloadEntry)
6493 // The target region ID is used by the runtime library to identify the current
6494 // target region, so it only has to be unique and not necessarily point to
6495 // anything. It could be the pointer to the outlined function that implements
6496 // the target region, but we aren't using that so that the compiler doesn't
6497 // need to keep that, and could therefore inline the host function if proven
6498 // worthwhile during optimization. In the other hand, if emitting code for the
6499 // device, the ID has to be the function address so that it can retrieved from
6500 // the offloading entry and launched by the runtime library. We also mark the
6501 // outlined function to have external linkage in case we are emitting code for
6502 // the device, because these functions will be entry points to the device.
6504 if (CGM.getLangOpts().OpenMPIsDevice) {
6505 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6506 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6507 OutlinedFn->setDSOLocal(false);
6509 std::string Name = getName({EntryFnName, "region_id"});
6510 OutlinedFnID = new llvm::GlobalVariable(
6511 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6512 llvm::GlobalValue::WeakAnyLinkage,
6513 llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6516 // Register the information for the entry associated with this target region.
6517 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6518 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6519 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6522 /// Checks if the expression is constant or does not have non-trivial function
6524 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6525 // We can skip constant expressions.
6526 // We can skip expressions with trivial calls or simple expressions.
6527 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6528 !E->hasNonTrivialCall(Ctx)) &&
6529 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6532 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6534 const Stmt *Child = Body->IgnoreContainers();
6535 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6537 for (const Stmt *S : C->body()) {
6538 if (const auto *E = dyn_cast<Expr>(S)) {
6539 if (isTrivial(Ctx, E))
6542 // Some of the statements can be ignored.
6543 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6544 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6546 // Analyze declarations.
6547 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6548 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6549 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6550 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6551 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6552 isa<UsingDirectiveDecl>(D) ||
6553 isa<OMPDeclareReductionDecl>(D) ||
6554 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6556 const auto *VD = dyn_cast<VarDecl>(D);
6559 return VD->isConstexpr() ||
6560 ((VD->getType().isTrivialType(Ctx) ||
6561 VD->getType()->isReferenceType()) &&
6562 (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6566 // Found multiple children - cannot get the one child only.
6572 Child = Child->IgnoreContainers();
6577 /// Emit the number of teams for a target directive. Inspect the num_teams
6578 /// clause associated with a teams construct combined or closely nested
6579 /// with the target directive.
6581 /// Emit a team of size one for directives such as 'target parallel' that
6582 /// have no associated teams construct.
6584 /// Otherwise, return nullptr.
6585 static llvm::Value *
6586 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6587 const OMPExecutableDirective &D) {
6588 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6589 "Clauses associated with the teams directive expected to be emitted "
6590 "only for the host!");
6591 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6592 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6593 "Expected target-based executable directive.");
6594 CGBuilderTy &Bld = CGF.Builder;
6595 switch (DirectiveKind) {
6597 const auto *CS = D.getInnermostCapturedStmt();
6599 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6600 const Stmt *ChildStmt =
6601 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6602 if (const auto *NestedDir =
6603 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6604 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6605 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6606 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6607 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6608 const Expr *NumTeams =
6609 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6610 llvm::Value *NumTeamsVal =
6611 CGF.EmitScalarExpr(NumTeams,
6612 /*IgnoreResultAssign*/ true);
6613 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6616 return Bld.getInt32(0);
6618 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6619 isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6620 return Bld.getInt32(1);
6621 return Bld.getInt32(0);
6625 case OMPD_target_teams:
6626 case OMPD_target_teams_distribute:
6627 case OMPD_target_teams_distribute_simd:
6628 case OMPD_target_teams_distribute_parallel_for:
6629 case OMPD_target_teams_distribute_parallel_for_simd: {
6630 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6631 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6632 const Expr *NumTeams =
6633 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6634 llvm::Value *NumTeamsVal =
6635 CGF.EmitScalarExpr(NumTeams,
6636 /*IgnoreResultAssign*/ true);
6637 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6640 return Bld.getInt32(0);
6642 case OMPD_target_parallel:
6643 case OMPD_target_parallel_for:
6644 case OMPD_target_parallel_for_simd:
6645 case OMPD_target_simd:
6646 return Bld.getInt32(1);
6649 case OMPD_parallel_for:
6650 case OMPD_parallel_master:
6651 case OMPD_parallel_sections:
6653 case OMPD_parallel_for_simd:
6655 case OMPD_cancellation_point:
6657 case OMPD_threadprivate:
6666 case OMPD_taskyield:
6669 case OMPD_taskgroup:
6673 case OMPD_target_data:
6674 case OMPD_target_exit_data:
6675 case OMPD_target_enter_data:
6676 case OMPD_distribute:
6677 case OMPD_distribute_simd:
6678 case OMPD_distribute_parallel_for:
6679 case OMPD_distribute_parallel_for_simd:
6680 case OMPD_teams_distribute:
6681 case OMPD_teams_distribute_simd:
6682 case OMPD_teams_distribute_parallel_for:
6683 case OMPD_teams_distribute_parallel_for_simd:
6684 case OMPD_target_update:
6685 case OMPD_declare_simd:
6686 case OMPD_declare_variant:
6687 case OMPD_declare_target:
6688 case OMPD_end_declare_target:
6689 case OMPD_declare_reduction:
6690 case OMPD_declare_mapper:
6692 case OMPD_taskloop_simd:
6693 case OMPD_master_taskloop:
6694 case OMPD_master_taskloop_simd:
6695 case OMPD_parallel_master_taskloop:
6696 case OMPD_parallel_master_taskloop_simd:
6701 llvm_unreachable("Unexpected directive kind.");
6704 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6705 llvm::Value *DefaultThreadLimitVal) {
6706 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6707 CGF.getContext(), CS->getCapturedStmt());
6708 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6709 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6710 llvm::Value *NumThreads = nullptr;
6711 llvm::Value *CondVal = nullptr;
6712 // Handle if clause. If if clause present, the number of threads is
6713 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6714 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6715 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6716 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6717 const OMPIfClause *IfClause = nullptr;
6718 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6719 if (C->getNameModifier() == OMPD_unknown ||
6720 C->getNameModifier() == OMPD_parallel) {
6726 const Expr *Cond = IfClause->getCondition();
6728 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6730 return CGF.Builder.getInt32(1);
6732 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6733 if (const auto *PreInit =
6734 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6735 for (const auto *I : PreInit->decls()) {
6736 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6737 CGF.EmitVarDecl(cast<VarDecl>(*I));
6739 CodeGenFunction::AutoVarEmission Emission =
6740 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6741 CGF.EmitAutoVarCleanups(Emission);
6745 CondVal = CGF.EvaluateExprAsBool(Cond);
6749 // Check the value of num_threads clause iff if clause was not specified
6750 // or is not evaluated to false.
6751 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6752 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6753 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6754 const auto *NumThreadsClause =
6755 Dir->getSingleClause<OMPNumThreadsClause>();
6756 CodeGenFunction::LexicalScope Scope(
6757 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6758 if (const auto *PreInit =
6759 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6760 for (const auto *I : PreInit->decls()) {
6761 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6762 CGF.EmitVarDecl(cast<VarDecl>(*I));
6764 CodeGenFunction::AutoVarEmission Emission =
6765 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6766 CGF.EmitAutoVarCleanups(Emission);
6770 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6771 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6772 /*isSigned=*/false);
6773 if (DefaultThreadLimitVal)
6774 NumThreads = CGF.Builder.CreateSelect(
6775 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6776 DefaultThreadLimitVal, NumThreads);
6778 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6779 : CGF.Builder.getInt32(0);
6781 // Process condition of the if clause.
6783 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6784 CGF.Builder.getInt32(1));
6788 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6789 return CGF.Builder.getInt32(1);
6790 return DefaultThreadLimitVal;
6792 return DefaultThreadLimitVal ? DefaultThreadLimitVal
6793 : CGF.Builder.getInt32(0);
6796 /// Emit the number of threads for a target directive. Inspect the
6797 /// thread_limit clause associated with a teams construct combined or closely
6798 /// nested with the target directive.
6800 /// Emit the num_threads clause for directives such as 'target parallel' that
6801 /// have no associated teams construct.
6803 /// Otherwise, return nullptr.
6804 static llvm::Value *
6805 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6806 const OMPExecutableDirective &D) {
6807 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6808 "Clauses associated with the teams directive expected to be emitted "
6809 "only for the host!");
6810 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6811 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6812 "Expected target-based executable directive.");
6813 CGBuilderTy &Bld = CGF.Builder;
6814 llvm::Value *ThreadLimitVal = nullptr;
6815 llvm::Value *NumThreadsVal = nullptr;
6816 switch (DirectiveKind) {
6818 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6819 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6821 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6822 CGF.getContext(), CS->getCapturedStmt());
6823 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6824 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6825 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6826 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6827 const auto *ThreadLimitClause =
6828 Dir->getSingleClause<OMPThreadLimitClause>();
6829 CodeGenFunction::LexicalScope Scope(
6830 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6831 if (const auto *PreInit =
6832 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6833 for (const auto *I : PreInit->decls()) {
6834 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6835 CGF.EmitVarDecl(cast<VarDecl>(*I));
6837 CodeGenFunction::AutoVarEmission Emission =
6838 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6839 CGF.EmitAutoVarCleanups(Emission);
6843 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6844 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6846 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6848 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6849 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6850 CS = Dir->getInnermostCapturedStmt();
6851 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6852 CGF.getContext(), CS->getCapturedStmt());
6853 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6855 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6856 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6857 CS = Dir->getInnermostCapturedStmt();
6858 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6861 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6862 return Bld.getInt32(1);
6864 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6866 case OMPD_target_teams: {
6867 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6868 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6869 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6870 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6871 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6873 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6875 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6876 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6878 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6879 CGF.getContext(), CS->getCapturedStmt());
6880 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6881 if (Dir->getDirectiveKind() == OMPD_distribute) {
6882 CS = Dir->getInnermostCapturedStmt();
6883 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6887 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6889 case OMPD_target_teams_distribute:
6890 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6891 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6892 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6893 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6894 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6896 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6898 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6899 case OMPD_target_parallel:
6900 case OMPD_target_parallel_for:
6901 case OMPD_target_parallel_for_simd:
6902 case OMPD_target_teams_distribute_parallel_for:
6903 case OMPD_target_teams_distribute_parallel_for_simd: {
6904 llvm::Value *CondVal = nullptr;
6905 // Handle if clause. If if clause present, the number of threads is
6906 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6907 if (D.hasClausesOfKind<OMPIfClause>()) {
6908 const OMPIfClause *IfClause = nullptr;
6909 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6910 if (C->getNameModifier() == OMPD_unknown ||
6911 C->getNameModifier() == OMPD_parallel) {
6917 const Expr *Cond = IfClause->getCondition();
6919 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6921 return Bld.getInt32(1);
6923 CodeGenFunction::RunCleanupsScope Scope(CGF);
6924 CondVal = CGF.EvaluateExprAsBool(Cond);
6928 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6929 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6930 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6931 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6932 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6934 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6936 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6937 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6938 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6939 llvm::Value *NumThreads = CGF.EmitScalarExpr(
6940 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6942 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6943 ThreadLimitVal = ThreadLimitVal
6944 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6946 NumThreadsVal, ThreadLimitVal)
6949 if (!ThreadLimitVal)
6950 ThreadLimitVal = Bld.getInt32(0);
6952 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6953 return ThreadLimitVal;
6955 case OMPD_target_teams_distribute_simd:
6956 case OMPD_target_simd:
6957 return Bld.getInt32(1);
6960 case OMPD_parallel_for:
6961 case OMPD_parallel_master:
6962 case OMPD_parallel_sections:
6964 case OMPD_parallel_for_simd:
6966 case OMPD_cancellation_point:
6968 case OMPD_threadprivate:
6977 case OMPD_taskyield:
6980 case OMPD_taskgroup:
6984 case OMPD_target_data:
6985 case OMPD_target_exit_data:
6986 case OMPD_target_enter_data:
6987 case OMPD_distribute:
6988 case OMPD_distribute_simd:
6989 case OMPD_distribute_parallel_for:
6990 case OMPD_distribute_parallel_for_simd:
6991 case OMPD_teams_distribute:
6992 case OMPD_teams_distribute_simd:
6993 case OMPD_teams_distribute_parallel_for:
6994 case OMPD_teams_distribute_parallel_for_simd:
6995 case OMPD_target_update:
6996 case OMPD_declare_simd:
6997 case OMPD_declare_variant:
6998 case OMPD_declare_target:
6999 case OMPD_end_declare_target:
7000 case OMPD_declare_reduction:
7001 case OMPD_declare_mapper:
7003 case OMPD_taskloop_simd:
7004 case OMPD_master_taskloop:
7005 case OMPD_master_taskloop_simd:
7006 case OMPD_parallel_master_taskloop:
7007 case OMPD_parallel_master_taskloop_simd:
7012 llvm_unreachable("Unsupported directive kind.");
7016 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7018 // Utility to handle information from clauses associated with a given
7019 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7020 // It provides a convenient interface to obtain the information and generate
7021 // code for that information.
7022 class MappableExprsHandler {
7024 /// Values for bit flags used to specify the mapping type for
7026 enum OpenMPOffloadMappingFlags : uint64_t {
7029 /// Allocate memory on the device and move data from host to device.
7031 /// Allocate memory on the device and move data from device to host.
7032 OMP_MAP_FROM = 0x02,
7033 /// Always perform the requested mapping action on the element, even
7034 /// if it was already mapped before.
7035 OMP_MAP_ALWAYS = 0x04,
7036 /// Delete the element from the device environment, ignoring the
7037 /// current reference count associated with the element.
7038 OMP_MAP_DELETE = 0x08,
7039 /// The element being mapped is a pointer-pointee pair; both the
7040 /// pointer and the pointee should be mapped.
7041 OMP_MAP_PTR_AND_OBJ = 0x10,
7042 /// This flags signals that the base address of an entry should be
7043 /// passed to the target kernel as an argument.
7044 OMP_MAP_TARGET_PARAM = 0x20,
7045 /// Signal that the runtime library has to return the device pointer
7046 /// in the current position for the data being mapped. Used when we have the
7047 /// use_device_ptr clause.
7048 OMP_MAP_RETURN_PARAM = 0x40,
7049 /// This flag signals that the reference being passed is a pointer to
7051 OMP_MAP_PRIVATE = 0x80,
7052 /// Pass the element to the device by value.
7053 OMP_MAP_LITERAL = 0x100,
7055 OMP_MAP_IMPLICIT = 0x200,
7056 /// Close is a hint to the runtime to allocate memory close to
7057 /// the target device.
7058 OMP_MAP_CLOSE = 0x400,
7059 /// The 16 MSBs of the flags indicate whether the entry is member of some
7061 OMP_MAP_MEMBER_OF = 0xffff000000000000,
7062 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7065 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7066 static unsigned getFlagMemberOffset() {
7067 unsigned Offset = 0;
7068 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7069 Remain = Remain >> 1)
7074 /// Class that associates information with a base pointer to be passed to the
7075 /// runtime library.
7076 class BasePointerInfo {
7077 /// The base pointer.
7078 llvm::Value *Ptr = nullptr;
7079 /// The base declaration that refers to this device pointer, or null if
7081 const ValueDecl *DevPtrDecl = nullptr;
7084 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7085 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7086 llvm::Value *operator*() const { return Ptr; }
7087 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7088 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7091 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7092 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7093 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7095 /// Map between a struct and the its lowest & highest elements which have been
7097 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7098 /// HE(FieldIndex, Pointer)}
7099 struct StructRangeInfoTy {
7100 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7101 0, Address::invalid()};
7102 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7103 0, Address::invalid()};
7104 Address Base = Address::invalid();
7108 /// Kind that defines how a device pointer has to be returned.
7110 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7111 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7112 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7113 bool ReturnDevicePointer = false;
7114 bool IsImplicit = false;
7116 MapInfo() = default;
7118 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7119 OpenMPMapClauseKind MapType,
7120 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7121 bool ReturnDevicePointer, bool IsImplicit)
7122 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7123 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
7126 /// If use_device_ptr is used on a pointer which is a struct member and there
7127 /// is no map information about it, then emission of that entry is deferred
7128 /// until the whole struct has been processed.
7129 struct DeferredDevicePtrEntryTy {
7130 const Expr *IE = nullptr;
7131 const ValueDecl *VD = nullptr;
7133 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7137 /// The target directive from where the mappable clauses were extracted. It
7138 /// is either a executable directive or a user-defined mapper directive.
7139 llvm::PointerUnion<const OMPExecutableDirective *,
7140 const OMPDeclareMapperDecl *>
7143 /// Function the directive is being generated for.
7144 CodeGenFunction &CGF;
7146 /// Set of all first private variables in the current directive.
7147 /// bool data is set to true if the variable is implicitly marked as
7148 /// firstprivate, false otherwise.
7149 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7151 /// Map between device pointer declarations and their expression components.
7152 /// The key value for declarations in 'this' is null.
7155 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7158 llvm::Value *getExprTypeSize(const Expr *E) const {
7159 QualType ExprTy = E->getType().getCanonicalType();
7161 // Reference types are ignored for mapping purposes.
7162 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7163 ExprTy = RefTy->getPointeeType().getCanonicalType();
7165 // Given that an array section is considered a built-in type, we need to
7166 // do the calculation based on the length of the section instead of relying
7167 // on CGF.getTypeSize(E->getType()).
7168 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7169 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7170 OAE->getBase()->IgnoreParenImpCasts())
7171 .getCanonicalType();
7173 // If there is no length associated with the expression and lower bound is
7174 // not specified too, that means we are using the whole length of the
7176 if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
7177 !OAE->getLowerBound())
7178 return CGF.getTypeSize(BaseTy);
7180 llvm::Value *ElemSize;
7181 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7182 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7184 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7185 assert(ATy && "Expecting array type if not a pointer type.");
7186 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7189 // If we don't have a length at this point, that is because we have an
7190 // array section with a single element.
7191 if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
7194 if (const Expr *LenExpr = OAE->getLength()) {
7195 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7196 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7197 CGF.getContext().getSizeType(),
7198 LenExpr->getExprLoc());
7199 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7201 assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
7202 OAE->getLowerBound() && "expected array_section[lb:].");
7203 // Size = sizetype - lb * elemtype;
7204 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7205 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7206 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7207 CGF.getContext().getSizeType(),
7208 OAE->getLowerBound()->getExprLoc());
7209 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7210 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7211 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7212 LengthVal = CGF.Builder.CreateSelect(
7213 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7216 return CGF.getTypeSize(ExprTy);
7219 /// Return the corresponding bits for a given map clause modifier. Add
7220 /// a flag marking the map as a pointer if requested. Add a flag marking the
7221 /// map as the first one of a series of maps that relate to the same map
7223 OpenMPOffloadMappingFlags getMapTypeBits(
7224 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7225 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7226 OpenMPOffloadMappingFlags Bits =
7227 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7229 case OMPC_MAP_alloc:
7230 case OMPC_MAP_release:
7231 // alloc and release is the default behavior in the runtime library, i.e.
7232 // if we don't pass any bits alloc/release that is what the runtime is
7233 // going to do. Therefore, we don't need to signal anything for these two
7240 Bits |= OMP_MAP_FROM;
7242 case OMPC_MAP_tofrom:
7243 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7245 case OMPC_MAP_delete:
7246 Bits |= OMP_MAP_DELETE;
7248 case OMPC_MAP_unknown:
7249 llvm_unreachable("Unexpected map type!");
7252 Bits |= OMP_MAP_PTR_AND_OBJ;
7253 if (AddIsTargetParamFlag)
7254 Bits |= OMP_MAP_TARGET_PARAM;
7255 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7256 != MapModifiers.end())
7257 Bits |= OMP_MAP_ALWAYS;
7258 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
7259 != MapModifiers.end())
7260 Bits |= OMP_MAP_CLOSE;
7264 /// Return true if the provided expression is a final array section. A
7265 /// final array section, is one whose length can't be proved to be one.
7266 bool isFinalArraySectionExpression(const Expr *E) const {
7267 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7269 // It is not an array section and therefore not a unity-size one.
7273 // An array section with no colon always refer to a single element.
7274 if (OASE->getColonLoc().isInvalid())
7277 const Expr *Length = OASE->getLength();
7279 // If we don't have a length we have to check if the array has size 1
7280 // for this dimension. Also, we should always expect a length if the
7281 // base type is pointer.
7283 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7284 OASE->getBase()->IgnoreParenImpCasts())
7285 .getCanonicalType();
7286 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7287 return ATy->getSize().getSExtValue() != 1;
7288 // If we don't have a constant dimension length, we have to consider
7289 // the current section as having any size, so it is not necessarily
7290 // unitary. If it happen to be unity size, that's user fault.
7294 // Check if the length evaluates to 1.
7295 Expr::EvalResult Result;
7296 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7297 return true; // Can have more that size 1.
7299 llvm::APSInt ConstLength = Result.Val.getInt();
7300 return ConstLength.getSExtValue() != 1;
7303 /// Generate the base pointers, section pointers, sizes and map type
7304 /// bits for the provided map type, map modifier, and expression components.
7305 /// \a IsFirstComponent should be set to true if the provided set of
7306 /// components is the first associated with a capture.
7307 void generateInfoForComponentList(
7308 OpenMPMapClauseKind MapType,
7309 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7310 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7311 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7312 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7313 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7315 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7316 OverlappedElements = llvm::None) const {
7317 // The following summarizes what has to be generated for each map and the
7318 // types below. The generated information is expressed in this order:
7319 // base pointer, section pointer, size, flags
7320 // (to add to the ones that come from the map type and modifier).
7341 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7344 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7347 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7350 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7353 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7356 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7359 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7362 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7365 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7367 // map(to: s.p[:22])
7368 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7369 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7370 // &(s.p), &(s.p[0]), 22*sizeof(double),
7371 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7372 // (*) alloc space for struct members, only this is a target parameter
7373 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7374 // optimizes this entry out, same in the examples below)
7375 // (***) map the pointee (map: to)
7378 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7380 // map(from: s.ps->s.i)
7381 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7382 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7383 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7385 // map(to: s.ps->ps)
7386 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7387 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7388 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7390 // map(s.ps->ps->ps)
7391 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7392 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7393 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7394 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7396 // map(to: s.ps->ps->s.f[:22])
7397 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7398 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7399 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7400 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7403 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7406 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7409 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7412 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7414 // map(to: ps->p[:22])
7415 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7416 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7417 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7420 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7422 // map(from: ps->ps->s.i)
7423 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7424 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7425 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7427 // map(from: ps->ps->ps)
7428 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7429 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7430 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7432 // map(ps->ps->ps->ps)
7433 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7434 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7435 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7436 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7438 // map(to: ps->ps->ps->s.f[:22])
7439 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7440 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7441 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7442 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7444 // map(to: s.f[:22]) map(from: s.p[:33])
7445 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7446 // sizeof(double*) (**), TARGET_PARAM
7447 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7448 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7449 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7450 // (*) allocate contiguous space needed to fit all mapped members even if
7451 // we allocate space for members not mapped (in this example,
7452 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7453 // them as well because they fall between &s.f[0] and &s.p)
7455 // map(from: s.f[:22]) map(to: ps->p[:33])
7456 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7457 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7458 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7459 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7460 // (*) the struct this entry pertains to is the 2nd element in the list of
7461 // arguments, hence MEMBER_OF(2)
7463 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7464 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7465 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7466 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7467 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7468 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7469 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7470 // (*) the struct this entry pertains to is the 4th element in the list
7471 // of arguments, hence MEMBER_OF(4)
7473 // Track if the map information being generated is the first for a capture.
7474 bool IsCaptureFirstInfo = IsFirstComponentList;
7475 // When the variable is on a declare target link or in a to clause with
7476 // unified memory, a reference is needed to hold the host/device address
7478 bool RequiresReference = false;
7480 // Scan the components from the base to the complete expression.
7481 auto CI = Components.rbegin();
7482 auto CE = Components.rend();
7485 // Track if the map information being generated is the first for a list of
7487 bool IsExpressionFirstInfo = true;
7488 Address BP = Address::invalid();
7489 const Expr *AssocExpr = I->getAssociatedExpression();
7490 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7491 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7493 if (isa<MemberExpr>(AssocExpr)) {
7494 // The base is the 'this' pointer. The content of the pointer is going
7495 // to be the base of the field being mapped.
7496 BP = CGF.LoadCXXThisAddress();
7497 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7499 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7500 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7502 // The base is the reference to the variable.
7504 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7505 if (const auto *VD =
7506 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7507 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7508 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7509 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7510 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7511 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7512 RequiresReference = true;
7513 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7518 // If the variable is a pointer and is being dereferenced (i.e. is not
7519 // the last component), the base has to be the pointer itself, not its
7520 // reference. References are ignored for mapping purposes.
7522 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7523 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7524 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7526 // We do not need to generate individual map information for the
7527 // pointer, it can be associated with the combined storage.
7532 // Track whether a component of the list should be marked as MEMBER_OF some
7533 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7534 // in a component list should be marked as MEMBER_OF, all subsequent entries
7535 // do not belong to the base struct. E.g.
7537 // s.ps->ps->ps->f[:]
7539 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7540 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7541 // is the pointee of ps(2) which is not member of struct s, so it should not
7542 // be marked as such (it is still PTR_AND_OBJ).
7543 // The variable is initialized to false so that PTR_AND_OBJ entries which
7544 // are not struct members are not considered (e.g. array of pointers to
7546 bool ShouldBeMemberOf = false;
7548 // Variable keeping track of whether or not we have encountered a component
7549 // in the component list which is a member expression. Useful when we have a
7550 // pointer or a final array section, in which case it is the previous
7551 // component in the list which tells us whether we have a member expression.
7553 // While processing the final array section "[:]" it is "f" which tells us
7554 // whether we are dealing with a member of a declared struct.
7555 const MemberExpr *EncounteredME = nullptr;
7557 for (; I != CE; ++I) {
7558 // If the current component is member of a struct (parent struct) mark it.
7559 if (!EncounteredME) {
7560 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7561 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7562 // as MEMBER_OF the parent struct.
7564 ShouldBeMemberOf = true;
7567 auto Next = std::next(I);
7569 // We need to generate the addresses and sizes if this is the last
7570 // component, if the component is a pointer or if it is an array section
7571 // whose length can't be proved to be one. If this is a pointer, it
7572 // becomes the base address for the following components.
7574 // A final array section, is one whose length can't be proved to be one.
7575 bool IsFinalArraySection =
7576 isFinalArraySectionExpression(I->getAssociatedExpression());
7578 // Get information on whether the element is a pointer. Have to do a
7579 // special treatment for array sections given that they are built-in
7582 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7584 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7586 ->isAnyPointerType()) ||
7587 I->getAssociatedExpression()->getType()->isAnyPointerType();
7589 if (Next == CE || IsPointer || IsFinalArraySection) {
7590 // If this is not the last component, we expect the pointer to be
7591 // associated with an array expression or member expression.
7592 assert((Next == CE ||
7593 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7594 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7595 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7596 "Unexpected expression");
7598 Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7601 // If this component is a pointer inside the base struct then we don't
7602 // need to create any entry for it - it will be combined with the object
7603 // it is pointing to into a single PTR_AND_OBJ entry.
7604 bool IsMemberPointer =
7605 IsPointer && EncounteredME &&
7606 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7608 if (!OverlappedElements.empty()) {
7609 // Handle base element with the info for overlapped elements.
7610 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7611 assert(Next == CE &&
7612 "Expected last element for the overlapped elements.");
7613 assert(!IsPointer &&
7614 "Unexpected base element with the pointer type.");
7615 // Mark the whole struct as the struct that requires allocation on the
7617 PartialStruct.LowestElem = {0, LB};
7618 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7619 I->getAssociatedExpression()->getType());
7620 Address HB = CGF.Builder.CreateConstGEP(
7621 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7623 TypeSize.getQuantity() - 1);
7624 PartialStruct.HighestElem = {
7625 std::numeric_limits<decltype(
7626 PartialStruct.HighestElem.first)>::max(),
7628 PartialStruct.Base = BP;
7629 // Emit data for non-overlapped data.
7630 OpenMPOffloadMappingFlags Flags =
7632 getMapTypeBits(MapType, MapModifiers, IsImplicit,
7633 /*AddPtrFlag=*/false,
7634 /*AddIsTargetParamFlag=*/false);
7636 llvm::Value *Size = nullptr;
7637 // Do bitcopy of all non-overlapped structure elements.
7638 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7639 Component : OverlappedElements) {
7640 Address ComponentLB = Address::invalid();
7641 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7643 if (MC.getAssociatedDeclaration()) {
7645 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7647 Size = CGF.Builder.CreatePtrDiff(
7648 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7649 CGF.EmitCastToVoidPtr(LB.getPointer()));
7653 BasePointers.push_back(BP.getPointer());
7654 Pointers.push_back(LB.getPointer());
7655 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7656 /*isSigned=*/true));
7657 Types.push_back(Flags);
7658 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7660 BasePointers.push_back(BP.getPointer());
7661 Pointers.push_back(LB.getPointer());
7662 Size = CGF.Builder.CreatePtrDiff(
7663 CGF.EmitCastToVoidPtr(
7664 CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7665 CGF.EmitCastToVoidPtr(LB.getPointer()));
7667 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7668 Types.push_back(Flags);
7671 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7672 if (!IsMemberPointer) {
7673 BasePointers.push_back(BP.getPointer());
7674 Pointers.push_back(LB.getPointer());
7676 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7678 // We need to add a pointer flag for each map that comes from the
7679 // same expression except for the first one. We also need to signal
7680 // this map is the first one that relates with the current capture
7681 // (there is a set of entries for each capture).
7682 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7683 MapType, MapModifiers, IsImplicit,
7684 !IsExpressionFirstInfo || RequiresReference,
7685 IsCaptureFirstInfo && !RequiresReference);
7687 if (!IsExpressionFirstInfo) {
7688 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7689 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7691 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7692 OMP_MAP_DELETE | OMP_MAP_CLOSE);
7694 if (ShouldBeMemberOf) {
7695 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7696 // should be later updated with the correct value of MEMBER_OF.
7697 Flags |= OMP_MAP_MEMBER_OF;
7698 // From now on, all subsequent PTR_AND_OBJ entries should not be
7699 // marked as MEMBER_OF.
7700 ShouldBeMemberOf = false;
7704 Types.push_back(Flags);
7707 // If we have encountered a member expression so far, keep track of the
7708 // mapped member. If the parent is "*this", then the value declaration
7710 if (EncounteredME) {
7711 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7712 unsigned FieldIndex = FD->getFieldIndex();
7714 // Update info about the lowest and highest elements for this struct
7715 if (!PartialStruct.Base.isValid()) {
7716 PartialStruct.LowestElem = {FieldIndex, LB};
7717 PartialStruct.HighestElem = {FieldIndex, LB};
7718 PartialStruct.Base = BP;
7719 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7720 PartialStruct.LowestElem = {FieldIndex, LB};
7721 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7722 PartialStruct.HighestElem = {FieldIndex, LB};
7726 // If we have a final array section, we are done with this expression.
7727 if (IsFinalArraySection)
7730 // The pointer becomes the base for the next element.
7734 IsExpressionFirstInfo = false;
7735 IsCaptureFirstInfo = false;
7740 /// Return the adjusted map modifiers if the declaration a capture refers to
7741 /// appears in a first-private clause. This is expected to be used only with
7742 /// directives that start with 'target'.
7743 MappableExprsHandler::OpenMPOffloadMappingFlags
7744 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7745 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7747 // A first private variable captured by reference will use only the
7748 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7749 // declaration is known as first-private in this handler.
7750 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7751 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7752 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7753 return MappableExprsHandler::OMP_MAP_ALWAYS |
7754 MappableExprsHandler::OMP_MAP_TO;
7755 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7756 return MappableExprsHandler::OMP_MAP_TO |
7757 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7758 return MappableExprsHandler::OMP_MAP_PRIVATE |
7759 MappableExprsHandler::OMP_MAP_TO;
7761 return MappableExprsHandler::OMP_MAP_TO |
7762 MappableExprsHandler::OMP_MAP_FROM;
7765 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7766 // Rotate by getFlagMemberOffset() bits.
7767 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7768 << getFlagMemberOffset());
7771 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7772 OpenMPOffloadMappingFlags MemberOfFlag) {
7773 // If the entry is PTR_AND_OBJ but has not been marked with the special
7774 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7775 // marked as MEMBER_OF.
7776 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7777 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7780 // Reset the placeholder value to prepare the flag for the assignment of the
7781 // proper MEMBER_OF value.
7782 Flags &= ~OMP_MAP_MEMBER_OF;
7783 Flags |= MemberOfFlag;
7786 void getPlainLayout(const CXXRecordDecl *RD,
7787 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7788 bool AsBase) const {
7789 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7791 llvm::StructType *St =
7792 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7794 unsigned NumElements = St->getNumElements();
7796 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7797 RecordLayout(NumElements);
7800 for (const auto &I : RD->bases()) {
7803 const auto *Base = I.getType()->getAsCXXRecordDecl();
7804 // Ignore empty bases.
7805 if (Base->isEmpty() || CGF.getContext()
7806 .getASTRecordLayout(Base)
7807 .getNonVirtualSize()
7811 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7812 RecordLayout[FieldIndex] = Base;
7814 // Fill in virtual bases.
7815 for (const auto &I : RD->vbases()) {
7816 const auto *Base = I.getType()->getAsCXXRecordDecl();
7817 // Ignore empty bases.
7818 if (Base->isEmpty())
7820 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7821 if (RecordLayout[FieldIndex])
7823 RecordLayout[FieldIndex] = Base;
7825 // Fill in all the fields.
7826 assert(!RD->isUnion() && "Unexpected union.");
7827 for (const auto *Field : RD->fields()) {
7828 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7829 // will fill in later.)
7830 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7831 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7832 RecordLayout[FieldIndex] = Field;
7835 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7836 &Data : RecordLayout) {
7839 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7840 getPlainLayout(Base, Layout, /*AsBase=*/true);
7842 Layout.push_back(Data.get<const FieldDecl *>());
7847 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7848 : CurDir(&Dir), CGF(CGF) {
7849 // Extract firstprivate clause information.
7850 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7851 for (const auto *D : C->varlists())
7852 FirstPrivateDecls.try_emplace(
7853 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7854 // Extract device pointer clause information.
7855 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7856 for (auto L : C->component_lists())
7857 DevPointersMap[L.first].push_back(L.second);
7860 /// Constructor for the declare mapper directive.
7861 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
7862 : CurDir(&Dir), CGF(CGF) {}
7864 /// Generate code for the combined entry if we have a partially mapped struct
7865 /// and take care of the mapping flags of the arguments corresponding to
7866 /// individual struct members.
7867 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7868 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7869 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7870 const StructRangeInfoTy &PartialStruct) const {
7871 // Base is the base of the struct
7872 BasePointers.push_back(PartialStruct.Base.getPointer());
7873 // Pointer is the address of the lowest element
7874 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7875 Pointers.push_back(LB);
7876 // Size is (addr of {highest+1} element) - (addr of lowest element)
7877 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7878 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7879 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7880 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7881 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7882 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7883 /*isSigned=*/false);
7884 Sizes.push_back(Size);
7885 // Map type is always TARGET_PARAM
7886 Types.push_back(OMP_MAP_TARGET_PARAM);
7887 // Remove TARGET_PARAM flag from the first element
7888 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7890 // All other current entries will be MEMBER_OF the combined entry
7891 // (except for PTR_AND_OBJ entries which do not have a placeholder value
7892 // 0xFFFF in the MEMBER_OF field).
7893 OpenMPOffloadMappingFlags MemberOfFlag =
7894 getMemberOfFlag(BasePointers.size() - 1);
7895 for (auto &M : CurTypes)
7896 setCorrectMemberOfFlag(M, MemberOfFlag);
7899 /// Generate all the base pointers, section pointers, sizes and map
7900 /// types for the extracted mappable expressions. Also, for each item that
7901 /// relates with a device pointer, a pair of the relevant declaration and
7902 /// index where it occurs is appended to the device pointers info array.
7903 void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7904 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7905 MapFlagsArrayTy &Types) const {
7906 // We have to process the component lists that relate with the same
7907 // declaration in a single chunk so that we can generate the map flags
7908 // correctly. Therefore, we organize all lists in a map.
7909 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7911 // Helper function to fill the information map for the different supported
7913 auto &&InfoGen = [&Info](
7915 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7916 OpenMPMapClauseKind MapType,
7917 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7918 bool ReturnDevicePointer, bool IsImplicit) {
7919 const ValueDecl *VD =
7920 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7921 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7925 assert(CurDir.is<const OMPExecutableDirective *>() &&
7926 "Expect a executable directive");
7927 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
7928 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
7929 for (const auto L : C->component_lists()) {
7930 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7931 /*ReturnDevicePointer=*/false, C->isImplicit());
7933 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
7934 for (const auto L : C->component_lists()) {
7935 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7936 /*ReturnDevicePointer=*/false, C->isImplicit());
7938 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
7939 for (const auto L : C->component_lists()) {
7940 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7941 /*ReturnDevicePointer=*/false, C->isImplicit());
7944 // Look at the use_device_ptr clause information and mark the existing map
7945 // entries as such. If there is no map information for an entry in the
7946 // use_device_ptr list, we create one with map type 'alloc' and zero size
7947 // section. It is the user fault if that was not mapped before. If there is
7948 // no map information and the pointer is a struct member, then we defer the
7949 // emission of that entry until the whole struct has been processed.
7950 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7953 for (const auto *C :
7954 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
7955 for (const auto L : C->component_lists()) {
7956 assert(!L.second.empty() && "Not expecting empty list of components!");
7957 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7958 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7959 const Expr *IE = L.second.back().getAssociatedExpression();
7960 // If the first component is a member expression, we have to look into
7961 // 'this', which maps to null in the map of map information. Otherwise
7962 // look directly for the information.
7963 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7965 // We potentially have map information for this declaration already.
7966 // Look for the first set of components that refer to it.
7967 if (It != Info.end()) {
7968 auto CI = std::find_if(
7969 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7970 return MI.Components.back().getAssociatedDeclaration() == VD;
7972 // If we found a map entry, signal that the pointer has to be returned
7973 // and move on to the next declaration.
7974 if (CI != It->second.end()) {
7975 CI->ReturnDevicePointer = true;
7980 // We didn't find any match in our map information - generate a zero
7981 // size array section - if the pointer is a struct member we defer this
7982 // action until the whole struct has been processed.
7983 if (isa<MemberExpr>(IE)) {
7984 // Insert the pointer into Info to be processed by
7985 // generateInfoForComponentList. Because it is a member pointer
7986 // without a pointee, no entry will be generated for it, therefore
7987 // we need to generate one after the whole struct has been processed.
7988 // Nonetheless, generateInfoForComponentList must be called to take
7989 // the pointer into account for the calculation of the range of the
7991 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7992 /*ReturnDevicePointer=*/false, C->isImplicit());
7993 DeferredInfo[nullptr].emplace_back(IE, VD);
7996 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7997 BasePointers.emplace_back(Ptr, VD);
7998 Pointers.push_back(Ptr);
7999 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8000 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
8005 for (const auto &M : Info) {
8006 // We need to know when we generate information for the first component
8007 // associated with a capture, because the mapping flags depend on it.
8008 bool IsFirstComponentList = true;
8010 // Temporary versions of arrays
8011 MapBaseValuesArrayTy CurBasePointers;
8012 MapValuesArrayTy CurPointers;
8013 MapValuesArrayTy CurSizes;
8014 MapFlagsArrayTy CurTypes;
8015 StructRangeInfoTy PartialStruct;
8017 for (const MapInfo &L : M.second) {
8018 assert(!L.Components.empty() &&
8019 "Not expecting declaration with no component lists.");
8021 // Remember the current base pointer index.
8022 unsigned CurrentBasePointersIdx = CurBasePointers.size();
8023 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8024 CurBasePointers, CurPointers, CurSizes,
8025 CurTypes, PartialStruct,
8026 IsFirstComponentList, L.IsImplicit);
8028 // If this entry relates with a device pointer, set the relevant
8029 // declaration and add the 'return pointer' flag.
8030 if (L.ReturnDevicePointer) {
8031 assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8032 "Unexpected number of mapped base pointers.");
8034 const ValueDecl *RelevantVD =
8035 L.Components.back().getAssociatedDeclaration();
8036 assert(RelevantVD &&
8037 "No relevant declaration related with device pointer??");
8039 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8040 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8042 IsFirstComponentList = false;
8045 // Append any pending zero-length pointers which are struct members and
8046 // used with use_device_ptr.
8047 auto CI = DeferredInfo.find(M.first);
8048 if (CI != DeferredInfo.end()) {
8049 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8050 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8051 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8052 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8053 CurBasePointers.emplace_back(BasePtr, L.VD);
8054 CurPointers.push_back(Ptr);
8055 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8056 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8057 // value MEMBER_OF=FFFF so that the entry is later updated with the
8058 // correct value of MEMBER_OF.
8059 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8064 // If there is an entry in PartialStruct it means we have a struct with
8065 // individual members mapped. Emit an extra combined entry.
8066 if (PartialStruct.Base.isValid())
8067 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8070 // We need to append the results of this capture to what we already have.
8071 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8072 Pointers.append(CurPointers.begin(), CurPointers.end());
8073 Sizes.append(CurSizes.begin(), CurSizes.end());
8074 Types.append(CurTypes.begin(), CurTypes.end());
8078 /// Generate all the base pointers, section pointers, sizes and map types for
8079 /// the extracted map clauses of user-defined mapper.
8080 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
8081 MapValuesArrayTy &Pointers,
8082 MapValuesArrayTy &Sizes,
8083 MapFlagsArrayTy &Types) const {
8084 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8085 "Expect a declare mapper directive");
8086 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8087 // We have to process the component lists that relate with the same
8088 // declaration in a single chunk so that we can generate the map flags
8089 // correctly. Therefore, we organize all lists in a map.
8090 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
8092 // Helper function to fill the information map for the different supported
8094 auto &&InfoGen = [&Info](
8096 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8097 OpenMPMapClauseKind MapType,
8098 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8099 bool ReturnDevicePointer, bool IsImplicit) {
8100 const ValueDecl *VD =
8101 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8102 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8106 for (const auto *C : CurMapperDir->clauselists()) {
8107 const auto *MC = cast<OMPMapClause>(C);
8108 for (const auto L : MC->component_lists()) {
8109 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
8110 /*ReturnDevicePointer=*/false, MC->isImplicit());
8114 for (const auto &M : Info) {
8115 // We need to know when we generate information for the first component
8116 // associated with a capture, because the mapping flags depend on it.
8117 bool IsFirstComponentList = true;
8119 // Temporary versions of arrays
8120 MapBaseValuesArrayTy CurBasePointers;
8121 MapValuesArrayTy CurPointers;
8122 MapValuesArrayTy CurSizes;
8123 MapFlagsArrayTy CurTypes;
8124 StructRangeInfoTy PartialStruct;
8126 for (const MapInfo &L : M.second) {
8127 assert(!L.Components.empty() &&
8128 "Not expecting declaration with no component lists.");
8129 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
8130 CurBasePointers, CurPointers, CurSizes,
8131 CurTypes, PartialStruct,
8132 IsFirstComponentList, L.IsImplicit);
8133 IsFirstComponentList = false;
8136 // If there is an entry in PartialStruct it means we have a struct with
8137 // individual members mapped. Emit an extra combined entry.
8138 if (PartialStruct.Base.isValid())
8139 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8142 // We need to append the results of this capture to what we already have.
8143 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8144 Pointers.append(CurPointers.begin(), CurPointers.end());
8145 Sizes.append(CurSizes.begin(), CurSizes.end());
8146 Types.append(CurTypes.begin(), CurTypes.end());
8150 /// Emit capture info for lambdas for variables captured by reference.
8151 void generateInfoForLambdaCaptures(
8152 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8153 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8154 MapFlagsArrayTy &Types,
8155 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8156 const auto *RD = VD->getType()
8158 .getNonReferenceType()
8159 ->getAsCXXRecordDecl();
8160 if (!RD || !RD->isLambda())
8162 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8163 LValue VDLVal = CGF.MakeAddrLValue(
8164 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8165 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8166 FieldDecl *ThisCapture = nullptr;
8167 RD->getCaptureFields(Captures, ThisCapture);
8170 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8171 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8172 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8173 VDLVal.getPointer(CGF));
8174 BasePointers.push_back(ThisLVal.getPointer(CGF));
8175 Pointers.push_back(ThisLValVal.getPointer(CGF));
8177 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8178 CGF.Int64Ty, /*isSigned=*/true));
8179 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8180 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8182 for (const LambdaCapture &LC : RD->captures()) {
8183 if (!LC.capturesVariable())
8185 const VarDecl *VD = LC.getCapturedVar();
8186 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8188 auto It = Captures.find(VD);
8189 assert(It != Captures.end() && "Found lambda capture without field.");
8190 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8191 if (LC.getCaptureKind() == LCK_ByRef) {
8192 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8193 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8194 VDLVal.getPointer(CGF));
8195 BasePointers.push_back(VarLVal.getPointer(CGF));
8196 Pointers.push_back(VarLValVal.getPointer(CGF));
8197 Sizes.push_back(CGF.Builder.CreateIntCast(
8199 VD->getType().getCanonicalType().getNonReferenceType()),
8200 CGF.Int64Ty, /*isSigned=*/true));
8202 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8203 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8204 VDLVal.getPointer(CGF));
8205 BasePointers.push_back(VarLVal.getPointer(CGF));
8206 Pointers.push_back(VarRVal.getScalarVal());
8207 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8209 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8210 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8214 /// Set correct indices for lambdas captures.
8215 void adjustMemberOfForLambdaCaptures(
8216 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8217 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8218 MapFlagsArrayTy &Types) const {
8219 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8220 // Set correct member_of idx for all implicit lambda captures.
8221 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8222 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8224 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8225 assert(BasePtr && "Unable to find base lambda address.");
8227 for (unsigned J = I; J > 0; --J) {
8228 unsigned Idx = J - 1;
8229 if (Pointers[Idx] != BasePtr)
8234 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8235 // All other current entries will be MEMBER_OF the combined entry
8236 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8237 // 0xFFFF in the MEMBER_OF field).
8238 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8239 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8243 /// Generate the base pointers, section pointers, sizes and map types
8244 /// associated to a given capture.
8245 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8247 MapBaseValuesArrayTy &BasePointers,
8248 MapValuesArrayTy &Pointers,
8249 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8250 StructRangeInfoTy &PartialStruct) const {
8251 assert(!Cap->capturesVariableArrayType() &&
8252 "Not expecting to generate map info for a variable array type!");
8254 // We need to know when we generating information for the first component
8255 const ValueDecl *VD = Cap->capturesThis()
8257 : Cap->getCapturedVar()->getCanonicalDecl();
8259 // If this declaration appears in a is_device_ptr clause we just have to
8260 // pass the pointer by value. If it is a reference to a declaration, we just
8262 if (DevPointersMap.count(VD)) {
8263 BasePointers.emplace_back(Arg, VD);
8264 Pointers.push_back(Arg);
8266 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8267 CGF.Int64Ty, /*isSigned=*/true));
8268 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8273 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8274 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8275 SmallVector<MapData, 4> DeclComponentLists;
8276 assert(CurDir.is<const OMPExecutableDirective *>() &&
8277 "Expect a executable directive");
8278 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8279 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8280 for (const auto L : C->decl_component_lists(VD)) {
8281 assert(L.first == VD &&
8282 "We got information for the wrong declaration??");
8283 assert(!L.second.empty() &&
8284 "Not expecting declaration with no component lists.");
8285 DeclComponentLists.emplace_back(L.second, C->getMapType(),
8286 C->getMapTypeModifiers(),
8291 // Find overlapping elements (including the offset from the base element).
8292 llvm::SmallDenseMap<
8295 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8299 for (const MapData &L : DeclComponentLists) {
8300 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8301 OpenMPMapClauseKind MapType;
8302 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8304 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8306 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8307 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8308 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8309 auto CI = Components.rbegin();
8310 auto CE = Components.rend();
8311 auto SI = Components1.rbegin();
8312 auto SE = Components1.rend();
8313 for (; CI != CE && SI != SE; ++CI, ++SI) {
8314 if (CI->getAssociatedExpression()->getStmtClass() !=
8315 SI->getAssociatedExpression()->getStmtClass())
8317 // Are we dealing with different variables/fields?
8318 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8321 // Found overlapping if, at least for one component, reached the head of
8322 // the components list.
8323 if (CI == CE || SI == SE) {
8324 assert((CI != CE || SI != SE) &&
8325 "Unexpected full match of the mapping components.");
8326 const MapData &BaseData = CI == CE ? L : L1;
8327 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8328 SI == SE ? Components : Components1;
8329 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8330 OverlappedElements.getSecond().push_back(SubData);
8334 // Sort the overlapped elements for each item.
8335 llvm::SmallVector<const FieldDecl *, 4> Layout;
8336 if (!OverlappedData.empty()) {
8337 if (const auto *CRD =
8338 VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8339 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8341 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8342 Layout.append(RD->field_begin(), RD->field_end());
8345 for (auto &Pair : OverlappedData) {
8349 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8350 OMPClauseMappableExprCommon::MappableExprComponentListRef
8352 auto CI = First.rbegin();
8353 auto CE = First.rend();
8354 auto SI = Second.rbegin();
8355 auto SE = Second.rend();
8356 for (; CI != CE && SI != SE; ++CI, ++SI) {
8357 if (CI->getAssociatedExpression()->getStmtClass() !=
8358 SI->getAssociatedExpression()->getStmtClass())
8360 // Are we dealing with different variables/fields?
8361 if (CI->getAssociatedDeclaration() !=
8362 SI->getAssociatedDeclaration())
8366 // Lists contain the same elements.
8367 if (CI == CE && SI == SE)
8370 // List with less elements is less than list with more elements.
8371 if (CI == CE || SI == SE)
8374 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8375 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8376 if (FD1->getParent() == FD2->getParent())
8377 return FD1->getFieldIndex() < FD2->getFieldIndex();
8379 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8380 return FD == FD1 || FD == FD2;
8386 // Associated with a capture, because the mapping flags depend on it.
8387 // Go through all of the elements with the overlapped elements.
8388 for (const auto &Pair : OverlappedData) {
8389 const MapData &L = *Pair.getFirst();
8390 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8391 OpenMPMapClauseKind MapType;
8392 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8394 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8395 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8396 OverlappedComponents = Pair.getSecond();
8397 bool IsFirstComponentList = true;
8398 generateInfoForComponentList(MapType, MapModifiers, Components,
8399 BasePointers, Pointers, Sizes, Types,
8400 PartialStruct, IsFirstComponentList,
8401 IsImplicit, OverlappedComponents);
8403 // Go through other elements without overlapped elements.
8404 bool IsFirstComponentList = OverlappedData.empty();
8405 for (const MapData &L : DeclComponentLists) {
8406 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8407 OpenMPMapClauseKind MapType;
8408 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8410 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8411 auto It = OverlappedData.find(&L);
8412 if (It == OverlappedData.end())
8413 generateInfoForComponentList(MapType, MapModifiers, Components,
8414 BasePointers, Pointers, Sizes, Types,
8415 PartialStruct, IsFirstComponentList,
8417 IsFirstComponentList = false;
8421 /// Generate the base pointers, section pointers, sizes and map types
8422 /// associated with the declare target link variables.
8423 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8424 MapValuesArrayTy &Pointers,
8425 MapValuesArrayTy &Sizes,
8426 MapFlagsArrayTy &Types) const {
8427 assert(CurDir.is<const OMPExecutableDirective *>() &&
8428 "Expect a executable directive");
8429 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8430 // Map other list items in the map clause which are not captured variables
8431 // but "declare target link" global variables.
8432 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8433 for (const auto L : C->component_lists()) {
8436 const auto *VD = dyn_cast<VarDecl>(L.first);
8439 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8440 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8441 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8442 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8444 StructRangeInfoTy PartialStruct;
8445 generateInfoForComponentList(
8446 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8447 Pointers, Sizes, Types, PartialStruct,
8448 /*IsFirstComponentList=*/true, C->isImplicit());
8449 assert(!PartialStruct.Base.isValid() &&
8450 "No partial structs for declare target link expected.");
8455 /// Generate the default map information for a given capture \a CI,
8456 /// record field declaration \a RI and captured value \a CV.
8457 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8458 const FieldDecl &RI, llvm::Value *CV,
8459 MapBaseValuesArrayTy &CurBasePointers,
8460 MapValuesArrayTy &CurPointers,
8461 MapValuesArrayTy &CurSizes,
8462 MapFlagsArrayTy &CurMapTypes) const {
8463 bool IsImplicit = true;
8464 // Do the default mapping.
8465 if (CI.capturesThis()) {
8466 CurBasePointers.push_back(CV);
8467 CurPointers.push_back(CV);
8468 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8470 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8471 CGF.Int64Ty, /*isSigned=*/true));
8472 // Default map type.
8473 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8474 } else if (CI.capturesVariableByCopy()) {
8475 CurBasePointers.push_back(CV);
8476 CurPointers.push_back(CV);
8477 if (!RI.getType()->isAnyPointerType()) {
8478 // We have to signal to the runtime captures passed by value that are
8480 CurMapTypes.push_back(OMP_MAP_LITERAL);
8481 CurSizes.push_back(CGF.Builder.CreateIntCast(
8482 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8484 // Pointers are implicitly mapped with a zero size and no flags
8485 // (other than first map that is added for all implicit maps).
8486 CurMapTypes.push_back(OMP_MAP_NONE);
8487 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8489 const VarDecl *VD = CI.getCapturedVar();
8490 auto I = FirstPrivateDecls.find(VD);
8491 if (I != FirstPrivateDecls.end())
8492 IsImplicit = I->getSecond();
8494 assert(CI.capturesVariable() && "Expected captured reference.");
8495 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8496 QualType ElementType = PtrTy->getPointeeType();
8497 CurSizes.push_back(CGF.Builder.CreateIntCast(
8498 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8499 // The default map type for a scalar/complex type is 'to' because by
8500 // default the value doesn't have to be retrieved. For an aggregate
8501 // type, the default is 'tofrom'.
8502 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8503 const VarDecl *VD = CI.getCapturedVar();
8504 auto I = FirstPrivateDecls.find(VD);
8505 if (I != FirstPrivateDecls.end() &&
8506 VD->getType().isConstant(CGF.getContext())) {
8507 llvm::Constant *Addr =
8508 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8509 // Copy the value of the original variable to the new global copy.
8510 CGF.Builder.CreateMemCpy(
8511 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
8512 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8513 CurSizes.back(), /*IsVolatile=*/false);
8514 // Use new global variable as the base pointers.
8515 CurBasePointers.push_back(Addr);
8516 CurPointers.push_back(Addr);
8518 CurBasePointers.push_back(CV);
8519 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8520 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8521 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8522 AlignmentSource::Decl));
8523 CurPointers.push_back(PtrAddr.getPointer());
8525 CurPointers.push_back(CV);
8528 if (I != FirstPrivateDecls.end())
8529 IsImplicit = I->getSecond();
8531 // Every default map produces a single argument which is a target parameter.
8532 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8534 // Add flag stating this is an implicit map.
8536 CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8539 } // anonymous namespace
8541 /// Emit the arrays used to pass the captures and map information to the
8542 /// offloading runtime library. If there is no map or capture information,
8543 /// return nullptr by reference.
8545 emitOffloadingArrays(CodeGenFunction &CGF,
8546 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8547 MappableExprsHandler::MapValuesArrayTy &Pointers,
8548 MappableExprsHandler::MapValuesArrayTy &Sizes,
8549 MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8550 CGOpenMPRuntime::TargetDataInfo &Info) {
8551 CodeGenModule &CGM = CGF.CGM;
8552 ASTContext &Ctx = CGF.getContext();
8554 // Reset the array information.
8555 Info.clearArrayInfo();
8556 Info.NumberOfPtrs = BasePointers.size();
8558 if (Info.NumberOfPtrs) {
8559 // Detect if we have any capture size requiring runtime evaluation of the
8560 // size so that a constant array could be eventually used.
8561 bool hasRuntimeEvaluationCaptureSize = false;
8562 for (llvm::Value *S : Sizes)
8563 if (!isa<llvm::Constant>(S)) {
8564 hasRuntimeEvaluationCaptureSize = true;
8568 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8569 QualType PointerArrayType = Ctx.getConstantArrayType(
8570 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
8571 /*IndexTypeQuals=*/0);
8573 Info.BasePointersArray =
8574 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8575 Info.PointersArray =
8576 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8578 // If we don't have any VLA types or other types that require runtime
8579 // evaluation, we can use a constant array for the map sizes, otherwise we
8580 // need to fill up the arrays as we do for the pointers.
8582 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8583 if (hasRuntimeEvaluationCaptureSize) {
8584 QualType SizeArrayType = Ctx.getConstantArrayType(
8585 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
8586 /*IndexTypeQuals=*/0);
8588 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8590 // We expect all the sizes to be constant, so we collect them to create
8591 // a constant array.
8592 SmallVector<llvm::Constant *, 16> ConstSizes;
8593 for (llvm::Value *S : Sizes)
8594 ConstSizes.push_back(cast<llvm::Constant>(S));
8596 auto *SizesArrayInit = llvm::ConstantArray::get(
8597 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8598 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8599 auto *SizesArrayGbl = new llvm::GlobalVariable(
8600 CGM.getModule(), SizesArrayInit->getType(),
8601 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8602 SizesArrayInit, Name);
8603 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8604 Info.SizesArray = SizesArrayGbl;
8607 // The map types are always constant so we don't need to generate code to
8608 // fill arrays. Instead, we create an array constant.
8609 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8610 llvm::copy(MapTypes, Mapping.begin());
8611 llvm::Constant *MapTypesArrayInit =
8612 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8613 std::string MaptypesName =
8614 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8615 auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8616 CGM.getModule(), MapTypesArrayInit->getType(),
8617 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8618 MapTypesArrayInit, MaptypesName);
8619 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8620 Info.MapTypesArray = MapTypesArrayGbl;
8622 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8623 llvm::Value *BPVal = *BasePointers[I];
8624 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8625 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8626 Info.BasePointersArray, 0, I);
8627 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8628 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8629 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8630 CGF.Builder.CreateStore(BPVal, BPAddr);
8632 if (Info.requiresDevicePointerInfo())
8633 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8634 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8636 llvm::Value *PVal = Pointers[I];
8637 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8638 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8639 Info.PointersArray, 0, I);
8640 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8641 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8642 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8643 CGF.Builder.CreateStore(PVal, PAddr);
8645 if (hasRuntimeEvaluationCaptureSize) {
8646 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8647 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8651 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8652 CGF.Builder.CreateStore(
8653 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8660 /// Emit the arguments to be passed to the runtime library based on the
8661 /// arrays of pointers, sizes and map types.
8662 static void emitOffloadingArraysArgument(
8663 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8664 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8665 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8666 CodeGenModule &CGM = CGF.CGM;
8667 if (Info.NumberOfPtrs) {
8668 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8669 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8670 Info.BasePointersArray,
8671 /*Idx0=*/0, /*Idx1=*/0);
8672 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8673 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8677 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8678 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8679 /*Idx0=*/0, /*Idx1=*/0);
8680 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8681 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8686 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8687 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8688 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8690 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8694 /// Check for inner distribute directive.
8695 static const OMPExecutableDirective *
8696 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8697 const auto *CS = D.getInnermostCapturedStmt();
8699 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8700 const Stmt *ChildStmt =
8701 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8703 if (const auto *NestedDir =
8704 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8705 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8706 switch (D.getDirectiveKind()) {
8708 if (isOpenMPDistributeDirective(DKind))
8710 if (DKind == OMPD_teams) {
8711 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8712 /*IgnoreCaptured=*/true);
8715 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8716 if (const auto *NND =
8717 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8718 DKind = NND->getDirectiveKind();
8719 if (isOpenMPDistributeDirective(DKind))
8724 case OMPD_target_teams:
8725 if (isOpenMPDistributeDirective(DKind))
8728 case OMPD_target_parallel:
8729 case OMPD_target_simd:
8730 case OMPD_target_parallel_for:
8731 case OMPD_target_parallel_for_simd:
8733 case OMPD_target_teams_distribute:
8734 case OMPD_target_teams_distribute_simd:
8735 case OMPD_target_teams_distribute_parallel_for:
8736 case OMPD_target_teams_distribute_parallel_for_simd:
8739 case OMPD_parallel_for:
8740 case OMPD_parallel_master:
8741 case OMPD_parallel_sections:
8743 case OMPD_parallel_for_simd:
8745 case OMPD_cancellation_point:
8747 case OMPD_threadprivate:
8756 case OMPD_taskyield:
8759 case OMPD_taskgroup:
8763 case OMPD_target_data:
8764 case OMPD_target_exit_data:
8765 case OMPD_target_enter_data:
8766 case OMPD_distribute:
8767 case OMPD_distribute_simd:
8768 case OMPD_distribute_parallel_for:
8769 case OMPD_distribute_parallel_for_simd:
8770 case OMPD_teams_distribute:
8771 case OMPD_teams_distribute_simd:
8772 case OMPD_teams_distribute_parallel_for:
8773 case OMPD_teams_distribute_parallel_for_simd:
8774 case OMPD_target_update:
8775 case OMPD_declare_simd:
8776 case OMPD_declare_variant:
8777 case OMPD_declare_target:
8778 case OMPD_end_declare_target:
8779 case OMPD_declare_reduction:
8780 case OMPD_declare_mapper:
8782 case OMPD_taskloop_simd:
8783 case OMPD_master_taskloop:
8784 case OMPD_master_taskloop_simd:
8785 case OMPD_parallel_master_taskloop:
8786 case OMPD_parallel_master_taskloop_simd:
8789 llvm_unreachable("Unexpected directive.");
8796 /// Emit the user-defined mapper function. The code generation follows the
8797 /// pattern in the example below.
8799 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
8800 /// void *base, void *begin,
8801 /// int64_t size, int64_t type) {
8802 /// // Allocate space for an array section first.
8803 /// if (size > 1 && !maptype.IsDelete)
8804 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8805 /// size*sizeof(Ty), clearToFrom(type));
8807 /// for (unsigned i = 0; i < size; i++) {
8808 /// // For each component specified by this mapper:
8809 /// for (auto c : all_components) {
8810 /// if (c.hasMapper())
8811 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
8814 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
8815 /// c.arg_begin, c.arg_size, c.arg_type);
8818 /// // Delete the array section.
8819 /// if (size > 1 && maptype.IsDelete)
8820 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
8821 /// size*sizeof(Ty), clearToFrom(type));
8824 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
8825 CodeGenFunction *CGF) {
8826 if (UDMMap.count(D) > 0)
8828 ASTContext &C = CGM.getContext();
8829 QualType Ty = D->getType();
8830 QualType PtrTy = C.getPointerType(Ty).withRestrict();
8831 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
8832 auto *MapperVarDecl =
8833 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
8834 SourceLocation Loc = D->getLocation();
8835 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
8837 // Prepare mapper function arguments and attributes.
8838 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8839 C.VoidPtrTy, ImplicitParamDecl::Other);
8840 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
8841 ImplicitParamDecl::Other);
8842 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
8843 C.VoidPtrTy, ImplicitParamDecl::Other);
8844 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8845 ImplicitParamDecl::Other);
8846 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
8847 ImplicitParamDecl::Other);
8848 FunctionArgList Args;
8849 Args.push_back(&HandleArg);
8850 Args.push_back(&BaseArg);
8851 Args.push_back(&BeginArg);
8852 Args.push_back(&SizeArg);
8853 Args.push_back(&TypeArg);
8854 const CGFunctionInfo &FnInfo =
8855 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
8856 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
8857 SmallString<64> TyStr;
8858 llvm::raw_svector_ostream Out(TyStr);
8859 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
8860 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
8861 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
8862 Name, &CGM.getModule());
8863 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
8864 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
8865 // Start the mapper function code generation.
8866 CodeGenFunction MapperCGF(CGM);
8867 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
8868 // Compute the starting and end addreses of array elements.
8869 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
8870 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
8871 C.getPointerType(Int64Ty), Loc);
8872 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
8873 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
8874 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
8875 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
8876 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
8877 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
8878 C.getPointerType(Int64Ty), Loc);
8879 // Prepare common arguments for array initiation and deletion.
8880 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
8881 MapperCGF.GetAddrOfLocalVar(&HandleArg),
8882 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8883 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
8884 MapperCGF.GetAddrOfLocalVar(&BaseArg),
8885 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8886 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
8887 MapperCGF.GetAddrOfLocalVar(&BeginArg),
8888 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
8890 // Emit array initiation if this is an array section and \p MapType indicates
8891 // that memory allocation is required.
8892 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
8893 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
8894 ElementSize, HeadBB, /*IsInit=*/true);
8896 // Emit a for loop to iterate through SizeArg of elements and map all of them.
8898 // Emit the loop header block.
8899 MapperCGF.EmitBlock(HeadBB);
8900 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
8901 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
8902 // Evaluate whether the initial condition is satisfied.
8903 llvm::Value *IsEmpty =
8904 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8905 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8906 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
8908 // Emit the loop body block.
8909 MapperCGF.EmitBlock(BodyBB);
8910 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
8911 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8912 PtrPHI->addIncoming(PtrBegin, EntryBB);
8913 Address PtrCurrent =
8914 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
8916 .alignmentOfArrayElement(ElementSize));
8917 // Privatize the declared variable of mapper to be the current array element.
8918 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
8919 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
8921 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
8922 .getAddress(MapperCGF);
8924 (void)Scope.Privatize();
8926 // Get map clause information. Fill up the arrays with all mapped variables.
8927 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8928 MappableExprsHandler::MapValuesArrayTy Pointers;
8929 MappableExprsHandler::MapValuesArrayTy Sizes;
8930 MappableExprsHandler::MapFlagsArrayTy MapTypes;
8931 MappableExprsHandler MEHandler(*D, MapperCGF);
8932 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
8934 // Call the runtime API __tgt_mapper_num_components to get the number of
8935 // pre-existing components.
8936 llvm::Value *OffloadingArgs[] = {Handle};
8937 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
8938 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
8939 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
8941 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
8943 // Fill up the runtime mapper handle for all components.
8944 for (unsigned I = 0; I < BasePointers.size(); ++I) {
8945 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
8946 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8947 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
8948 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
8949 llvm::Value *CurSizeArg = Sizes[I];
8951 // Extract the MEMBER_OF field from the map type.
8952 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
8953 MapperCGF.EmitBlock(MemberBB);
8954 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
8955 llvm::Value *Member = MapperCGF.Builder.CreateAnd(
8957 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
8958 llvm::BasicBlock *MemberCombineBB =
8959 MapperCGF.createBasicBlock("omp.member.combine");
8960 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
8961 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
8962 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
8963 // Add the number of pre-existing components to the MEMBER_OF field if it
8965 MapperCGF.EmitBlock(MemberCombineBB);
8966 llvm::Value *CombinedMember =
8967 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8968 // Do nothing if it is not a member of previous components.
8969 MapperCGF.EmitBlock(TypeBB);
8970 llvm::PHINode *MemberMapType =
8971 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
8972 MemberMapType->addIncoming(OriMapType, MemberBB);
8973 MemberMapType->addIncoming(CombinedMember, MemberCombineBB);
8975 // Combine the map type inherited from user-defined mapper with that
8976 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
8977 // bits of the \a MapType, which is the input argument of the mapper
8978 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
8979 // bits of MemberMapType.
8980 // [OpenMP 5.0], 1.2.6. map-type decay.
8981 // | alloc | to | from | tofrom | release | delete
8982 // ----------------------------------------------------------
8983 // alloc | alloc | alloc | alloc | alloc | release | delete
8984 // to | alloc | to | alloc | to | release | delete
8985 // from | alloc | alloc | from | from | release | delete
8986 // tofrom | alloc | to | from | tofrom | release | delete
8987 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
8989 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
8990 MappableExprsHandler::OMP_MAP_FROM));
8991 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
8992 llvm::BasicBlock *AllocElseBB =
8993 MapperCGF.createBasicBlock("omp.type.alloc.else");
8994 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
8995 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
8996 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
8997 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
8998 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
8999 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9000 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9001 MapperCGF.EmitBlock(AllocBB);
9002 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9004 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9005 MappableExprsHandler::OMP_MAP_FROM)));
9006 MapperCGF.Builder.CreateBr(EndBB);
9007 MapperCGF.EmitBlock(AllocElseBB);
9008 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9010 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
9011 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9012 // In case of to, clear OMP_MAP_FROM.
9013 MapperCGF.EmitBlock(ToBB);
9014 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9016 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
9017 MapperCGF.Builder.CreateBr(EndBB);
9018 MapperCGF.EmitBlock(ToElseBB);
9019 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9021 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
9022 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9023 // In case of from, clear OMP_MAP_TO.
9024 MapperCGF.EmitBlock(FromBB);
9025 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9027 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
9028 // In case of tofrom, do nothing.
9029 MapperCGF.EmitBlock(EndBB);
9030 llvm::PHINode *CurMapType =
9031 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9032 CurMapType->addIncoming(AllocMapType, AllocBB);
9033 CurMapType->addIncoming(ToMapType, ToBB);
9034 CurMapType->addIncoming(FromMapType, FromBB);
9035 CurMapType->addIncoming(MemberMapType, ToElseBB);
9037 // TODO: call the corresponding mapper function if a user-defined mapper is
9038 // associated with this map clause.
9039 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9041 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9042 CurSizeArg, CurMapType};
9043 MapperCGF.EmitRuntimeCall(
9044 createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
9048 // Update the pointer to point to the next element that needs to be mapped,
9049 // and check whether we have mapped all elements.
9050 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9051 PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9052 PtrPHI->addIncoming(PtrNext, BodyBB);
9053 llvm::Value *IsDone =
9054 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9055 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9056 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9058 MapperCGF.EmitBlock(ExitBB);
9059 // Emit array deletion if this is an array section and \p MapType indicates
9060 // that deletion is required.
9061 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9062 ElementSize, DoneBB, /*IsInit=*/false);
9064 // Emit the function exit block.
9065 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9066 MapperCGF.FinishFunction();
9067 UDMMap.try_emplace(D, Fn);
9069 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9070 Decls.second.push_back(D);
9074 /// Emit the array initialization or deletion portion for user-defined mapper
9075 /// code generation. First, it evaluates whether an array section is mapped and
9076 /// whether the \a MapType instructs to delete this section. If \a IsInit is
9077 /// true, and \a MapType indicates to not delete this array, array
9078 /// initialization code is generated. If \a IsInit is false, and \a MapType
9079 /// indicates to not this array, array deletion code is generated.
9080 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9081 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9082 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9083 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
9084 StringRef Prefix = IsInit ? ".init" : ".del";
9086 // Evaluate if this is an array section.
9087 llvm::BasicBlock *IsDeleteBB =
9088 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
9089 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
9090 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
9091 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9092 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);
9094 // Evaluate if we are going to delete this section.
9095 MapperCGF.EmitBlock(IsDeleteBB);
9096 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9098 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
9099 llvm::Value *DeleteCond;
9101 DeleteCond = MapperCGF.Builder.CreateIsNull(
9102 DeleteBit, "omp.array" + Prefix + ".delete");
9104 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9105 DeleteBit, "omp.array" + Prefix + ".delete");
9107 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);
9109 MapperCGF.EmitBlock(BodyBB);
9110 // Get the array size by multiplying element size and element number (i.e., \p
9112 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9113 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9114 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9115 // memory allocation/deletion purpose only.
9116 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9118 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
9119 MappableExprsHandler::OMP_MAP_FROM)));
9120 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9122 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
9123 MapperCGF.EmitRuntimeCall(
9124 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
9127 void CGOpenMPRuntime::emitTargetNumIterationsCall(
9128 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9129 llvm::Value *DeviceID,
9130 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9131 const OMPLoopDirective &D)>
9133 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9134 const OMPExecutableDirective *TD = &D;
9135 // Get nested teams distribute kind directive, if any.
9136 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9137 TD = getNestedDistributeDirective(CGM.getContext(), D);
9140 const auto *LD = cast<OMPLoopDirective>(TD);
9141 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
9142 PrePostActionTy &) {
9143 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
9144 llvm::Value *Args[] = {DeviceID, NumIterations};
9145 CGF.EmitRuntimeCall(
9146 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
9149 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
9152 void CGOpenMPRuntime::emitTargetCall(
9153 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9154 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9156 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9157 const OMPLoopDirective &D)>
9159 if (!CGF.HaveInsertPoint())
9162 assert(OutlinedFn && "Invalid outlined function!");
9164 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
9165 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9166 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9167 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9168 PrePostActionTy &) {
9169 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9171 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9173 CodeGenFunction::OMPTargetDataInfo InputInfo;
9174 llvm::Value *MapTypesArray = nullptr;
9175 // Fill up the pointer arrays and transfer execution to the device.
9176 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
9177 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
9178 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9179 // On top of the arrays that were filled up, the target offloading call
9180 // takes as arguments the device id as well as the host pointer. The host
9181 // pointer is used by the runtime library to identify the current target
9182 // region, so it only has to be unique and not necessarily point to
9183 // anything. It could be the pointer to the outlined function that
9184 // implements the target region, but we aren't using that so that the
9185 // compiler doesn't need to keep that, and could therefore inline the host
9186 // function if proven worthwhile during optimization.
9188 // From this point on, we need to have an ID of the target region defined.
9189 assert(OutlinedFnID && "Invalid outlined function ID!");
9191 // Emit device ID if any.
9192 llvm::Value *DeviceID;
9194 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9195 CGF.Int64Ty, /*isSigned=*/true);
9197 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9200 // Emit the number of elements in the offloading arrays.
9201 llvm::Value *PointerNum =
9202 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9204 // Return value of the runtime offloading call.
9205 llvm::Value *Return;
9207 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9208 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9210 // Emit tripcount for the target loop-based directive.
9211 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
9213 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9214 // The target region is an outlined function launched by the runtime
9215 // via calls __tgt_target() or __tgt_target_teams().
9217 // __tgt_target() launches a target region with one team and one thread,
9218 // executing a serial region. This master thread may in turn launch
9219 // more threads within its team upon encountering a parallel region,
9220 // however, no additional teams can be launched on the device.
9222 // __tgt_target_teams() launches a target region with one or more teams,
9223 // each with one or more threads. This call is required for target
9224 // constructs such as:
9226 // 'target' / 'teams'
9227 // 'target teams distribute parallel for'
9228 // 'target parallel'
9231 // Note that on the host and CPU targets, the runtime implementation of
9232 // these calls simply call the outlined function without forking threads.
9233 // The outlined functions themselves have runtime calls to
9234 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9235 // the compiler in emitTeamsCall() and emitParallelCall().
9237 // In contrast, on the NVPTX target, the implementation of
9238 // __tgt_target_teams() launches a GPU kernel with the requested number
9239 // of teams and threads so no additional calls to the runtime are required.
9241 // If we have NumTeams defined this means that we have an enclosed teams
9242 // region. Therefore we also expect to have NumThreads defined. These two
9243 // values should be defined in the presence of a teams directive,
9244 // regardless of having any clauses associated. If the user is using teams
9245 // but no clauses, these two values will be the default that should be
9246 // passed to the runtime library - a 32-bit integer with the value zero.
9247 assert(NumThreads && "Thread limit expression should be available along "
9248 "with number of teams.");
9249 llvm::Value *OffloadingArgs[] = {DeviceID,
9252 InputInfo.BasePointersArray.getPointer(),
9253 InputInfo.PointersArray.getPointer(),
9254 InputInfo.SizesArray.getPointer(),
9258 Return = CGF.EmitRuntimeCall(
9259 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
9260 : OMPRTL__tgt_target_teams),
9263 llvm::Value *OffloadingArgs[] = {DeviceID,
9266 InputInfo.BasePointersArray.getPointer(),
9267 InputInfo.PointersArray.getPointer(),
9268 InputInfo.SizesArray.getPointer(),
9270 Return = CGF.EmitRuntimeCall(
9271 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
9272 : OMPRTL__tgt_target),
9276 // Check the error code and execute the host version if required.
9277 llvm::BasicBlock *OffloadFailedBlock =
9278 CGF.createBasicBlock("omp_offload.failed");
9279 llvm::BasicBlock *OffloadContBlock =
9280 CGF.createBasicBlock("omp_offload.cont");
9281 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9282 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9284 CGF.EmitBlock(OffloadFailedBlock);
9285 if (RequiresOuterTask) {
9286 CapturedVars.clear();
9287 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9289 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9290 CGF.EmitBranch(OffloadContBlock);
9292 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9295 // Notify that the host version must be executed.
9296 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
9297 RequiresOuterTask](CodeGenFunction &CGF,
9298 PrePostActionTy &) {
9299 if (RequiresOuterTask) {
9300 CapturedVars.clear();
9301 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9303 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9306 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9307 &CapturedVars, RequiresOuterTask,
9308 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9309 // Fill up the arrays with all the captured variables.
9310 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9311 MappableExprsHandler::MapValuesArrayTy Pointers;
9312 MappableExprsHandler::MapValuesArrayTy Sizes;
9313 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9315 // Get mappable expression information.
9316 MappableExprsHandler MEHandler(D, CGF);
9317 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9319 auto RI = CS.getCapturedRecordDecl()->field_begin();
9320 auto CV = CapturedVars.begin();
9321 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9322 CE = CS.capture_end();
9323 CI != CE; ++CI, ++RI, ++CV) {
9324 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
9325 MappableExprsHandler::MapValuesArrayTy CurPointers;
9326 MappableExprsHandler::MapValuesArrayTy CurSizes;
9327 MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
9328 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9330 // VLA sizes are passed to the outlined region by copy and do not have map
9331 // information associated.
9332 if (CI->capturesVariableArrayType()) {
9333 CurBasePointers.push_back(*CV);
9334 CurPointers.push_back(*CV);
9335 CurSizes.push_back(CGF.Builder.CreateIntCast(
9336 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9337 // Copy to the device as an argument. No need to retrieve it.
9338 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
9339 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
9340 MappableExprsHandler::OMP_MAP_IMPLICIT);
9342 // If we have any information in the map clause, we use it, otherwise we
9343 // just do a default mapping.
9344 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
9345 CurSizes, CurMapTypes, PartialStruct);
9346 if (CurBasePointers.empty())
9347 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
9348 CurPointers, CurSizes, CurMapTypes);
9349 // Generate correct mapping for variables captured by reference in
9351 if (CI->capturesVariable())
9352 MEHandler.generateInfoForLambdaCaptures(
9353 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
9354 CurMapTypes, LambdaPointers);
9356 // We expect to have at least an element of information for this capture.
9357 assert(!CurBasePointers.empty() &&
9358 "Non-existing map pointer for capture!");
9359 assert(CurBasePointers.size() == CurPointers.size() &&
9360 CurBasePointers.size() == CurSizes.size() &&
9361 CurBasePointers.size() == CurMapTypes.size() &&
9362 "Inconsistent map information sizes!");
9364 // If there is an entry in PartialStruct it means we have a struct with
9365 // individual members mapped. Emit an extra combined entry.
9366 if (PartialStruct.Base.isValid())
9367 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
9368 CurMapTypes, PartialStruct);
9370 // We need to append the results of this capture to what we already have.
9371 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
9372 Pointers.append(CurPointers.begin(), CurPointers.end());
9373 Sizes.append(CurSizes.begin(), CurSizes.end());
9374 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
9376 // Adjust MEMBER_OF flags for the lambdas captures.
9377 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
9378 Pointers, MapTypes);
9379 // Map other list items in the map clause which are not captured variables
9380 // but "declare target link" global variables.
9381 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
9384 TargetDataInfo Info;
9385 // Fill up the arrays and create the arguments.
9386 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9387 emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9388 Info.PointersArray, Info.SizesArray,
9389 Info.MapTypesArray, Info);
9390 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9391 InputInfo.BasePointersArray =
9392 Address(Info.BasePointersArray, CGM.getPointerAlign());
9393 InputInfo.PointersArray =
9394 Address(Info.PointersArray, CGM.getPointerAlign());
9395 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
9396 MapTypesArray = Info.MapTypesArray;
9397 if (RequiresOuterTask)
9398 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9400 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9403 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
9404 CodeGenFunction &CGF, PrePostActionTy &) {
9405 if (RequiresOuterTask) {
9406 CodeGenFunction::OMPTargetDataInfo InputInfo;
9407 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9409 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9413 // If we have a target function ID it means that we need to support
9414 // offloading, otherwise, just execute on the host. We need to execute on host
9415 // regardless of the conditional in the if clause if, e.g., the user do not
9416 // specify target triples.
9419 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9421 RegionCodeGenTy ThenRCG(TargetThenGen);
9425 RegionCodeGenTy ElseRCG(TargetElseGen);
9430 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9431 StringRef ParentName) {
9435 // Codegen OMP target directives that offload compute to the device.
9436 bool RequiresDeviceCodegen =
9437 isa<OMPExecutableDirective>(S) &&
9438 isOpenMPTargetExecutionDirective(
9439 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9441 if (RequiresDeviceCodegen) {
9442 const auto &E = *cast<OMPExecutableDirective>(S);
9446 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9449 // Is this a target region that should not be emitted as an entry point? If
9450 // so just signal we are done with this target region.
9451 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9455 switch (E.getDirectiveKind()) {
9457 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9458 cast<OMPTargetDirective>(E));
9460 case OMPD_target_parallel:
9461 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9462 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9464 case OMPD_target_teams:
9465 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9466 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9468 case OMPD_target_teams_distribute:
9469 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9470 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9472 case OMPD_target_teams_distribute_simd:
9473 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9474 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9476 case OMPD_target_parallel_for:
9477 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9478 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9480 case OMPD_target_parallel_for_simd:
9481 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9482 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9484 case OMPD_target_simd:
9485 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9486 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9488 case OMPD_target_teams_distribute_parallel_for:
9489 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9491 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9493 case OMPD_target_teams_distribute_parallel_for_simd:
9495 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9497 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9501 case OMPD_parallel_for:
9502 case OMPD_parallel_master:
9503 case OMPD_parallel_sections:
9505 case OMPD_parallel_for_simd:
9507 case OMPD_cancellation_point:
9509 case OMPD_threadprivate:
9518 case OMPD_taskyield:
9521 case OMPD_taskgroup:
9525 case OMPD_target_data:
9526 case OMPD_target_exit_data:
9527 case OMPD_target_enter_data:
9528 case OMPD_distribute:
9529 case OMPD_distribute_simd:
9530 case OMPD_distribute_parallel_for:
9531 case OMPD_distribute_parallel_for_simd:
9532 case OMPD_teams_distribute:
9533 case OMPD_teams_distribute_simd:
9534 case OMPD_teams_distribute_parallel_for:
9535 case OMPD_teams_distribute_parallel_for_simd:
9536 case OMPD_target_update:
9537 case OMPD_declare_simd:
9538 case OMPD_declare_variant:
9539 case OMPD_declare_target:
9540 case OMPD_end_declare_target:
9541 case OMPD_declare_reduction:
9542 case OMPD_declare_mapper:
9544 case OMPD_taskloop_simd:
9545 case OMPD_master_taskloop:
9546 case OMPD_master_taskloop_simd:
9547 case OMPD_parallel_master_taskloop:
9548 case OMPD_parallel_master_taskloop_simd:
9551 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9556 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9557 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9560 scanForTargetRegionsFunctions(
9561 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9565 // If this is a lambda function, look into its body.
9566 if (const auto *L = dyn_cast<LambdaExpr>(S))
9569 // Keep looking for target regions recursively.
9570 for (const Stmt *II : S->children())
9571 scanForTargetRegionsFunctions(II, ParentName);
9574 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9575 // If emitting code for the host, we do not process FD here. Instead we do
9576 // the normal code generation.
9577 if (!CGM.getLangOpts().OpenMPIsDevice) {
9578 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
9579 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9580 OMPDeclareTargetDeclAttr::getDeviceType(FD);
9581 // Do not emit device_type(nohost) functions for the host.
9582 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9588 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9589 // Try to detect target regions in the function.
9590 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9591 StringRef Name = CGM.getMangledName(GD);
9592 scanForTargetRegionsFunctions(FD->getBody(), Name);
9593 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9594 OMPDeclareTargetDeclAttr::getDeviceType(FD);
9595 // Do not emit device_type(nohost) functions for the host.
9596 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9600 // Do not to emit function if it is not marked as declare target.
9601 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9602 AlreadyEmittedTargetDecls.count(VD) == 0;
9605 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9606 if (!CGM.getLangOpts().OpenMPIsDevice)
9609 // Check if there are Ctors/Dtors in this declaration and look for target
9610 // regions in it. We use the complete variant to produce the kernel name
9612 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9613 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9614 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9615 StringRef ParentName =
9616 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9617 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9619 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9620 StringRef ParentName =
9621 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9622 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9626 // Do not to emit variable if it is not marked as declare target.
9627 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9628 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9629 cast<VarDecl>(GD.getDecl()));
9630 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9631 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9632 HasRequiresUnifiedSharedMemory)) {
9633 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9640 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9641 const VarDecl *VD) {
9642 assert(VD->getType().isConstant(CGM.getContext()) &&
9643 "Expected constant variable.");
9645 llvm::Constant *Addr;
9646 llvm::GlobalValue::LinkageTypes Linkage;
9647 QualType Ty = VD->getType();
9648 SmallString<128> Buffer;
9653 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9655 llvm::raw_svector_ostream OS(Buffer);
9656 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9657 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9660 Linkage = llvm::GlobalValue::InternalLinkage;
9662 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9663 getDefaultFirstprivateAddressSpace());
9664 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9665 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9666 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9667 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9668 VarName, Addr, VarSize,
9669 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
9673 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9674 llvm::Constant *Addr) {
9675 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9676 !CGM.getLangOpts().OpenMPIsDevice)
9678 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9679 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9681 if (CGM.getLangOpts().OpenMPIsDevice) {
9682 // Register non-target variables being emitted in device code (debug info
9684 StringRef VarName = CGM.getMangledName(VD);
9685 EmittedNonTargetVariables.try_emplace(VarName, Addr);
9689 // Register declare target variables.
9690 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9693 llvm::GlobalValue::LinkageTypes Linkage;
9695 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9696 !HasRequiresUnifiedSharedMemory) {
9697 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9698 VarName = CGM.getMangledName(VD);
9699 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9700 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9701 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9703 VarSize = CharUnits::Zero();
9705 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9706 // Temp solution to prevent optimizations of the internal variables.
9707 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9708 std::string RefName = getName({VarName, "ref"});
9709 if (!CGM.GetGlobalValue(RefName)) {
9710 llvm::Constant *AddrRef =
9711 getOrCreateInternalVariable(Addr->getType(), RefName);
9712 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9713 GVAddrRef->setConstant(/*Val=*/true);
9714 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9715 GVAddrRef->setInitializer(Addr);
9716 CGM.addCompilerUsedGlobal(GVAddrRef);
9720 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9721 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9722 HasRequiresUnifiedSharedMemory)) &&
9723 "Declare target attribute must link or to with unified memory.");
9724 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9725 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9727 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9729 if (CGM.getLangOpts().OpenMPIsDevice) {
9730 VarName = Addr->getName();
9733 VarName = getAddrOfDeclareTargetVar(VD).getName();
9734 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9736 VarSize = CGM.getPointerSize();
9737 Linkage = llvm::GlobalValue::WeakAnyLinkage;
9740 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9741 VarName, Addr, VarSize, Flags, Linkage);
9744 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9745 if (isa<FunctionDecl>(GD.getDecl()) ||
9746 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9747 return emitTargetFunctions(GD);
9749 return emitTargetGlobalVariable(GD);
9752 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9753 for (const VarDecl *VD : DeferredGlobalVariables) {
9754 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9755 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9758 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9759 !HasRequiresUnifiedSharedMemory) {
9762 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9763 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9764 HasRequiresUnifiedSharedMemory)) &&
9765 "Expected link clause or to clause with unified memory.");
9766 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9771 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9772 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9773 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9774 " Expected target-based directive.");
9777 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9778 const OMPRequiresDecl *D) {
9779 for (const OMPClause *Clause : D->clauselists()) {
9780 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9781 HasRequiresUnifiedSharedMemory = true;
9787 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9789 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9791 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9792 switch(A->getAllocatorType()) {
9793 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9794 // Not supported, fallback to the default mem space.
9795 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9796 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9797 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9798 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9799 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9800 case OMPAllocateDeclAttr::OMPConstMemAlloc:
9801 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9802 AS = LangAS::Default;
9804 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9805 llvm_unreachable("Expected predefined allocator for the variables with the "
9811 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9812 return HasRequiresUnifiedSharedMemory;
9815 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9818 if (CGM.getLangOpts().OpenMPIsDevice) {
9819 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9820 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9824 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9825 if (CGM.getLangOpts().OpenMPIsDevice)
9826 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9829 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9830 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9833 const auto *D = cast<FunctionDecl>(GD.getDecl());
9834 // Do not to emit function if it is marked as declare target as it was already
9836 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9837 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
9838 if (auto *F = dyn_cast_or_null<llvm::Function>(
9839 CGM.GetGlobalValue(CGM.getMangledName(GD))))
9840 return !F->isDeclaration();
9846 return !AlreadyEmittedTargetDecls.insert(D).second;
9849 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9850 // If we don't have entries or if we are emitting code for the device, we
9851 // don't need to do anything.
9852 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9853 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9854 (OffloadEntriesInfoManager.empty() &&
9855 !HasEmittedDeclareTargetRegion &&
9856 !HasEmittedTargetRegion))
9859 // Create and register the function that handles the requires directives.
9860 ASTContext &C = CGM.getContext();
9862 llvm::Function *RequiresRegFn;
9864 CodeGenFunction CGF(CGM);
9865 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9866 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9867 std::string ReqName = getName({"omp_offloading", "requires_reg"});
9868 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9869 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9870 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9871 // TODO: check for other requires clauses.
9872 // The requires directive takes effect only when a target region is
9873 // present in the compilation unit. Otherwise it is ignored and not
9874 // passed to the runtime. This avoids the runtime from throwing an error
9875 // for mismatching requires clauses across compilation units that don't
9876 // contain at least 1 target region.
9877 assert((HasEmittedTargetRegion ||
9878 HasEmittedDeclareTargetRegion ||
9879 !OffloadEntriesInfoManager.empty()) &&
9880 "Target or declare target region expected.");
9881 if (HasRequiresUnifiedSharedMemory)
9882 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9883 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9884 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9885 CGF.FinishFunction();
9887 return RequiresRegFn;
9890 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9891 const OMPExecutableDirective &D,
9893 llvm::Function *OutlinedFn,
9894 ArrayRef<llvm::Value *> CapturedVars) {
9895 if (!CGF.HaveInsertPoint())
9898 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9899 CodeGenFunction::RunCleanupsScope Scope(CGF);
9901 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9902 llvm::Value *Args[] = {
9904 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9905 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9906 llvm::SmallVector<llvm::Value *, 16> RealArgs;
9907 RealArgs.append(std::begin(Args), std::end(Args));
9908 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9910 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9911 CGF.EmitRuntimeCall(RTLFn, RealArgs);
9914 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9915 const Expr *NumTeams,
9916 const Expr *ThreadLimit,
9917 SourceLocation Loc) {
9918 if (!CGF.HaveInsertPoint())
9921 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9923 llvm::Value *NumTeamsVal =
9925 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9926 CGF.CGM.Int32Ty, /* isSigned = */ true)
9927 : CGF.Builder.getInt32(0);
9929 llvm::Value *ThreadLimitVal =
9931 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9932 CGF.CGM.Int32Ty, /* isSigned = */ true)
9933 : CGF.Builder.getInt32(0);
9935 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
9936 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9938 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
9942 void CGOpenMPRuntime::emitTargetDataCalls(
9943 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9944 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9945 if (!CGF.HaveInsertPoint())
9948 // Action used to replace the default codegen action and turn privatization
9950 PrePostActionTy NoPrivAction;
9952 // Generate the code for the opening of the data environment. Capture all the
9953 // arguments of the runtime call by reference because they are used in the
9954 // closing of the region.
9955 auto &&BeginThenGen = [this, &D, Device, &Info,
9956 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9957 // Fill up the arrays with all the mapped variables.
9958 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9959 MappableExprsHandler::MapValuesArrayTy Pointers;
9960 MappableExprsHandler::MapValuesArrayTy Sizes;
9961 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9963 // Get map clause information.
9964 MappableExprsHandler MCHandler(D, CGF);
9965 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9967 // Fill up the arrays and create the arguments.
9968 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9970 llvm::Value *BasePointersArrayArg = nullptr;
9971 llvm::Value *PointersArrayArg = nullptr;
9972 llvm::Value *SizesArrayArg = nullptr;
9973 llvm::Value *MapTypesArrayArg = nullptr;
9974 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9975 SizesArrayArg, MapTypesArrayArg, Info);
9977 // Emit device ID if any.
9978 llvm::Value *DeviceID = nullptr;
9980 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9981 CGF.Int64Ty, /*isSigned=*/true);
9983 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9986 // Emit the number of elements in the offloading arrays.
9987 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9989 llvm::Value *OffloadingArgs[] = {
9990 DeviceID, PointerNum, BasePointersArrayArg,
9991 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9992 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
9995 // If device pointer privatization is required, emit the body of the region
9996 // here. It will have to be duplicated: with and without privatization.
9997 if (!Info.CaptureDeviceAddrMap.empty())
10001 // Generate code for the closing of the data region.
10002 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
10003 PrePostActionTy &) {
10004 assert(Info.isValid() && "Invalid data environment closing arguments.");
10006 llvm::Value *BasePointersArrayArg = nullptr;
10007 llvm::Value *PointersArrayArg = nullptr;
10008 llvm::Value *SizesArrayArg = nullptr;
10009 llvm::Value *MapTypesArrayArg = nullptr;
10010 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
10011 SizesArrayArg, MapTypesArrayArg, Info);
10013 // Emit device ID if any.
10014 llvm::Value *DeviceID = nullptr;
10016 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10017 CGF.Int64Ty, /*isSigned=*/true);
10019 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10022 // Emit the number of elements in the offloading arrays.
10023 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10025 llvm::Value *OffloadingArgs[] = {
10026 DeviceID, PointerNum, BasePointersArrayArg,
10027 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
10028 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
10032 // If we need device pointer privatization, we need to emit the body of the
10033 // region with no privatization in the 'else' branch of the conditional.
10034 // Otherwise, we don't have to do anything.
10035 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10036 PrePostActionTy &) {
10037 if (!Info.CaptureDeviceAddrMap.empty()) {
10038 CodeGen.setAction(NoPrivAction);
10043 // We don't have to do anything to close the region if the if clause evaluates
10045 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10048 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10050 RegionCodeGenTy RCG(BeginThenGen);
10054 // If we don't require privatization of device pointers, we emit the body in
10055 // between the runtime calls. This avoids duplicating the body code.
10056 if (Info.CaptureDeviceAddrMap.empty()) {
10057 CodeGen.setAction(NoPrivAction);
10062 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10064 RegionCodeGenTy RCG(EndThenGen);
10069 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10070 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10071 const Expr *Device) {
10072 if (!CGF.HaveInsertPoint())
10075 assert((isa<OMPTargetEnterDataDirective>(D) ||
10076 isa<OMPTargetExitDataDirective>(D) ||
10077 isa<OMPTargetUpdateDirective>(D)) &&
10078 "Expecting either target enter, exit data, or update directives.");
10080 CodeGenFunction::OMPTargetDataInfo InputInfo;
10081 llvm::Value *MapTypesArray = nullptr;
10082 // Generate the code for the opening of the data environment.
10083 auto &&ThenGen = [this, &D, Device, &InputInfo,
10084 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10085 // Emit device ID if any.
10086 llvm::Value *DeviceID = nullptr;
10088 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10089 CGF.Int64Ty, /*isSigned=*/true);
10091 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10094 // Emit the number of elements in the offloading arrays.
10095 llvm::Constant *PointerNum =
10096 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10098 llvm::Value *OffloadingArgs[] = {DeviceID,
10100 InputInfo.BasePointersArray.getPointer(),
10101 InputInfo.PointersArray.getPointer(),
10102 InputInfo.SizesArray.getPointer(),
10105 // Select the right runtime function call for each expected standalone
10107 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10108 OpenMPRTLFunction RTLFn;
10109 switch (D.getDirectiveKind()) {
10110 case OMPD_target_enter_data:
10111 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
10112 : OMPRTL__tgt_target_data_begin;
10114 case OMPD_target_exit_data:
10115 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
10116 : OMPRTL__tgt_target_data_end;
10118 case OMPD_target_update:
10119 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
10120 : OMPRTL__tgt_target_data_update;
10122 case OMPD_parallel:
10124 case OMPD_parallel_for:
10125 case OMPD_parallel_master:
10126 case OMPD_parallel_sections:
10127 case OMPD_for_simd:
10128 case OMPD_parallel_for_simd:
10130 case OMPD_cancellation_point:
10132 case OMPD_threadprivate:
10133 case OMPD_allocate:
10136 case OMPD_sections:
10140 case OMPD_critical:
10141 case OMPD_taskyield:
10143 case OMPD_taskwait:
10144 case OMPD_taskgroup:
10148 case OMPD_target_data:
10149 case OMPD_distribute:
10150 case OMPD_distribute_simd:
10151 case OMPD_distribute_parallel_for:
10152 case OMPD_distribute_parallel_for_simd:
10153 case OMPD_teams_distribute:
10154 case OMPD_teams_distribute_simd:
10155 case OMPD_teams_distribute_parallel_for:
10156 case OMPD_teams_distribute_parallel_for_simd:
10157 case OMPD_declare_simd:
10158 case OMPD_declare_variant:
10159 case OMPD_declare_target:
10160 case OMPD_end_declare_target:
10161 case OMPD_declare_reduction:
10162 case OMPD_declare_mapper:
10163 case OMPD_taskloop:
10164 case OMPD_taskloop_simd:
10165 case OMPD_master_taskloop:
10166 case OMPD_master_taskloop_simd:
10167 case OMPD_parallel_master_taskloop:
10168 case OMPD_parallel_master_taskloop_simd:
10170 case OMPD_target_simd:
10171 case OMPD_target_teams_distribute:
10172 case OMPD_target_teams_distribute_simd:
10173 case OMPD_target_teams_distribute_parallel_for:
10174 case OMPD_target_teams_distribute_parallel_for_simd:
10175 case OMPD_target_teams:
10176 case OMPD_target_parallel:
10177 case OMPD_target_parallel_for:
10178 case OMPD_target_parallel_for_simd:
10179 case OMPD_requires:
10181 llvm_unreachable("Unexpected standalone target data directive.");
10184 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
10187 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
10188 CodeGenFunction &CGF, PrePostActionTy &) {
10189 // Fill up the arrays with all the mapped variables.
10190 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
10191 MappableExprsHandler::MapValuesArrayTy Pointers;
10192 MappableExprsHandler::MapValuesArrayTy Sizes;
10193 MappableExprsHandler::MapFlagsArrayTy MapTypes;
10195 // Get map clause information.
10196 MappableExprsHandler MEHandler(D, CGF);
10197 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
10199 TargetDataInfo Info;
10200 // Fill up the arrays and create the arguments.
10201 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
10202 emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
10203 Info.PointersArray, Info.SizesArray,
10204 Info.MapTypesArray, Info);
10205 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10206 InputInfo.BasePointersArray =
10207 Address(Info.BasePointersArray, CGM.getPointerAlign());
10208 InputInfo.PointersArray =
10209 Address(Info.PointersArray, CGM.getPointerAlign());
10210 InputInfo.SizesArray =
10211 Address(Info.SizesArray, CGM.getPointerAlign());
10212 MapTypesArray = Info.MapTypesArray;
10213 if (D.hasClausesOfKind<OMPDependClause>())
10214 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10216 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10220 emitIfClause(CGF, IfCond, TargetThenGen,
10221 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10223 RegionCodeGenTy ThenRCG(TargetThenGen);
10229 /// Kind of parameter in a function with 'declare simd' directive.
10230 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
10231 /// Attribute set of the parameter.
10232 struct ParamAttrTy {
10233 ParamKindTy Kind = Vector;
10234 llvm::APSInt StrideOrArg;
10235 llvm::APSInt Alignment;
10239 static unsigned evaluateCDTSize(const FunctionDecl *FD,
10240 ArrayRef<ParamAttrTy> ParamAttrs) {
10241 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10242 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10243 // of that clause. The VLEN value must be power of 2.
10244 // In other case the notion of the function`s "characteristic data type" (CDT)
10245 // is used to compute the vector length.
10246 // CDT is defined in the following order:
10247 // a) For non-void function, the CDT is the return type.
10248 // b) If the function has any non-uniform, non-linear parameters, then the
10249 // CDT is the type of the first such parameter.
10250 // c) If the CDT determined by a) or b) above is struct, union, or class
10251 // type which is pass-by-value (except for the type that maps to the
10252 // built-in complex data type), the characteristic data type is int.
10253 // d) If none of the above three cases is applicable, the CDT is int.
10254 // The VLEN is then determined based on the CDT and the size of vector
10255 // register of that ISA for which current vector version is generated. The
10256 // VLEN is computed using the formula below:
10257 // VLEN = sizeof(vector_register) / sizeof(CDT),
10258 // where vector register size specified in section 3.2.1 Registers and the
10259 // Stack Frame of original AMD64 ABI document.
10260 QualType RetType = FD->getReturnType();
10261 if (RetType.isNull())
10263 ASTContext &C = FD->getASTContext();
10265 if (!RetType.isNull() && !RetType->isVoidType()) {
10268 unsigned Offset = 0;
10269 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10270 if (ParamAttrs[Offset].Kind == Vector)
10271 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10274 if (CDT.isNull()) {
10275 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10276 if (ParamAttrs[I + Offset].Kind == Vector) {
10277 CDT = FD->getParamDecl(I)->getType();
10285 CDT = CDT->getCanonicalTypeUnqualified();
10286 if (CDT->isRecordType() || CDT->isUnionType())
10288 return C.getTypeSize(CDT);
10292 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10293 const llvm::APSInt &VLENVal,
10294 ArrayRef<ParamAttrTy> ParamAttrs,
10295 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10298 unsigned VecRegSize;
10300 ISADataTy ISAData[] = {
10314 llvm::SmallVector<char, 2> Masked;
10316 case OMPDeclareSimdDeclAttr::BS_Undefined:
10317 Masked.push_back('N');
10318 Masked.push_back('M');
10320 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10321 Masked.push_back('N');
10323 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10324 Masked.push_back('M');
10327 for (char Mask : Masked) {
10328 for (const ISADataTy &Data : ISAData) {
10329 SmallString<256> Buffer;
10330 llvm::raw_svector_ostream Out(Buffer);
10331 Out << "_ZGV" << Data.ISA << Mask;
10333 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10334 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10335 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10339 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
10340 switch (ParamAttr.Kind){
10341 case LinearWithVarStride:
10342 Out << 's' << ParamAttr.StrideOrArg;
10346 if (!!ParamAttr.StrideOrArg)
10347 Out << ParamAttr.StrideOrArg;
10356 if (!!ParamAttr.Alignment)
10357 Out << 'a' << ParamAttr.Alignment;
10359 Out << '_' << Fn->getName();
10360 Fn->addFnAttr(Out.str());
10365 // This are the Functions that are needed to mangle the name of the
10366 // vector functions generated by the compiler, according to the rules
10367 // defined in the "Vector Function ABI specifications for AArch64",
10369 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10371 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
10373 /// TODO: Need to implement the behavior for reference marked with a
10374 /// var or no linear modifiers (1.b in the section). For this, we
10375 /// need to extend ParamKindTy to support the linear modifiers.
10376 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10377 QT = QT.getCanonicalType();
10379 if (QT->isVoidType())
10382 if (Kind == ParamKindTy::Uniform)
10385 if (Kind == ParamKindTy::Linear)
10388 // TODO: Handle linear references with modifiers
10390 if (Kind == ParamKindTy::LinearWithVarStride)
10396 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10397 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10398 QT = QT.getCanonicalType();
10399 unsigned Size = C.getTypeSize(QT);
10401 // Only scalars and complex within 16 bytes wide set PVB to true.
10402 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10405 if (QT->isFloatingType())
10408 if (QT->isIntegerType())
10411 if (QT->isPointerType())
10414 // TODO: Add support for complex types (section 3.1.2, item 2).
10419 /// Computes the lane size (LS) of a return type or of an input parameter,
10420 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10421 /// TODO: Add support for references, section 3.2.1, item 1.
10422 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10423 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10424 QualType PTy = QT.getCanonicalType()->getPointeeType();
10425 if (getAArch64PBV(PTy, C))
10426 return C.getTypeSize(PTy);
10428 if (getAArch64PBV(QT, C))
10429 return C.getTypeSize(QT);
10431 return C.getTypeSize(C.getUIntPtrType());
10434 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10435 // signature of the scalar function, as defined in 3.2.2 of the
10437 static std::tuple<unsigned, unsigned, bool>
10438 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10439 QualType RetType = FD->getReturnType().getCanonicalType();
10441 ASTContext &C = FD->getASTContext();
10443 bool OutputBecomesInput = false;
10445 llvm::SmallVector<unsigned, 8> Sizes;
10446 if (!RetType->isVoidType()) {
10447 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10448 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10449 OutputBecomesInput = true;
10451 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10452 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10453 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10456 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10457 // The LS of a function parameter / return value can only be a power
10458 // of 2, starting from 8 bits, up to 128.
10459 assert(std::all_of(Sizes.begin(), Sizes.end(),
10460 [](unsigned Size) {
10461 return Size == 8 || Size == 16 || Size == 32 ||
10462 Size == 64 || Size == 128;
10466 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10467 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10468 OutputBecomesInput);
10471 /// Mangle the parameter part of the vector function name according to
10472 /// their OpenMP classification. The mangling function is defined in
10473 /// section 3.5 of the AAVFABI.
10474 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10475 SmallString<256> Buffer;
10476 llvm::raw_svector_ostream Out(Buffer);
10477 for (const auto &ParamAttr : ParamAttrs) {
10478 switch (ParamAttr.Kind) {
10479 case LinearWithVarStride:
10480 Out << "ls" << ParamAttr.StrideOrArg;
10484 // Don't print the step value if it is not present or if it is
10486 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10487 Out << ParamAttr.StrideOrArg;
10497 if (!!ParamAttr.Alignment)
10498 Out << 'a' << ParamAttr.Alignment;
10504 // Function used to add the attribute. The parameter `VLEN` is
10505 // templated to allow the use of "x" when targeting scalable functions
10507 template <typename T>
10508 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10509 char ISA, StringRef ParSeq,
10510 StringRef MangledName, bool OutputBecomesInput,
10511 llvm::Function *Fn) {
10512 SmallString<256> Buffer;
10513 llvm::raw_svector_ostream Out(Buffer);
10514 Out << Prefix << ISA << LMask << VLEN;
10515 if (OutputBecomesInput)
10517 Out << ParSeq << "_" << MangledName;
10518 Fn->addFnAttr(Out.str());
10521 // Helper function to generate the Advanced SIMD names depending on
10522 // the value of the NDS when simdlen is not present.
10523 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10524 StringRef Prefix, char ISA,
10525 StringRef ParSeq, StringRef MangledName,
10526 bool OutputBecomesInput,
10527 llvm::Function *Fn) {
10530 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10531 OutputBecomesInput, Fn);
10532 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10533 OutputBecomesInput, Fn);
10536 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10537 OutputBecomesInput, Fn);
10538 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10539 OutputBecomesInput, Fn);
10542 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10543 OutputBecomesInput, Fn);
10544 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10545 OutputBecomesInput, Fn);
10549 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10550 OutputBecomesInput, Fn);
10553 llvm_unreachable("Scalar type is too wide.");
10557 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10558 static void emitAArch64DeclareSimdFunction(
10559 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10560 ArrayRef<ParamAttrTy> ParamAttrs,
10561 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10562 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10564 // Get basic data for building the vector signature.
10565 const auto Data = getNDSWDS(FD, ParamAttrs);
10566 const unsigned NDS = std::get<0>(Data);
10567 const unsigned WDS = std::get<1>(Data);
10568 const bool OutputBecomesInput = std::get<2>(Data);
10570 // Check the values provided via `simdlen` by the user.
10571 // 1. A `simdlen(1)` doesn't produce vector signatures,
10572 if (UserVLEN == 1) {
10573 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10574 DiagnosticsEngine::Warning,
10575 "The clause simdlen(1) has no effect when targeting aarch64.");
10576 CGM.getDiags().Report(SLoc, DiagID);
10580 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10581 // Advanced SIMD output.
10582 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10583 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10584 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10585 "power of 2 when targeting Advanced SIMD.");
10586 CGM.getDiags().Report(SLoc, DiagID);
10590 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
10592 if (ISA == 's' && UserVLEN != 0) {
10593 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10594 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10595 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10596 "lanes in the architectural constraints "
10597 "for SVE (min is 128-bit, max is "
10598 "2048-bit, by steps of 128-bit)");
10599 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10604 // Sort out parameter sequence.
10605 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10606 StringRef Prefix = "_ZGV";
10607 // Generate simdlen from user input (if any).
10610 // SVE generates only a masked function.
10611 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10612 OutputBecomesInput, Fn);
10614 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10615 // Advanced SIMD generates one or two functions, depending on
10616 // the `[not]inbranch` clause.
10618 case OMPDeclareSimdDeclAttr::BS_Undefined:
10619 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10620 OutputBecomesInput, Fn);
10621 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10622 OutputBecomesInput, Fn);
10624 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10625 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10626 OutputBecomesInput, Fn);
10628 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10629 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10630 OutputBecomesInput, Fn);
10635 // If no user simdlen is provided, follow the AAVFABI rules for
10636 // generating the vector length.
10638 // SVE, section 3.4.1, item 1.
10639 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10640 OutputBecomesInput, Fn);
10642 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10643 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10644 // two vector names depending on the use of the clause
10645 // `[not]inbranch`.
10647 case OMPDeclareSimdDeclAttr::BS_Undefined:
10648 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10649 OutputBecomesInput, Fn);
10650 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10651 OutputBecomesInput, Fn);
10653 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10654 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10655 OutputBecomesInput, Fn);
10657 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10658 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10659 OutputBecomesInput, Fn);
10666 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10667 llvm::Function *Fn) {
10668 ASTContext &C = CGM.getContext();
10669 FD = FD->getMostRecentDecl();
10670 // Map params to their positions in function decl.
10671 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10672 if (isa<CXXMethodDecl>(FD))
10673 ParamPositions.try_emplace(FD, 0);
10674 unsigned ParamPos = ParamPositions.size();
10675 for (const ParmVarDecl *P : FD->parameters()) {
10676 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10680 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10681 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10682 // Mark uniform parameters.
10683 for (const Expr *E : Attr->uniforms()) {
10684 E = E->IgnoreParenImpCasts();
10686 if (isa<CXXThisExpr>(E)) {
10687 Pos = ParamPositions[FD];
10689 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10690 ->getCanonicalDecl();
10691 Pos = ParamPositions[PVD];
10693 ParamAttrs[Pos].Kind = Uniform;
10695 // Get alignment info.
10696 auto NI = Attr->alignments_begin();
10697 for (const Expr *E : Attr->aligneds()) {
10698 E = E->IgnoreParenImpCasts();
10701 if (isa<CXXThisExpr>(E)) {
10702 Pos = ParamPositions[FD];
10703 ParmTy = E->getType();
10705 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10706 ->getCanonicalDecl();
10707 Pos = ParamPositions[PVD];
10708 ParmTy = PVD->getType();
10710 ParamAttrs[Pos].Alignment =
10712 ? (*NI)->EvaluateKnownConstInt(C)
10713 : llvm::APSInt::getUnsigned(
10714 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10718 // Mark linear parameters.
10719 auto SI = Attr->steps_begin();
10720 auto MI = Attr->modifiers_begin();
10721 for (const Expr *E : Attr->linears()) {
10722 E = E->IgnoreParenImpCasts();
10724 if (isa<CXXThisExpr>(E)) {
10725 Pos = ParamPositions[FD];
10727 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10728 ->getCanonicalDecl();
10729 Pos = ParamPositions[PVD];
10731 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10732 ParamAttr.Kind = Linear;
10734 Expr::EvalResult Result;
10735 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10736 if (const auto *DRE =
10737 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10738 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10739 ParamAttr.Kind = LinearWithVarStride;
10740 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10741 ParamPositions[StridePVD->getCanonicalDecl()]);
10745 ParamAttr.StrideOrArg = Result.Val.getInt();
10751 llvm::APSInt VLENVal;
10752 SourceLocation ExprLoc;
10753 const Expr *VLENExpr = Attr->getSimdlen();
10755 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10756 ExprLoc = VLENExpr->getExprLoc();
10758 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10759 if (CGM.getTriple().isX86()) {
10760 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10761 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10762 unsigned VLEN = VLENVal.getExtValue();
10763 StringRef MangledName = Fn->getName();
10764 if (CGM.getTarget().hasFeature("sve"))
10765 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10766 MangledName, 's', 128, Fn, ExprLoc);
10767 if (CGM.getTarget().hasFeature("neon"))
10768 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10769 MangledName, 'n', 128, Fn, ExprLoc);
10772 FD = FD->getPreviousDecl();
10777 /// Cleanup action for doacross support.
10778 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10780 static const int DoacrossFinArgs = 2;
10783 llvm::FunctionCallee RTLFn;
10784 llvm::Value *Args[DoacrossFinArgs];
10787 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10788 ArrayRef<llvm::Value *> CallArgs)
10790 assert(CallArgs.size() == DoacrossFinArgs);
10791 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10793 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10794 if (!CGF.HaveInsertPoint())
10796 CGF.EmitRuntimeCall(RTLFn, Args);
10801 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10802 const OMPLoopDirective &D,
10803 ArrayRef<Expr *> NumIterations) {
10804 if (!CGF.HaveInsertPoint())
10807 ASTContext &C = CGM.getContext();
10808 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10810 if (KmpDimTy.isNull()) {
10811 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
10812 // kmp_int64 lo; // lower
10813 // kmp_int64 up; // upper
10814 // kmp_int64 st; // stride
10816 RD = C.buildImplicitRecord("kmp_dim");
10817 RD->startDefinition();
10818 addFieldToRecordDecl(C, RD, Int64Ty);
10819 addFieldToRecordDecl(C, RD, Int64Ty);
10820 addFieldToRecordDecl(C, RD, Int64Ty);
10821 RD->completeDefinition();
10822 KmpDimTy = C.getRecordType(RD);
10824 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10826 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10828 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
10830 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10831 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10832 enum { LowerFD = 0, UpperFD, StrideFD };
10833 // Fill dims with data.
10834 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10835 LValue DimsLVal = CGF.MakeAddrLValue(
10836 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10837 // dims.upper = num_iterations;
10838 LValue UpperLVal = CGF.EmitLValueForField(
10839 DimsLVal, *std::next(RD->field_begin(), UpperFD));
10840 llvm::Value *NumIterVal =
10841 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10842 D.getNumIterations()->getType(), Int64Ty,
10843 D.getNumIterations()->getExprLoc());
10844 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10845 // dims.stride = 1;
10846 LValue StrideLVal = CGF.EmitLValueForField(
10847 DimsLVal, *std::next(RD->field_begin(), StrideFD));
10848 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10852 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10853 // kmp_int32 num_dims, struct kmp_dim * dims);
10854 llvm::Value *Args[] = {
10855 emitUpdateLocation(CGF, D.getBeginLoc()),
10856 getThreadID(CGF, D.getBeginLoc()),
10857 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10858 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10859 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10862 llvm::FunctionCallee RTLFn =
10863 createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10864 CGF.EmitRuntimeCall(RTLFn, Args);
10865 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10866 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10867 llvm::FunctionCallee FiniRTLFn =
10868 createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10869 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10870 llvm::makeArrayRef(FiniArgs));
10873 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10874 const OMPDependClause *C) {
10876 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10877 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10878 QualType ArrayTy = CGM.getContext().getConstantArrayType(
10879 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
10880 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10881 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10882 const Expr *CounterVal = C->getLoopData(I);
10883 assert(CounterVal);
10884 llvm::Value *CntVal = CGF.EmitScalarConversion(
10885 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10886 CounterVal->getExprLoc());
10887 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10888 /*Volatile=*/false, Int64Ty);
10890 llvm::Value *Args[] = {
10891 emitUpdateLocation(CGF, C->getBeginLoc()),
10892 getThreadID(CGF, C->getBeginLoc()),
10893 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10894 llvm::FunctionCallee RTLFn;
10895 if (C->getDependencyKind() == OMPC_DEPEND_source) {
10896 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10898 assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10899 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10901 CGF.EmitRuntimeCall(RTLFn, Args);
10904 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10905 llvm::FunctionCallee Callee,
10906 ArrayRef<llvm::Value *> Args) const {
10907 assert(Loc.isValid() && "Outlined function call location must be valid.");
10908 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10910 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10911 if (Fn->doesNotThrow()) {
10912 CGF.EmitNounwindRuntimeCall(Fn, Args);
10916 CGF.EmitRuntimeCall(Callee, Args);
10919 void CGOpenMPRuntime::emitOutlinedFunctionCall(
10920 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
10921 ArrayRef<llvm::Value *> Args) const {
10922 emitCall(CGF, Loc, OutlinedFn, Args);
10925 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10926 if (const auto *FD = dyn_cast<FunctionDecl>(D))
10927 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10928 HasEmittedDeclareTargetRegion = true;
10931 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
10932 const VarDecl *NativeParam,
10933 const VarDecl *TargetParam) const {
10934 return CGF.GetAddrOfLocalVar(NativeParam);
10938 /// Cleanup action for allocate support.
10939 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10941 static const int CleanupArgs = 3;
10944 llvm::FunctionCallee RTLFn;
10945 llvm::Value *Args[CleanupArgs];
10948 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10949 ArrayRef<llvm::Value *> CallArgs)
10951 assert(CallArgs.size() == CleanupArgs &&
10952 "Size of arguments does not match.");
10953 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10955 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10956 if (!CGF.HaveInsertPoint())
10958 CGF.EmitRuntimeCall(RTLFn, Args);
10963 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10964 const VarDecl *VD) {
10966 return Address::invalid();
10967 const VarDecl *CVD = VD->getCanonicalDecl();
10968 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10969 return Address::invalid();
10970 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10971 // Use the default allocation.
10972 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10973 !AA->getAllocator())
10974 return Address::invalid();
10976 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10977 if (CVD->getType()->isVariablyModifiedType()) {
10978 Size = CGF.getTypeSize(CVD->getType());
10979 // Align the size: ((size + align - 1) / align) * align
10980 Size = CGF.Builder.CreateNUWAdd(
10981 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10982 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10983 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
10985 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10986 Size = CGM.getSize(Sz.alignTo(Align));
10988 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
10989 assert(AA->getAllocator() &&
10990 "Expected allocator expression for non-default allocator.");
10991 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
10992 // According to the standard, the original allocator type is a enum (integer).
10993 // Convert to pointer type, if required.
10994 if (Allocator->getType()->isIntegerTy())
10995 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
10996 else if (Allocator->getType()->isPointerTy())
10997 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
10999 llvm::Value *Args[] = {ThreadID, Size, Allocator};
11001 llvm::Value *Addr =
11002 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
11003 CVD->getName() + ".void.addr");
11004 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
11006 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
11008 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11009 llvm::makeArrayRef(FiniArgs));
11010 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11012 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
11013 CVD->getName() + ".addr");
11014 return Address(Addr, Align);
11018 using OMPContextSelectorData =
11019 OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>;
11020 using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>;
11021 } // anonymous namespace
11023 /// Checks current context and returns true if it matches the context selector.
11024 template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx,
11025 typename... Arguments>
11026 static bool checkContext(const OMPContextSelectorData &Data,
11027 Arguments... Params) {
11028 assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown &&
11029 "Unknown context selector or context selector set.");
11033 /// Checks for implementation={vendor(<vendor>)} context selector.
11034 /// \returns true iff <vendor>="llvm", false otherwise.
11036 bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(
11037 const OMPContextSelectorData &Data) {
11038 return llvm::all_of(Data.Names,
11039 [](StringRef S) { return !S.compare_lower("llvm"); });
11042 /// Checks for device={kind(<kind>)} context selector.
11043 /// \returns true if <kind>="host" and compilation is for host.
11044 /// true if <kind>="nohost" and compilation is for device.
11045 /// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU.
11046 /// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN.
11047 /// false otherwise.
11049 bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(
11050 const OMPContextSelectorData &Data, CodeGenModule &CGM) {
11051 for (StringRef Name : Data.Names) {
11052 if (!Name.compare_lower("host")) {
11053 if (CGM.getLangOpts().OpenMPIsDevice)
11057 if (!Name.compare_lower("nohost")) {
11058 if (!CGM.getLangOpts().OpenMPIsDevice)
11062 switch (CGM.getTriple().getArch()) {
11063 case llvm::Triple::arm:
11064 case llvm::Triple::armeb:
11065 case llvm::Triple::aarch64:
11066 case llvm::Triple::aarch64_be:
11067 case llvm::Triple::aarch64_32:
11068 case llvm::Triple::ppc:
11069 case llvm::Triple::ppc64:
11070 case llvm::Triple::ppc64le:
11071 case llvm::Triple::x86:
11072 case llvm::Triple::x86_64:
11073 if (Name.compare_lower("cpu"))
11076 case llvm::Triple::amdgcn:
11077 case llvm::Triple::nvptx:
11078 case llvm::Triple::nvptx64:
11079 if (Name.compare_lower("gpu"))
11082 case llvm::Triple::UnknownArch:
11083 case llvm::Triple::arc:
11084 case llvm::Triple::avr:
11085 case llvm::Triple::bpfel:
11086 case llvm::Triple::bpfeb:
11087 case llvm::Triple::hexagon:
11088 case llvm::Triple::mips:
11089 case llvm::Triple::mipsel:
11090 case llvm::Triple::mips64:
11091 case llvm::Triple::mips64el:
11092 case llvm::Triple::msp430:
11093 case llvm::Triple::r600:
11094 case llvm::Triple::riscv32:
11095 case llvm::Triple::riscv64:
11096 case llvm::Triple::sparc:
11097 case llvm::Triple::sparcv9:
11098 case llvm::Triple::sparcel:
11099 case llvm::Triple::systemz:
11100 case llvm::Triple::tce:
11101 case llvm::Triple::tcele:
11102 case llvm::Triple::thumb:
11103 case llvm::Triple::thumbeb:
11104 case llvm::Triple::xcore:
11105 case llvm::Triple::le32:
11106 case llvm::Triple::le64:
11107 case llvm::Triple::amdil:
11108 case llvm::Triple::amdil64:
11109 case llvm::Triple::hsail:
11110 case llvm::Triple::hsail64:
11111 case llvm::Triple::spir:
11112 case llvm::Triple::spir64:
11113 case llvm::Triple::kalimba:
11114 case llvm::Triple::shave:
11115 case llvm::Triple::lanai:
11116 case llvm::Triple::wasm32:
11117 case llvm::Triple::wasm64:
11118 case llvm::Triple::renderscript32:
11119 case llvm::Triple::renderscript64:
11120 case llvm::Triple::ve:
11127 static bool matchesContext(CodeGenModule &CGM,
11128 const CompleteOMPContextSelectorData &ContextData) {
11129 for (const OMPContextSelectorData &Data : ContextData) {
11130 switch (Data.Ctx) {
11131 case OMP_CTX_vendor:
11132 assert(Data.CtxSet == OMP_CTX_SET_implementation &&
11133 "Expected implementation context selector set.");
11134 if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data))
11138 assert(Data.CtxSet == OMP_CTX_SET_device &&
11139 "Expected device context selector set.");
11140 if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data,
11144 case OMP_CTX_unknown:
11145 llvm_unreachable("Unknown context selector kind.");
11151 static CompleteOMPContextSelectorData
11152 translateAttrToContextSelectorData(ASTContext &C,
11153 const OMPDeclareVariantAttr *A) {
11154 CompleteOMPContextSelectorData Data;
11155 for (unsigned I = 0, E = A->scores_size(); I < E; ++I) {
11156 Data.emplace_back();
11157 auto CtxSet = static_cast<OpenMPContextSelectorSetKind>(
11158 *std::next(A->ctxSelectorSets_begin(), I));
11159 auto Ctx = static_cast<OpenMPContextSelectorKind>(
11160 *std::next(A->ctxSelectors_begin(), I));
11161 Data.back().CtxSet = CtxSet;
11162 Data.back().Ctx = Ctx;
11163 const Expr *Score = *std::next(A->scores_begin(), I);
11164 Data.back().Score = Score->EvaluateKnownConstInt(C);
11166 case OMP_CTX_vendor:
11167 assert(CtxSet == OMP_CTX_SET_implementation &&
11168 "Expected implementation context selector set.");
11169 Data.back().Names =
11170 llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end());
11173 assert(CtxSet == OMP_CTX_SET_device &&
11174 "Expected device context selector set.");
11175 Data.back().Names =
11176 llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end());
11178 case OMP_CTX_unknown:
11179 llvm_unreachable("Unknown context selector kind.");
11185 static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS,
11186 const CompleteOMPContextSelectorData &RHS) {
11187 llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData;
11188 for (const OMPContextSelectorData &D : RHS) {
11189 auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx));
11190 Pair.getSecond().insert(D.Names.begin(), D.Names.end());
11192 bool AllSetsAreEqual = true;
11193 for (const OMPContextSelectorData &D : LHS) {
11194 auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx));
11195 if (It == RHSData.end())
11197 if (D.Names.size() > It->getSecond().size())
11199 if (llvm::set_union(It->getSecond(), D.Names))
11202 AllSetsAreEqual && (D.Names.size() == It->getSecond().size());
11205 return LHS.size() != RHS.size() || !AllSetsAreEqual;
11208 static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS,
11209 const CompleteOMPContextSelectorData &RHS) {
11210 // Score is calculated as sum of all scores + 1.
11211 llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11212 bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS);
11213 if (RHSIsSubsetOfLHS) {
11214 LHSScore = llvm::APSInt::get(0);
11216 for (const OMPContextSelectorData &Data : LHS) {
11217 if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) {
11218 LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11219 } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) {
11220 LHSScore += Data.Score.extend(LHSScore.getBitWidth());
11222 LHSScore += Data.Score;
11226 llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false);
11227 if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) {
11228 RHSScore = llvm::APSInt::get(0);
11230 for (const OMPContextSelectorData &Data : RHS) {
11231 if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) {
11232 RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score;
11233 } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) {
11234 RHSScore += Data.Score.extend(RHSScore.getBitWidth());
11236 RHSScore += Data.Score;
11240 return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0;
11243 /// Finds the variant function that matches current context with its context
11245 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM,
11246 const FunctionDecl *FD) {
11247 if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>())
11249 // Iterate through all DeclareVariant attributes and check context selectors.
11250 const OMPDeclareVariantAttr *TopMostAttr = nullptr;
11251 CompleteOMPContextSelectorData TopMostData;
11252 for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) {
11253 CompleteOMPContextSelectorData Data =
11254 translateAttrToContextSelectorData(CGM.getContext(), A);
11255 if (!matchesContext(CGM, Data))
11257 // If the attribute matches the context, find the attribute with the highest
11259 if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) {
11261 TopMostData.swap(Data);
11266 return cast<FunctionDecl>(
11267 cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts())
11271 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
11272 const auto *D = cast<FunctionDecl>(GD.getDecl());
11273 // If the original function is defined already, use its definition.
11274 StringRef MangledName = CGM.getMangledName(GD);
11275 llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
11276 if (Orig && !Orig->isDeclaration())
11278 const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
11279 // Emit original function if it does not have declare variant attribute or the
11280 // context does not match.
11283 GlobalDecl NewGD = GD.getWithDecl(NewFD);
11284 if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
11285 DeferredVariantFunction.erase(D);
11288 DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
11292 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11293 CodeGenModule &CGM, const OMPLoopDirective &S)
11294 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11295 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11298 NontemporalDeclsSet &DS =
11299 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11300 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11301 for (const Stmt *Ref : C->private_refs()) {
11302 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11303 const ValueDecl *VD;
11304 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11305 VD = DRE->getDecl();
11307 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11308 assert((ME->isImplicitCXXThis() ||
11309 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11310 "Expected member of current class.");
11311 VD = ME->getMemberDecl();
11318 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11321 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11324 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11325 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11327 return llvm::any_of(
11328 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11329 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
11332 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11333 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11335 NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11336 [](const OMPLastprivateClause *C) {
11337 return C->getKind() ==
11338 OMPC_LASTPRIVATE_conditional;
11340 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11343 LastprivateConditionalData &Data =
11344 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11345 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11346 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11349 for (const Expr *Ref : C->varlists()) {
11350 Data.DeclToUniqeName.try_emplace(
11351 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11352 generateUniqueName(CGM, "pl_cond", Ref));
11355 Data.IVLVal = IVLVal;
11356 // In simd only mode or for simd directives no need to generate threadprivate
11357 // references for the loop iteration counter, we can use the original one
11358 // since outlining cannot happen in simd regions.
11359 if (CGF.getLangOpts().OpenMPSimd ||
11360 isOpenMPSimdDirective(S.getDirectiveKind())) {
11361 Data.UseOriginalIV = true;
11364 llvm::SmallString<16> Buffer;
11365 llvm::raw_svector_ostream OS(Buffer);
11367 CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
11368 assert(PLoc.isValid() && "Source location is expected to be always valid.");
11370 llvm::sys::fs::UniqueID ID;
11371 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
11372 CGM.getDiags().Report(diag::err_cannot_open_file)
11373 << PLoc.getFilename() << EC.message();
11374 OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
11375 << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
11376 Data.IVName = OS.str();
11379 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11382 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11385 void CGOpenMPRuntime::initLastprivateConditionalCounter(
11386 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11387 if (CGM.getLangOpts().OpenMPSimd ||
11388 !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11389 [](const OMPLastprivateClause *C) {
11390 return C->getKind() == OMPC_LASTPRIVATE_conditional;
11393 const CGOpenMPRuntime::LastprivateConditionalData &Data =
11394 LastprivateConditionalStack.back();
11395 if (Data.UseOriginalIV)
11397 // Global loop counter. Required to handle inner parallel-for regions.
11399 Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
11400 CGF, Data.IVLVal.getType(), Data.IVName);
11401 LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType());
11402 llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc());
11403 CGF.EmitStoreOfScalar(IVVal, GlobIVLVal);
11407 /// Checks if the lastprivate conditional variable is referenced in LHS.
11408 class LastprivateConditionalRefChecker final
11409 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11410 CodeGenFunction &CGF;
11411 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11412 const Expr *FoundE = nullptr;
11413 const Decl *FoundD = nullptr;
11414 StringRef UniqueDeclName;
11417 SourceLocation Loc;
11418 bool UseOriginalIV = false;
11421 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11422 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11423 llvm::reverse(LPM)) {
11424 auto It = D.DeclToUniqeName.find(E->getDecl());
11425 if (It == D.DeclToUniqeName.end())
11428 FoundD = E->getDecl()->getCanonicalDecl();
11429 UniqueDeclName = It->getSecond();
11432 UseOriginalIV = D.UseOriginalIV;
11435 return FoundE == E;
11437 bool VisitMemberExpr(const MemberExpr *E) {
11438 if (!CGF.IsWrappedCXXThis(E->getBase()))
11440 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11441 llvm::reverse(LPM)) {
11442 auto It = D.DeclToUniqeName.find(E->getMemberDecl());
11443 if (It == D.DeclToUniqeName.end())
11446 FoundD = E->getMemberDecl()->getCanonicalDecl();
11447 UniqueDeclName = It->getSecond();
11450 UseOriginalIV = D.UseOriginalIV;
11453 return FoundE == E;
11455 bool VisitStmt(const Stmt *S) {
11456 for (const Stmt *Child : S->children()) {
11459 if (const auto *E = dyn_cast<Expr>(Child))
11460 if (!E->isGLValue())
11467 explicit LastprivateConditionalRefChecker(
11468 CodeGenFunction &CGF,
11469 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11470 : CGF(CGF), LPM(LPM) {}
11471 std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
11472 getFoundData() const {
11473 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
11479 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11481 if (CGF.getLangOpts().OpenMP < 50)
11483 LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
11484 if (!Checker.Visit(LHS))
11486 const Expr *FoundE;
11487 const Decl *FoundD;
11488 StringRef UniqueDeclName;
11491 bool UseOriginalIV;
11492 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
11493 Checker.getFoundData();
11495 // Last updated loop counter for the lastprivate conditional var.
11496 // int<xx> last_iv = 0;
11497 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11498 llvm::Constant *LastIV =
11499 getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
11500 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11501 IVLVal.getAlignment().getAsAlign());
11502 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11504 // Private address of the lastprivate conditional in the current context.
11506 LValue LVal = CGF.EmitLValue(FoundE);
11507 // Last value of the lastprivate conditional.
11508 // decltype(priv_a) last_a;
11509 llvm::Constant *Last = getOrCreateInternalVariable(
11510 LVal.getAddress(CGF).getElementType(), UniqueDeclName);
11511 cast<llvm::GlobalVariable>(Last)->setAlignment(
11512 LVal.getAlignment().getAsAlign());
11514 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11516 // Global loop counter. Required to handle inner parallel-for regions.
11518 if (!UseOriginalIV) {
11520 getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName);
11521 IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType());
11523 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
11525 // #pragma omp critical(a)
11526 // if (last_iv <= iv) {
11528 // last_a = priv_a;
11530 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11531 FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
11533 llvm::Value *LastIVVal =
11534 CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
11535 // (last_iv <= global_iv) ? Check if the variable is updated and store new
11536 // value in global var.
11537 llvm::Value *CmpRes;
11538 if (IVLVal.getType()->isSignedIntegerType()) {
11539 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11541 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11542 "Loop iteration variable must be integer.");
11543 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11545 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11546 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11547 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11549 CGF.EmitBlock(ThenBB);
11551 // last_iv = global_iv;
11552 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11554 // last_a = priv_a;
11555 switch (CGF.getEvaluationKind(LVal.getType())) {
11557 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
11558 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11561 case TEK_Complex: {
11562 CodeGenFunction::ComplexPairTy PrivVal =
11563 CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
11564 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11567 case TEK_Aggregate:
11569 "Aggregates are not supported in lastprivate conditional.");
11572 CGF.EmitBranch(ExitBB);
11573 // There is no need to emit line number for unconditional branch.
11574 (void)ApplyDebugLocation::CreateEmpty(CGF);
11575 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11578 if (CGM.getLangOpts().OpenMPSimd) {
11579 // Do not emit as a critical region as no parallel region could be emitted.
11580 RegionCodeGenTy ThenRCG(CodeGen);
11583 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
11587 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11588 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11589 SourceLocation Loc) {
11590 if (CGF.getLangOpts().OpenMP < 50)
11592 auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
11593 assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
11594 "Unknown lastprivate conditional variable.");
11595 StringRef UniqueName = It->getSecond();
11596 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11597 // The variable was not updated in the region - exit.
11600 LValue LPLVal = CGF.MakeAddrLValue(
11601 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11602 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11603 CGF.EmitStoreOfScalar(Res, PrivLVal);
11606 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11607 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11608 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11609 llvm_unreachable("Not supported in SIMD-only mode");
11612 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11613 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11614 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
11615 llvm_unreachable("Not supported in SIMD-only mode");
11618 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11619 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11620 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11621 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11622 bool Tied, unsigned &NumberOfParts) {
11623 llvm_unreachable("Not supported in SIMD-only mode");
11626 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11627 SourceLocation Loc,
11628 llvm::Function *OutlinedFn,
11629 ArrayRef<llvm::Value *> CapturedVars,
11630 const Expr *IfCond) {
11631 llvm_unreachable("Not supported in SIMD-only mode");
11634 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11635 CodeGenFunction &CGF, StringRef CriticalName,
11636 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11637 const Expr *Hint) {
11638 llvm_unreachable("Not supported in SIMD-only mode");
11641 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11642 const RegionCodeGenTy &MasterOpGen,
11643 SourceLocation Loc) {
11644 llvm_unreachable("Not supported in SIMD-only mode");
11647 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11648 SourceLocation Loc) {
11649 llvm_unreachable("Not supported in SIMD-only mode");
11652 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11653 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11654 SourceLocation Loc) {
11655 llvm_unreachable("Not supported in SIMD-only mode");
11658 void CGOpenMPSIMDRuntime::emitSingleRegion(
11659 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
11660 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
11661 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
11662 ArrayRef<const Expr *> AssignmentOps) {
11663 llvm_unreachable("Not supported in SIMD-only mode");
11666 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
11667 const RegionCodeGenTy &OrderedOpGen,
11668 SourceLocation Loc,
11670 llvm_unreachable("Not supported in SIMD-only mode");
11673 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
11674 SourceLocation Loc,
11675 OpenMPDirectiveKind Kind,
11677 bool ForceSimpleCall) {
11678 llvm_unreachable("Not supported in SIMD-only mode");
11681 void CGOpenMPSIMDRuntime::emitForDispatchInit(
11682 CodeGenFunction &CGF, SourceLocation Loc,
11683 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
11684 bool Ordered, const DispatchRTInput &DispatchValues) {
11685 llvm_unreachable("Not supported in SIMD-only mode");
11688 void CGOpenMPSIMDRuntime::emitForStaticInit(
11689 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
11690 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
11691 llvm_unreachable("Not supported in SIMD-only mode");
11694 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
11695 CodeGenFunction &CGF, SourceLocation Loc,
11696 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
11697 llvm_unreachable("Not supported in SIMD-only mode");
11700 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
11701 SourceLocation Loc,
11704 llvm_unreachable("Not supported in SIMD-only mode");
11707 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
11708 SourceLocation Loc,
11709 OpenMPDirectiveKind DKind) {
11710 llvm_unreachable("Not supported in SIMD-only mode");
11713 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
11714 SourceLocation Loc,
11715 unsigned IVSize, bool IVSigned,
11716 Address IL, Address LB,
11717 Address UB, Address ST) {
11718 llvm_unreachable("Not supported in SIMD-only mode");
11721 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
11722 llvm::Value *NumThreads,
11723 SourceLocation Loc) {
11724 llvm_unreachable("Not supported in SIMD-only mode");
11727 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
11728 ProcBindKind ProcBind,
11729 SourceLocation Loc) {
11730 llvm_unreachable("Not supported in SIMD-only mode");
11733 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
11736 SourceLocation Loc) {
11737 llvm_unreachable("Not supported in SIMD-only mode");
11740 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
11741 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
11742 CodeGenFunction *CGF) {
11743 llvm_unreachable("Not supported in SIMD-only mode");
11746 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
11747 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
11748 llvm_unreachable("Not supported in SIMD-only mode");
11751 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
11752 ArrayRef<const Expr *> Vars,
11753 SourceLocation Loc) {
11754 llvm_unreachable("Not supported in SIMD-only mode");
11757 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
11758 const OMPExecutableDirective &D,
11759 llvm::Function *TaskFunction,
11760 QualType SharedsTy, Address Shareds,
11761 const Expr *IfCond,
11762 const OMPTaskDataTy &Data) {
11763 llvm_unreachable("Not supported in SIMD-only mode");
11766 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
11767 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
11768 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
11769 const Expr *IfCond, const OMPTaskDataTy &Data) {
11770 llvm_unreachable("Not supported in SIMD-only mode");
11773 void CGOpenMPSIMDRuntime::emitReduction(
11774 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
11775 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
11776 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
11777 assert(Options.SimpleReduction && "Only simple reduction is expected.");
11778 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
11779 ReductionOps, Options);
11782 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
11783 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
11784 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
11785 llvm_unreachable("Not supported in SIMD-only mode");
11788 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
11789 SourceLocation Loc,
11790 ReductionCodeGen &RCG,
11792 llvm_unreachable("Not supported in SIMD-only mode");
11795 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
11796 SourceLocation Loc,
11797 llvm::Value *ReductionsPtr,
11798 LValue SharedLVal) {
11799 llvm_unreachable("Not supported in SIMD-only mode");
11802 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
11803 SourceLocation Loc) {
11804 llvm_unreachable("Not supported in SIMD-only mode");
11807 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
11808 CodeGenFunction &CGF, SourceLocation Loc,
11809 OpenMPDirectiveKind CancelRegion) {
11810 llvm_unreachable("Not supported in SIMD-only mode");
11813 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
11814 SourceLocation Loc, const Expr *IfCond,
11815 OpenMPDirectiveKind CancelRegion) {
11816 llvm_unreachable("Not supported in SIMD-only mode");
11819 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
11820 const OMPExecutableDirective &D, StringRef ParentName,
11821 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
11822 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
11823 llvm_unreachable("Not supported in SIMD-only mode");
11826 void CGOpenMPSIMDRuntime::emitTargetCall(
11827 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11828 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
11829 const Expr *Device,
11830 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
11831 const OMPLoopDirective &D)>
11833 llvm_unreachable("Not supported in SIMD-only mode");
11836 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
11837 llvm_unreachable("Not supported in SIMD-only mode");
11840 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
11841 llvm_unreachable("Not supported in SIMD-only mode");
11844 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
11848 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
11849 const OMPExecutableDirective &D,
11850 SourceLocation Loc,
11851 llvm::Function *OutlinedFn,
11852 ArrayRef<llvm::Value *> CapturedVars) {
11853 llvm_unreachable("Not supported in SIMD-only mode");
11856 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11857 const Expr *NumTeams,
11858 const Expr *ThreadLimit,
11859 SourceLocation Loc) {
11860 llvm_unreachable("Not supported in SIMD-only mode");
11863 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
11864 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11865 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11866 llvm_unreachable("Not supported in SIMD-only mode");
11869 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
11870 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11871 const Expr *Device) {
11872 llvm_unreachable("Not supported in SIMD-only mode");
11875 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11876 const OMPLoopDirective &D,
11877 ArrayRef<Expr *> NumIterations) {
11878 llvm_unreachable("Not supported in SIMD-only mode");
11881 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11882 const OMPDependClause *C) {
11883 llvm_unreachable("Not supported in SIMD-only mode");
11887 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
11888 const VarDecl *NativeParam) const {
11889 llvm_unreachable("Not supported in SIMD-only mode");
11893 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
11894 const VarDecl *NativeParam,
11895 const VarDecl *TargetParam) const {
11896 llvm_unreachable("Not supported in SIMD-only mode");