1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This provides a class for OpenMP runtime code generation.
11 //===----------------------------------------------------------------------===//
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
31 using namespace clang;
32 using namespace CodeGen;
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
38 /// Kinds of OpenMP regions used in codegen.
39 enum CGOpenMPRegionKind {
40 /// Region with outlined function for standalone 'parallel'
42 ParallelOutlinedRegion,
43 /// Region with outlined function for standalone 'task' directive.
45 /// Region for constructs that do not require function outlining,
46 /// like 'for', 'sections', 'atomic' etc. directives.
48 /// Region with outlined function for standalone 'target' directive.
52 CGOpenMPRegionInfo(const CapturedStmt &CS,
53 const CGOpenMPRegionKind RegionKind,
54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63 Kind(Kind), HasCancel(HasCancel) {}
65 /// Get a variable or parameter for storing global thread id
66 /// inside OpenMP construct.
67 virtual const VarDecl *getThreadIDVariable() const = 0;
69 /// Emit the captured statement body.
70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
72 /// Get an LValue for the current ThreadID variable.
73 /// \return LValue for thread id variable. This LValue always has type int32*.
74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
82 bool hasCancel() const { return HasCancel; }
84 static bool classof(const CGCapturedStmtInfo *Info) {
85 return Info->getKind() == CR_OpenMP;
88 ~CGOpenMPRegionInfo() override = default;
91 CGOpenMPRegionKind RegionKind;
92 RegionCodeGenTy CodeGen;
93 OpenMPDirectiveKind Kind;
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101 const RegionCodeGenTy &CodeGen,
102 OpenMPDirectiveKind Kind, bool HasCancel,
103 StringRef HelperName)
104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
110 /// Get a variable or parameter for storing global thread id
111 /// inside OpenMP construct.
112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
114 /// Get the name of the capture helper.
115 StringRef getHelperName() const override { return HelperName; }
117 static bool classof(const CGCapturedStmtInfo *Info) {
118 return CGOpenMPRegionInfo::classof(Info) &&
119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120 ParallelOutlinedRegion;
124 /// A variable or parameter storing global thread id for OpenMP
126 const VarDecl *ThreadIDVar;
127 StringRef HelperName;
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
133 class UntiedTaskActionTy final : public PrePostActionTy {
135 const VarDecl *PartIDVar;
136 const RegionCodeGenTy UntiedCodeGen;
137 llvm::SwitchInst *UntiedSwitch = nullptr;
140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141 const RegionCodeGenTy &UntiedCodeGen)
142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143 void Enter(CodeGenFunction &CGF) override {
145 // Emit task switching point.
146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147 CGF.GetAddrOfLocalVar(PartIDVar),
148 PartIDVar->getType()->castAs<PointerType>());
150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153 CGF.EmitBlock(DoneBB);
154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157 CGF.Builder.GetInsertBlock());
158 emitUntiedSwitch(CGF);
161 void emitUntiedSwitch(CodeGenFunction &CGF) const {
163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164 CGF.GetAddrOfLocalVar(PartIDVar),
165 PartIDVar->getType()->castAs<PointerType>());
166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
169 CodeGenFunction::JumpDest CurPoint =
170 CGF.getJumpDestInCurrentScope(".untied.next.");
171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174 CGF.Builder.GetInsertBlock());
175 CGF.EmitBranchThroughCleanup(CurPoint);
176 CGF.EmitBlock(CurPoint.getBlock());
179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182 const VarDecl *ThreadIDVar,
183 const RegionCodeGenTy &CodeGen,
184 OpenMPDirectiveKind Kind, bool HasCancel,
185 const UntiedTaskActionTy &Action)
186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187 ThreadIDVar(ThreadIDVar), Action(Action) {
188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
191 /// Get a variable or parameter for storing global thread id
192 /// inside OpenMP construct.
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
195 /// Get an LValue for the current ThreadID variable.
196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
198 /// Get the name of the capture helper.
199 StringRef getHelperName() const override { return ".omp_outlined."; }
201 void emitUntiedSwitch(CodeGenFunction &CGF) override {
202 Action.emitUntiedSwitch(CGF);
205 static bool classof(const CGCapturedStmtInfo *Info) {
206 return CGOpenMPRegionInfo::classof(Info) &&
207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
212 /// A variable or parameter storing global thread id for OpenMP
214 const VarDecl *ThreadIDVar;
215 /// Action for emitting code for untied tasks.
216 const UntiedTaskActionTy &Action;
219 /// API for inlined captured statement code generation in OpenMP
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224 const RegionCodeGenTy &CodeGen,
225 OpenMPDirectiveKind Kind, bool HasCancel)
226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
230 // Retrieve the value of the context parameter.
231 llvm::Value *getContextValue() const override {
233 return OuterRegionInfo->getContextValue();
234 llvm_unreachable("No context value for inlined OpenMP region");
237 void setContextValue(llvm::Value *V) override {
238 if (OuterRegionInfo) {
239 OuterRegionInfo->setContextValue(V);
242 llvm_unreachable("No context value for inlined OpenMP region");
245 /// Lookup the captured field decl for a variable.
246 const FieldDecl *lookup(const VarDecl *VD) const override {
248 return OuterRegionInfo->lookup(VD);
249 // If there is no outer outlined region,no need to lookup in a list of
250 // captured variables, we can use the original one.
254 FieldDecl *getThisFieldDecl() const override {
256 return OuterRegionInfo->getThisFieldDecl();
260 /// Get a variable or parameter for storing global thread id
261 /// inside OpenMP construct.
262 const VarDecl *getThreadIDVariable() const override {
264 return OuterRegionInfo->getThreadIDVariable();
268 /// Get an LValue for the current ThreadID variable.
269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
271 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272 llvm_unreachable("No LValue for inlined OpenMP construct");
275 /// Get the name of the capture helper.
276 StringRef getHelperName() const override {
277 if (auto *OuterRegionInfo = getOldCSI())
278 return OuterRegionInfo->getHelperName();
279 llvm_unreachable("No helper name for inlined OpenMP construct");
282 void emitUntiedSwitch(CodeGenFunction &CGF) override {
284 OuterRegionInfo->emitUntiedSwitch(CGF);
287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
289 static bool classof(const CGCapturedStmtInfo *Info) {
290 return CGOpenMPRegionInfo::classof(Info) &&
291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
294 ~CGOpenMPInlinedRegionInfo() override = default;
297 /// CodeGen info about outer OpenMP region.
298 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299 CGOpenMPRegionInfo *OuterRegionInfo;
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310 const RegionCodeGenTy &CodeGen, StringRef HelperName)
311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312 /*HasCancel=*/false),
313 HelperName(HelperName) {}
315 /// This is unused for target regions because each starts executing
316 /// with a single thread.
317 const VarDecl *getThreadIDVariable() const override { return nullptr; }
319 /// Get the name of the capture helper.
320 StringRef getHelperName() const override { return HelperName; }
322 static bool classof(const CGCapturedStmtInfo *Info) {
323 return CGOpenMPRegionInfo::classof(Info) &&
324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
328 StringRef HelperName;
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332 llvm_unreachable("No codegen for expressions");
334 /// API for generation of expressions captured in a innermost OpenMP
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
341 /*HasCancel=*/false),
343 // Make sure the globals captured in the provided statement are local by
344 // using the privatization logic. We assume the same variable is not
345 // captured more than once.
346 for (const auto &C : CS.captures()) {
347 if (!C.capturesVariable() && !C.capturesVariableByCopy())
350 const VarDecl *VD = C.getCapturedVar();
351 if (VD->isLocalVarDeclOrParm())
354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355 /*RefersToEnclosingVariableOrCapture=*/false,
356 VD->getType().getNonReferenceType(), VK_LValue,
358 PrivScope.addPrivate(
359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
361 (void)PrivScope.Privatize();
364 /// Lookup the captured field decl for a variable.
365 const FieldDecl *lookup(const VarDecl *VD) const override {
366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
371 /// Emit the captured statement body.
372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373 llvm_unreachable("No body for expressions");
376 /// Get a variable or parameter for storing global thread id
377 /// inside OpenMP construct.
378 const VarDecl *getThreadIDVariable() const override {
379 llvm_unreachable("No thread id for expressions");
382 /// Get the name of the capture helper.
383 StringRef getHelperName() const override {
384 llvm_unreachable("No helper name for expressions");
387 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
390 /// Private scope to capture global variables.
391 CodeGenFunction::OMPPrivateScope PrivScope;
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396 CodeGenFunction &CGF;
397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398 FieldDecl *LambdaThisCaptureField = nullptr;
399 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
402 /// Constructs region for combined constructs.
403 /// \param CodeGen Code generation sequence for combined directives. Includes
404 /// a list of functions used for code generation of implicitly inlined
406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407 OpenMPDirectiveKind Kind, bool HasCancel)
409 // Start emission for the construct.
410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414 CGF.LambdaThisCaptureField = nullptr;
415 BlockInfo = CGF.BlockInfo;
416 CGF.BlockInfo = nullptr;
419 ~InlinedOpenMPRegionRAII() {
420 // Restore original CapturedStmtInfo only if we're done with code emission.
422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423 delete CGF.CapturedStmtInfo;
424 CGF.CapturedStmtInfo = OldCSI;
425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427 CGF.BlockInfo = BlockInfo;
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumeric elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435 /// Use trampoline for internal microtask.
436 OMP_IDENT_IMD = 0x01,
437 /// Use c-style ident structure.
438 OMP_IDENT_KMPC = 0x02,
439 /// Atomic reduction option for kmpc_reduce.
440 OMP_ATOMIC_REDUCE = 0x10,
441 /// Explicit 'barrier' directive.
442 OMP_IDENT_BARRIER_EXPL = 0x20,
443 /// Implicit barrier in code.
444 OMP_IDENT_BARRIER_IMPL = 0x40,
445 /// Implicit barrier in 'for' directive.
446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447 /// Implicit barrier in 'sections' directive.
448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449 /// Implicit barrier in 'single' directive.
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451 /// Call of __kmp_for_static_init for static loop.
452 OMP_IDENT_WORK_LOOP = 0x200,
453 /// Call of __kmp_for_static_init for sections.
454 OMP_IDENT_WORK_SECTIONS = 0x400,
455 /// Call of __kmp_for_static_init for distribute.
456 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
462 /// Values for bit flags for marking which requires clauses have been used.
463 enum OpenMPOffloadingRequiresDirFlags : int64_t {
465 OMP_REQ_UNDEFINED = 0x000,
466 /// no requires clause present.
467 OMP_REQ_NONE = 0x001,
468 /// reverse_offload clause.
469 OMP_REQ_REVERSE_OFFLOAD = 0x002,
470 /// unified_address clause.
471 OMP_REQ_UNIFIED_ADDRESS = 0x004,
472 /// unified_shared_memory clause.
473 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
474 /// dynamic_allocators clause.
475 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
/// Device IDs reserved by the offloading runtime.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
484 } // anonymous namespace
486 /// Describes ident structure that describes a source location.
487 /// All descriptions are taken from
488 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
489 /// Original structure:
490 /// typedef struct ident {
491 /// kmp_int32 reserved_1; /**< might be used in Fortran;
493 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
494 /// KMP_IDENT_KMPC identifies this union
496 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
499 /// /* but currently used for storing
500 /// region-specific ITT */
501 /// /* contextual information. */
502 ///#endif /* USE_ITT_BUILD */
503 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
505 /// char const *psource; /**< String describing the source location.
506 /// The string is composed of semi-colon separated
/// fields which describe the source file,
508 /// the function and a pair of line numbers that
509 /// delimit the construct.
/// Field indexes into the ident_t structure described above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
527 /// Schedule types for 'omp for' loops (these enumerators are taken from
528 /// the enum sched_type in kmp.h).
529 enum OpenMPSchedType {
530 /// Lower bound for default (unordered) versions.
532 OMP_sch_static_chunked = 33,
534 OMP_sch_dynamic_chunked = 35,
535 OMP_sch_guided_chunked = 36,
536 OMP_sch_runtime = 37,
538 /// static with chunk adjustment (e.g., simd)
539 OMP_sch_static_balanced_chunked = 45,
540 /// Lower bound for 'ordered' versions.
542 OMP_ord_static_chunked = 65,
544 OMP_ord_dynamic_chunked = 67,
545 OMP_ord_guided_chunked = 68,
546 OMP_ord_runtime = 69,
548 OMP_sch_default = OMP_sch_static,
549 /// dist_schedule types
550 OMP_dist_sch_static_chunked = 91,
551 OMP_dist_sch_static = 92,
552 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
553 /// Set if the monotonic schedule modifier was present.
554 OMP_sch_modifier_monotonic = (1 << 29),
555 /// Set if the nonmonotonic schedule modifier was present.
556 OMP_sch_modifier_nonmonotonic = (1 << 30),
/// Identifiers of the OpenMP runtime library entry points used by this
/// codegen; the comment above each enumerator gives the C prototype of the
/// corresponding runtime call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
757 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
759 class CleanupTy final : public EHScopeStack::Cleanup {
760 PrePostActionTy *Action;
763 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
764 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
765 if (!CGF.HaveInsertPoint())
771 } // anonymous namespace
773 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
774 CodeGenFunction::RunCleanupsScope Scope(CGF);
776 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
777 Callback(CodeGen, CGF, *PrePostAction);
779 PrePostActionTy Action;
780 Callback(CodeGen, CGF, Action);
784 /// Check if the combiner is a call to UDR combiner and if it is so return the
785 /// UDR decl used for reduction.
786 static const OMPDeclareReductionDecl *
787 getReductionInit(const Expr *ReductionOp) {
788 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
789 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
790 if (const auto *DRE =
791 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
792 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
797 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
798 const OMPDeclareReductionDecl *DRD,
800 Address Private, Address Original,
802 if (DRD->getInitializer()) {
803 std::pair<llvm::Function *, llvm::Function *> Reduction =
804 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
805 const auto *CE = cast<CallExpr>(InitOp);
806 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
807 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
808 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
810 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
812 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
813 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
814 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
815 [=]() { return Private; });
816 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
817 [=]() { return Original; });
818 (void)PrivateScope.Privatize();
819 RValue Func = RValue::get(Reduction.second);
820 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
821 CGF.EmitIgnoredExpr(InitOp);
823 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
824 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
825 auto *GV = new llvm::GlobalVariable(
826 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
827 llvm::GlobalValue::PrivateLinkage, Init, Name);
828 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
830 switch (CGF.getEvaluationKind(Ty)) {
832 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
836 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
839 InitRVal = RValue::getAggregate(LV.getAddress());
842 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
843 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
844 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
845 /*IsInitializer=*/false);
849 /// Emit initialization of arrays of complex types.
850 /// \param DestAddr Address of the array.
851 /// \param Type Type of array.
852 /// \param Init Initial expression of array.
853 /// \param SrcAddr Address of the original array.
// Emits an element-by-element initialization loop over the destination
// array: empty-check, body block with dest (and optional src) PHIs, one
// init per element, then branch back until DestEnd is reached.
// NOTE(review): the embedded original line numbers jump repeatedly
// (859->862, 864->866, 866->869, ...), so this copy has dropped lines
// (e.g. the declaration of ElementTy, the assignments of the bitcast
// results, the else-branches, and the closing brace) — verify against
// upstream before editing.
854 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
855 QualType Type, bool EmitDeclareReductionInit,
857 const OMPDeclareReductionDecl *DRD,
858 Address SrcAddr = Address::invalid()) {
859 // Perform element-by-element initialization.
862 // Drill down to the base element type on both arrays.
863 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
864 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
866 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
869 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
// SrcBegin stays null when no source address is supplied; the PHI for the
// source element is then skipped below.
871 llvm::Value *SrcBegin = nullptr;
873 SrcBegin = SrcAddr.getPointer();
874 llvm::Value *DestBegin = DestAddr.getPointer();
875 // Cast from pointer to array type to pointer to single element.
876 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
877 // The basic structure here is a while-do loop.
878 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
879 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
880 llvm::Value *IsEmpty =
881 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
882 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
884 // Enter the loop body, making that address the current address.
885 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
886 CGF.EmitBlock(BodyBB);
888 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
890 llvm::PHINode *SrcElementPHI = nullptr;
891 Address SrcElementCurrent = Address::invalid();
893 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
894 "omp.arraycpy.srcElementPast");
895 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
897 Address(SrcElementPHI,
898 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
900 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
901 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
902 DestElementPHI->addIncoming(DestBegin, EntryBB);
903 Address DestElementCurrent =
904 Address(DestElementPHI,
905 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
// Each element initialization runs in its own cleanups scope so that
// per-element temporaries are destroyed before the next iteration.
909 CodeGenFunction::RunCleanupsScope InitScope(CGF);
910 if (EmitDeclareReductionInit) {
911 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
912 SrcElementCurrent, ElementTy);
914 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
915 /*IsInitializer=*/false);
919 // Shift the address forward by one element.
920 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
921 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
922 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
925 // Shift the address forward by one element.
926 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
927 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
928 // Check whether we've reached the end.
930 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
931 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
932 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
935 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
// Emits the lvalue of the shared (original) reduction expression by
// delegating to CodeGenFunction::EmitOMPSharedLValue.
// NOTE(review): the closing brace appears among the dropped lines
// (numbering jumps 939->942).
938 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
939 return CGF.EmitOMPSharedLValue(E);
// Emits the lvalue of the upper bound of the shared expression: for an
// OpenMP array section this is the section's end (IsLowerBound=false).
// NOTE(review): numbering jumps 942->944 and 945->949 — the second
// parameter's line and the non-section fallback return are missing from
// this copy; verify against upstream.
942 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
944 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
945 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
// Initializes an array-typed private reduction variable: uses the
// declare-reduction initializer when the DRD provides one (or the private
// VarDecl has no init of its own), otherwise the private VarDecl's init.
// NOTE(review): numbering jumps 953->955 and the closing brace is among
// the dropped lines.
949 void ReductionCodeGen::emitAggregateInitialization(
950 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
951 const OMPDeclareReductionDecl *DRD) {
952 // Emit VarDecl with copy init for arrays.
953 // Get the address of the original variable captured in current
955 const auto *PrivateVD =
956 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
957 bool EmitDeclareReductionInit =
958 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
959 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
960 EmitDeclareReductionInit,
961 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
962 : PrivateVD->getInit(),
963 DRD, SharedLVal.getAddress());
// Constructor: records one ClausesData entry per shared expression,
// walking the Privates and ReductionOps lists in lockstep, and reserves
// the parallel bookkeeping vectors up front.
// NOTE(review): the loop's and constructor's closing braces are among the
// dropped lines (numbering jumps 978->982).
966 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
967 ArrayRef<const Expr *> Privates,
968 ArrayRef<const Expr *> ReductionOps) {
969 ClausesData.reserve(Shareds.size());
970 SharedAddresses.reserve(Shareds.size());
971 Sizes.reserve(Shareds.size());
972 BaseDecls.reserve(Shareds.size());
973 auto IPriv = Privates.begin();
974 auto IRed = ReductionOps.begin();
975 for (const Expr *Ref : Shareds) {
976 ClausesData.emplace_back(Ref, *IPriv, *IRed);
977 std::advance(IPriv, 1);
978 std::advance(IRed, 1);
// Emits and caches the shared lvalue (begin and upper bound) for clause N.
// The assert enforces that lvalues are generated strictly in order.
982 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
983 assert(SharedAddresses.size() == N &&
984 "Number of generated lvalues must be exactly N.");
985 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
986 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
987 SharedAddresses.emplace_back(First, Second);
// Computes the size (in chars and in elements) of the reduction item for
// clause N. For array sections the size is derived from the pointer
// difference between the section's UB and begin; otherwise from the
// shared type's size. For variably modified private types, the computed
// element count is bound to the VLA size expression via an opaque value
// before emitting the type.
// NOTE(review): numbering has several gaps (995->998, 1003->1005, ...),
// so branch structure (if/else bodies, early return, Size declaration,
// and braces) is partly missing from this copy; verify against upstream.
990 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
991 const auto *PrivateVD =
992 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
993 QualType PrivateType = PrivateVD->getType();
994 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
995 if (!PrivateType->isVariablyModifiedType()) {
998 SharedAddresses[N].first.getType().getNonReferenceType()),
1003 llvm::Value *SizeInChars;
1005 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
1007 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1008 if (AsArraySection) {
// Section length is inclusive of the last element, hence the +1.
1009 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
1010 SharedAddresses[N].first.getPointer());
1011 Size = CGF.Builder.CreateNUWAdd(
1012 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1013 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1015 SizeInChars = CGF.getTypeSize(
1016 SharedAddresses[N].first.getType().getNonReferenceType());
1017 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1019 Sizes.emplace_back(SizeInChars, Size);
1020 CodeGenFunction::OpaqueValueMapping OpaqueMap(
1022 cast<OpaqueValueExpr>(
1023 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1025 CGF.EmitVariablyModifiedType(PrivateType);
// Overload taking an externally computed Size: for non-variably-modified
// private types Size must be null (asserted); otherwise the given Size is
// bound to the VLA size expression and the type is emitted.
// NOTE(review): numbering gaps (1035->1039, 1042->1044) indicate dropped
// lines — likely the assert's message tail, an early return, and the
// OpaqueMap's value argument; verify against upstream.
1028 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1029 llvm::Value *Size) {
1030 const auto *PrivateVD =
1031 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1032 QualType PrivateType = PrivateVD->getType();
1033 if (!PrivateType->isVariablyModifiedType()) {
1034 assert(!Size && !Sizes[N].second &&
1035 "Size should be nullptr for non-variably modified reduction "
1039 CodeGenFunction::OpaqueValueMapping OpaqueMap(
1041 cast<OpaqueValueExpr>(
1042 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1044 CGF.EmitVariablyModifiedType(PrivateType);
// Emits the initializer for the private copy of reduction item N.
// Dispatch order: array types go through the aggregate path; a usable
// declare-reduction initializer wins next; otherwise DefaultInit gets a
// chance, and finally the private VarDecl's own (non-trivial) init is
// emitted. Both addresses are element-bitcast to their converted types
// first so the stores are type-correct.
// NOTE(review): the trailing brace(s) are among the dropped lines
// (numbering jumps 1074->1078).
1047 void ReductionCodeGen::emitInitialization(
1048 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1049 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1050 assert(SharedAddresses.size() > N && "No variable was generated");
1051 const auto *PrivateVD =
1052 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1053 const OMPDeclareReductionDecl *DRD =
1054 getReductionInit(ClausesData[N].ReductionOp);
1055 QualType PrivateType = PrivateVD->getType();
1056 PrivateAddr = CGF.Builder.CreateElementBitCast(
1057 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1058 QualType SharedType = SharedAddresses[N].first.getType();
// Rebuild the shared lvalue with the converted type while preserving the
// original base info and TBAA metadata.
1059 SharedLVal = CGF.MakeAddrLValue(
1060 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1061 CGF.ConvertTypeForMem(SharedType)),
1062 SharedType, SharedAddresses[N].first.getBaseInfo(),
1063 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1064 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1065 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1066 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1067 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1068 PrivateAddr, SharedLVal.getAddress(),
1069 SharedLVal.getType());
1070 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1071 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1072 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1073 PrivateVD->getType().getQualifiers(),
1074 /*IsInitializer=*/false);
// Returns true when the private copy of reduction item N has a non-trivial
// destruction kind and therefore needs a cleanup/destructor call.
1078 bool ReductionCodeGen::needCleanups(unsigned N) {
1079 const auto *PrivateVD =
1080 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1081 QualType PrivateType = PrivateVD->getType();
1082 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1083 return DTorKind != QualType::DK_none;
// Pushes a destructor cleanup for the private copy of reduction item N,
// after bitcasting its address to the converted private type. Recomputes
// the destruction kind locally and gates on needCleanups(N).
1086 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1087 Address PrivateAddr) {
1088 const auto *PrivateVD =
1089 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1090 QualType PrivateType = PrivateVD->getType();
1091 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1092 if (needCleanups(N)) {
1093 PrivateAddr = CGF.Builder.CreateElementBitCast(
1094 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1095 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
// Repeatedly loads through pointer/reference layers of BaseTy until it
// reaches ElTy, then returns the final lvalue bitcast to ElTy's converted
// type (preserving base info and TBAA).
// NOTE(review): numbering jumps (1105->1107, 1110->1112) suggest the
// else-branch brace lines and loop brace were dropped in this copy.
1099 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1101 BaseTy = BaseTy.getNonReferenceType();
1102 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1103 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1104 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1105 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1107 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1108 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1110 BaseTy = BaseTy->getPointeeType();
1112 return CGF.MakeAddrLValue(
1113 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1114 CGF.ConvertTypeForMem(ElTy)),
1115 BaseLV.getType(), BaseLV.getBaseInfo(),
1116 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
// Builds a chain of memory temporaries mirroring the pointer/reference
// nesting of BaseTy, stores Addr (cast to the innermost element type)
// into the deepest temporary, and returns an Address with BaseLVAlignment.
// NOTE(review): numbering gaps (1130->1134, 1136->1138, 1141->1144)
// indicate dropped lines — including the updates of TopTmp/MostTopTmp and
// the final selection of the returned pointer; verify against upstream.
1119 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1120 llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1121 llvm::Value *Addr) {
1122 Address Tmp = Address::invalid();
1123 Address TopTmp = Address::invalid();
1124 Address MostTopTmp = Address::invalid();
1125 BaseTy = BaseTy.getNonReferenceType();
1126 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1127 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1128 Tmp = CGF.CreateMemTemp(BaseTy);
1129 if (TopTmp.isValid())
1130 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1134 BaseTy = BaseTy->getPointeeType();
1136 llvm::Type *Ty = BaseLVType;
1138 Ty = Tmp.getElementType();
1139 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1140 if (Tmp.isValid()) {
1141 CGF.Builder.CreateStore(Addr, Tmp);
1144 return Address(Addr, BaseLVAlignment);
// Strips nested array sections and array subscripts off Ref to find the
// underlying DeclRefExpr (returned via DE) and its VarDecl. Returns null
// when Ref is neither a section nor a subscript.
// NOTE(review): the final return and closing brace are among the dropped
// lines (numbering jumps 1162->1167).
1147 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1148 const VarDecl *OrigVD = nullptr;
1149 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1150 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1151 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1152 Base = TempOASE->getBase()->IgnoreParenImpCasts();
1153 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1154 Base = TempASE->getBase()->IgnoreParenImpCasts();
1155 DE = cast<DeclRefExpr>(Base);
1156 OrigVD = cast<VarDecl>(DE->getDecl());
1157 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1158 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1159 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1160 Base = TempASE->getBase()->IgnoreParenImpCasts();
1161 DE = cast<DeclRefExpr>(Base);
1162 OrigVD = cast<VarDecl>(DE->getDecl());
// For array-section/subscript reductions, offsets the private address so
// it corresponds to the same element offset as the shared address within
// its base, then re-wraps it in the base's pointer nesting via castToBase.
// For plain references it only records the base VarDecl.
// NOTE(review): numbering gaps (1172->1174, 1186->1188, and the tail past
// 1189) show dropped lines, including the BaseLValue declaration and the
// fallback return; verify against upstream.
1167 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1168 Address PrivateAddr) {
1169 const DeclRefExpr *DE;
1170 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1171 BaseDecls.emplace_back(OrigVD);
1172 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1174 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1175 OriginalBaseLValue);
// Distance (in elements) between the base begin and the shared item.
1176 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1177 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1178 llvm::Value *PrivatePointer =
1179 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1180 PrivateAddr.getPointer(),
1181 SharedAddresses[N].first.getAddress().getType());
1182 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1183 return castToBase(CGF, OrigVD->getType(),
1184 SharedAddresses[N].first.getType(),
1185 OriginalBaseLValue.getAddress().getType(),
1186 OriginalBaseLValue.getAlignment(), Ptr);
1188 BaseDecls.emplace_back(
1189 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
// True when clause N's reduction op resolves to a declare-reduction decl
// that carries an explicit initializer.
1193 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1194 const OMPDeclareReductionDecl *DRD =
1195 getReductionInit(ClausesData[N].ReductionOp);
1196 return DRD && DRD->getInitializer();
// The thread-id variable is passed as a kmp_int32* argument in outlined
// regions, so its lvalue is obtained by loading through that pointer.
1199 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1200 return CGF.EmitLoadOfPointerLValue(
1201 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1202 getThreadIDVariable()->getType()->castAs<PointerType>());
// Emits the region body inside a terminate scope so that exceptions
// cannot escape the structured block (see the OpenMP terminology note).
// NOTE(review): the line between pushTerminate and popTerminate (original
// 1214, presumably the CodeGen invocation) was dropped in this copy.
1205 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1206 if (!CGF.HaveInsertPoint())
1208 // 1.2.2 OpenMP Language Terminology
1209 // Structured block - An executable statement with a single entry at the
1210 // top and a single exit at the bottom.
1211 // The point of exit cannot be a branch out of the structured block.
1212 // longjmp() and throw() must not violate the entry/exit criteria.
1213 CGF.EHStack.pushTerminate();
1215 CGF.EHStack.popTerminate();
// In task outlined regions the thread-id variable is a plain kmp_int32
// local (not a pointer), so its lvalue is its own address.
1218 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1219 CodeGenFunction &CGF) {
1220 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1221 getThreadIDVariable()->getType(),
1222 AlignmentSource::Decl);
// Appends an unnamed public field of type FieldTy to record DC.
// NOTE(review): the second parameter line (original 1226) and the tail
// (addDecl/return, originals 1232-1233) were dropped in this copy.
1225 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1227 auto *Field = FieldDecl::Create(
1228 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1229 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1230 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1231 Field->setAccess(AS_public);
// Runtime constructor: builds the implicit ident_t record (four kmp_int32
// fields plus a void* psource field), caches its QualType/llvm type,
// creates the kmp_critical_name array type, and loads offload metadata.
// NOTE(review): the per-field comment lines between the
// addFieldToRecordDecl calls (originals 1244, 1246, ...) were dropped.
1236 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1237 StringRef Separator)
1238 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1239 OffloadEntriesInfoManager(CGM) {
1240 ASTContext &C = CGM.getContext();
1241 RecordDecl *RD = C.buildImplicitRecord("ident_t");
1242 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1243 RD->startDefinition();
1245 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1247 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1249 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1251 addFieldToRecordDecl(C, RD, KmpInt32Ty);
1253 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1254 RD->completeDefinition();
1255 IdentQTy = C.getRecordType(RD);
1256 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1257 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1259 loadOffloadInfoMetadata();
// Clears cached internal variables and erases unused non-target global
// variable declarations that were emitted only for debug info.
// NOTE(review): `continue` lines after the guards (originals 1267, 1272)
// and the closing braces appear to be among the dropped lines.
1262 void CGOpenMPRuntime::clear() {
1263 InternalVars.clear();
1264 // Clean non-target variable declarations possibly used only in debug info.
1265 for (const auto &Data : EmittedNonTargetVariables) {
1266 if (!Data.getValue().pointsToAliveValue())
1268 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1271 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1273 GV->eraseFromParent();
// Joins the given name parts into a runtime entity name, using
// FirstSeparator before the first part and Separator between the rest.
// NOTE(review): the loop body lines after 1281 (appending Sep and Part,
// switching Sep to Separator, and the return) were dropped in this copy.
1277 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1278 SmallString<128> Buffer;
1279 llvm::raw_svector_ostream OS(Buffer);
1280 StringRef Sep = FirstSeparator;
1281 for (StringRef Part : Parts) {
// Emits the outlined ".omp_combiner."/".omp_initializer." helper for a
// declare-reduction: builds a void(Ty* restrict, Ty* restrict) function,
// maps the In/Out VarDecls to loads of the two pointer parameters via a
// private scope, emits Out's initializer (initializer case only) and then
// the combiner/initializer expression, and finishes the function.
// NOTE(review): numbering gaps (1313->1315, 1323->1326, 1336->1338, and
// past 1341) indicate dropped lines, including the closing braces of the
// Optimize block and lambdas and the final `return Fn;`.
1288 static llvm::Function *
1289 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1290 const Expr *CombinerInitializer, const VarDecl *In,
1291 const VarDecl *Out, bool IsCombiner) {
1292 // void .omp_combiner.(Ty *in, Ty *out);
1293 ASTContext &C = CGM.getContext();
1294 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1295 FunctionArgList Args;
1296 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1297 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1298 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1299 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1300 Args.push_back(&OmpOutParm);
1301 Args.push_back(&OmpInParm);
1302 const CGFunctionInfo &FnInfo =
1303 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1304 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1305 std::string Name = CGM.getOpenMPRuntime().getName(
1306 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1307 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1308 Name, &CGM.getModule());
1309 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// Helpers are marked always_inline under -O so the combiner body is
// inlined at reduction sites.
1310 if (CGM.getLangOpts().Optimize) {
1311 Fn->removeFnAttr(llvm::Attribute::NoInline);
1312 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1313 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1315 CodeGenFunction CGF(CGM);
1316 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1317 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1318 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1319 Out->getLocation());
1320 CodeGenFunction::OMPPrivateScope Scope(CGF);
1321 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1322 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1323 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1326 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1327 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1328 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1331 (void)Scope.Privatize();
1332 if (!IsCombiner && Out->hasInit() &&
1333 !CGF.isTrivialInitializer(Out->getInit())) {
1334 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1335 Out->getType().getQualifiers(),
1336 /*IsInitializer=*/true);
1338 if (CombinerInitializer)
1339 CGF.EmitIgnoredExpr(CombinerInitializer);
1340 Scope.ForceCleanup();
1341 CGF.FinishFunction();
// Emits (once per decl) the combiner and optional initializer helpers for
// a declare-reduction, caches them in UDRMap, and — when called from
// within a function — records the decl in FunctionUDRMap for cleanup in
// functionFinished().
// NOTE(review): numbering gaps (1347->1349, 1356->1358, 1364->1366)
// indicate dropped lines, including the early `return;`, the
// initializer-kind else-operand, and the `if (CGF)` guard before the
// FunctionUDRMap access.
1345 void CGOpenMPRuntime::emitUserDefinedReduction(
1346 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1347 if (UDRMap.count(D) > 0)
1349 llvm::Function *Combiner = emitCombinerOrInitializer(
1350 CGM, D->getType(), D->getCombiner(),
1351 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1352 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1353 /*IsCombiner=*/true);
1354 llvm::Function *Initializer = nullptr;
1355 if (const Expr *Init = D->getInitializer()) {
1356 Initializer = emitCombinerOrInitializer(
1358 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1360 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1361 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1362 /*IsCombiner=*/false);
1364 UDRMap.try_emplace(D, Combiner, Initializer);
1366 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1367 Decls.second.push_back(D);
// Returns the cached (combiner, initializer) pair for D, emitting the
// helpers on first use (outside any CGF) before the lookup.
// NOTE(review): the `return I->second;` after the find-hit (original
// 1375) was dropped in this copy.
1371 std::pair<llvm::Function *, llvm::Function *>
1372 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1373 auto I = UDRMap.find(D);
1374 if (I != UDRMap.end())
1376 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1377 return UDRMap.lookup(D);
// Outlines the captured statement of a parallel/teams construct into a
// function. HasCancel is computed from the concrete directive kind (each
// 'parallel'-family directive that supports cancellation is checked
// explicitly), then the region info drives the captured-stmt codegen.
1380 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1381 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1382 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1383 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1384 assert(ThreadIDVar->getType()->isPointerType() &&
1385 "thread id variable must be of type kmp_int32 *");
1386 CodeGenFunction CGF(CGM, true);
1387 bool HasCancel = false;
1388 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1389 HasCancel = OPD->hasCancel();
1390 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1391 HasCancel = OPSD->hasCancel();
1392 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1393 HasCancel = OPFD->hasCancel();
1394 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1395 HasCancel = OPFD->hasCancel();
1396 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1397 HasCancel = OPFD->hasCancel();
1398 else if (const auto *OPFD =
1399 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1400 HasCancel = OPFD->hasCancel();
1401 else if (const auto *OPFD =
1402 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1403 HasCancel = OPFD->hasCancel();
// The RAII object installs CGInfo as CGF's CapturedStmtInfo for the
// duration of the outlining.
1404 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1405 HasCancel, OutlinedHelperName);
1406 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1407 return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
// Thin wrapper: outlines the OMPD_parallel captured statement via
// emitParallelOrTeamsOutlinedFunction with the default helper name.
1410 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1411 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1412 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1413 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1414 return emitParallelOrTeamsOutlinedFunction(
1415 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
// Thin wrapper: outlines the OMPD_teams captured statement via
// emitParallelOrTeamsOutlinedFunction with the default helper name.
1418 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1419 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1420 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1421 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1422 return emitParallelOrTeamsOutlinedFunction(
1423 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
// Outlines the captured statement of a task/taskloop directive. For
// untied tasks, UntiedCodeGen re-schedules the task via
// __kmpc_omp_task after each part; the UntiedTaskActionTy action is
// attached to CodeGen and the number of generated parts is returned
// through NumberOfParts.
// NOTE(review): numbering gaps (1435->1437, 1438->1440, 1448->1450,
// 1453->1455, 1457->1459, and past 1459) show dropped lines — parts of
// the TaskArgs initializer, the ?: else-operand, constructor arguments,
// and the final `return Res;`. Verify against upstream before editing.
1426 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1427 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1428 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1429 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1430 bool Tied, unsigned &NumberOfParts) {
1431 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1432 PrePostActionTy &) {
1433 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1434 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1435 llvm::Value *TaskArgs[] = {
1437 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1438 TaskTVar->getType()->castAs<PointerType>())
1440 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1442 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1444 CodeGen.setAction(Action);
// Unlike parallel regions, the task thread-id variable is a value, not a
// pointer (see the task region info's getThreadIDVariableLValue).
1445 assert(!ThreadIDVar->getType()->isPointerType() &&
1446 "thread id variable must be of type kmp_int32 for tasks");
1447 const OpenMPDirectiveKind Region =
1448 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1450 const CapturedStmt *CS = D.getCapturedStmt(Region);
1451 const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1452 CodeGenFunction CGF(CGM, true);
1453 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1455 TD ? TD->hasCancel() : false, Action);
1456 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1457 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1459 NumberOfParts = Action.getNumberOfParts();
// Fills a ConstantStructBuilder from Data, walking the record's fields
// and inserting null padding constants for LLVM struct slots that lie
// between consecutive field indexes.
// NOTE(review): the loop tail (originals 1475-1479: adding *DI, advancing
// DI/PrevIdx, trailing padding, closing brace) was dropped in this copy;
// the local CIBuilder also appears unused in what remains.
1463 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1464 const RecordDecl *RD, const CGRecordLayout &RL,
1465 ArrayRef<llvm::Constant *> Data) {
1466 llvm::StructType *StructTy = RL.getLLVMType();
1467 unsigned PrevIdx = 0;
1468 ConstantInitBuilder CIBuilder(CGM);
1469 auto DI = Data.begin();
1470 for (const FieldDecl *FD : RD->fields()) {
1471 unsigned Idx = RL.getLLVMFieldNo(FD);
1472 // Fill the alignment.
1473 for (unsigned I = PrevIdx; I < Idx; ++I)
1474 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
// Creates a global variable holding a constant struct of record type Ty
// initialized from Data; extra Args are forwarded to
// finishAndCreateGlobal (e.g. linkage).
// NOTE(review): the parameter-pack line between originals 1484 and 1486
// (`As &&... Args`) was dropped in this copy.
1481 template <class... As>
1482 static llvm::GlobalVariable *
1483 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1484 ArrayRef<llvm::Constant *> Data, const Twine &Name,
1486 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1487 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1488 ConstantInitBuilder CIBuilder(CGM);
1489 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1490 buildStructValue(Fields, CGM, RD, RL, Data);
1491 return Fields.finishAndCreateGlobal(
1492 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1493 std::forward<As>(Args)...);
// Builds a constant struct of record type Ty from Data as a nested member
// of an in-progress parent constant builder.
// NOTE(review): lines between originals 1496/1498 and 1499/1501 (the
// `static void` line and the `T &Parent` parameter) were dropped in this
// copy.
1496 template <typename T>
1498 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1499 ArrayRef<llvm::Constant *> Data,
1501 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1502 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1503 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1504 buildStructValue(Fields, CGM, RD, RL, Data);
1505 Fields.finishAndAddTo(Parent);
// Returns (creating and caching on first use, keyed by the flag pair) the
// default ident_t global for the given flags: four int32 fields plus the
// shared ";unknown;unknown;0;0;;" psource string.
// NOTE(review): the `if (!Entry)` guard around the creation path
// (original 1513) and some closing braces were dropped in this copy.
1508 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1509 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1510 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1511 FlagsTy FlagsKey(Flags, Reserved2Flags);
1512 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1514 if (!DefaultOpenMPPSource) {
1515 // Initialize default location for psource field of ident_t structure of
1516 // all ident_t objects. Format is ";file;function;line;column;;".
1518 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1519 DefaultOpenMPPSource =
1520 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1521 DefaultOpenMPPSource =
1522 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1525 llvm::Constant *Data[] = {
1526 llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1527 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1528 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1529 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1530 llvm::GlobalValue *DefaultOpenMPLocation =
1531 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1532 llvm::GlobalValue::PrivateLinkage);
1533 DefaultOpenMPLocation->setUnnamedAddr(
1534 llvm::GlobalValue::UnnamedAddr::Global);
1536 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1538 return Address(Entry, Align);
// Installs a service insert point (a no-op bitcast of undef) for the
// current function, either at the builder's current position or right
// after the alloca insert point, so runtime calls can be emitted there
// later.
1541 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1542 bool AtCurrentPoint) {
1543 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1544 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1546 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1547 if (AtCurrentPoint) {
1548 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1549 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1551 Elem.second.ServiceInsertPt =
1552 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1553 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
// Removes the current function's service insert point marker, if any,
// clearing the map entry before erasing the instruction.
1557 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1558 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1559 if (Elem.second.ServiceInsertPt) {
1560 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1561 Elem.second.ServiceInsertPt = nullptr;
1562 Ptr->eraseFromParent();
// Returns an ident_t* describing Loc for runtime calls. Without debug
// info it returns the cached default location; otherwise it materializes
// a per-function ".kmpc_loc.addr" temp (memcpy'd from the default at the
// service insert point) and stores a ";file;function;line;column;;"
// string into its psource field, caching the string per source location.
// NOTE(review): numbering gaps (1566->1569, 1571->1573, 1589->1592,
// 1604->1606) show dropped lines — the remaining parameters, the
// Loc.isInvalid() test, the `LocValue = AI;` assignment, and the PSource
// lvalue binding; verify against upstream before editing.
1566 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1569 Flags |= OMP_IDENT_KMPC;
1570 // If no debug info is generated - return global default location.
1571 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1573 return getOrCreateDefaultLocation(Flags).getPointer();
1575 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1577 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1578 Address LocValue = Address::invalid();
1579 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1580 if (I != OpenMPLocThreadIDMap.end())
1581 LocValue = Address(I->second.DebugLoc, Align);
1583 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1584 // GetOpenMPThreadID was called before this routine.
1585 if (!LocValue.isValid()) {
1586 // Generate "ident_t .kmpc_loc.addr;"
1587 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1588 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1589 Elem.second.DebugLoc = AI.getPointer();
1592 if (!Elem.second.ServiceInsertPt)
1593 setLocThreadIdInsertPt(CGF);
// The memcpy is emitted at the service point so it dominates all uses.
1594 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1595 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1596 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1597 CGF.getTypeSize(IdentQTy));
1600 // char **psource = &.kmpc_loc_<flags>.addr.psource;
1601 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1602 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1604 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1606 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1607 if (OMPDebugLoc == nullptr) {
1608 SmallString<128> Buffer2;
1609 llvm::raw_svector_ostream OS2(Buffer2);
1610 // Build debug location
1611 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1612 OS2 << ";" << PLoc.getFilename() << ";";
1613 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1614 OS2 << FD->getQualifiedNameAsString();
1615 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1616 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1617 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1619 // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1620 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1622 // Our callers always pass this to a runtime function, so for
1623 // convenience, go ahead and return a naked pointer.
1624 return LocValue.getPointer();
// Returns the kmp_int32 thread id for the current function: first tries
// the per-function cache, then the outlined region's thread-id parameter
// (cached only when loaded in the entry block), and finally falls back to
// a __kmpc_global_thread_num call emitted at the service insert point and
// cached.
// NOTE(review): numbering gaps (1637->1640, 1654->1661, 1674->1678) show
// dropped lines — early returns (`return ThreadID;`), closing braces, and
// the final `return Call;`; verify against upstream before editing.
1627 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1628 SourceLocation Loc) {
1629 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1631 llvm::Value *ThreadID = nullptr;
1632 // Check whether we've already cached a load of the thread id in this
1634 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1635 if (I != OpenMPLocThreadIDMap.end()) {
1636 ThreadID = I->second.ThreadID;
1637 if (ThreadID != nullptr)
1640 // If exceptions are enabled, do not use parameter to avoid possible crash.
1641 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1642 !CGF.getLangOpts().CXXExceptions ||
1643 CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1644 if (auto *OMPRegionInfo =
1645 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1646 if (OMPRegionInfo->getThreadIDVariable()) {
1647 // Check if this an outlined function with thread id passed as argument.
1648 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1649 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1650 // If value loaded in entry block, cache it and use it everywhere in
1652 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1653 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1654 Elem.second.ThreadID = ThreadID;
1661 // This is not an outlined function region - need to call __kmpc_int32
1662 // kmpc_global_thread_num(ident_t *loc).
1663 // Generate thread id value and cache this value for use across the
1665 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1666 if (!Elem.second.ServiceInsertPt)
1667 setLocThreadIdInsertPt(CGF);
1668 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1669 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1670 llvm::CallInst *Call = CGF.Builder.CreateCall(
1671 createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1672 emitUpdateLocation(CGF, Loc));
1673 Call->setCallingConv(CGF.getRuntimeCC());
1674 Elem.second.ThreadID = Call;
// Per-function teardown: drops the cached thread-id/location state and
// removes this function's user-defined reductions from UDRMap.
// NOTE(review): the UDRMap.erase(D) call inside the loop (original 1686)
// and the closing braces were dropped in this copy.
1678 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1679 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1680 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1681 clearLocThreadIdInsertPt(CGF);
1682 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1684 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1685 for(auto *D : FunctionUDRMap[CGF.CurFn])
1687 FunctionUDRMap.erase(CGF.CurFn);
// Return the LLVM type "ident_t *" — the source-location descriptor pointer
// passed as the first argument to most libomp entry points.
1691 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1692 return IdentTy->getPointerTo();
// Return the LLVM type "kmpc_micro *": a pointer to the variadic microtask
// signature void(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...) used by
// __kmpc_fork_call/__kmpc_fork_teams. The function type is built lazily on
// first use and cached in Kmpc_MicroTy.
1695 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1696 if (!Kmpc_MicroTy) {
1697 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1698 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1699 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1700 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1702 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
// Return (declaring on first use) the llvm::FunctionCallee for the libomp /
// libomptarget entry point identified by the OpenMPRTLFunction enumerator
// \p Function. Each case builds the routine's LLVM function type from CGM's
// cached types and declares it via CGM.CreateRuntimeFunction. The two fork
// routines (__kmpc_fork_call / __kmpc_fork_teams) additionally attach
// !callback metadata describing how the microtask argument is invoked, which
// enables interprocedural optimizations across the fork.
1705 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1706 llvm::FunctionCallee RTLFn = nullptr;
1707 switch (static_cast<OpenMPRTLFunction>(Function)) {
1708 case OMPRTL__kmpc_fork_call: {
1709 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1711 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1712 getKmpc_MicroPointerTy()};
1714 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1715 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
// Only annotate once — the declaration is shared module-wide.
1716 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1717 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1718 llvm::LLVMContext &Ctx = F->getContext();
1719 llvm::MDBuilder MDB(Ctx);
1720 // Annotate the callback behavior of the __kmpc_fork_call:
1721 // - The callback callee is argument number 2 (microtask).
1722 // - The first two arguments of the callback callee are unknown (-1).
1723 // - All variadic arguments to the __kmpc_fork_call are passed to the
1726 llvm::LLVMContext::MD_callback,
1727 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1729 /* VarArgsArePassed */ true)}));
1734 case OMPRTL__kmpc_global_thread_num: {
1735 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1736 llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1738 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1739 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1742 case OMPRTL__kmpc_threadprivate_cached: {
1743 // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1744 // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1745 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1746 CGM.VoidPtrTy, CGM.SizeTy,
1747 CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1749 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1750 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1753 case OMPRTL__kmpc_critical: {
1754 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1755 // kmp_critical_name *crit);
1756 llvm::Type *TypeParams[] = {
1757 getIdentTyPointerTy(), CGM.Int32Ty,
1758 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1760 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1764 case OMPRTL__kmpc_critical_with_hint: {
1765 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1766 // kmp_critical_name *crit, uintptr_t hint);
1767 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1768 llvm::PointerType::getUnqual(KmpCriticalNameTy),
1771 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1775 case OMPRTL__kmpc_threadprivate_register: {
1776 // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1777 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1778 // typedef void *(*kmpc_ctor)(void *);
1780 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1781 /*isVarArg*/ false)->getPointerTo();
1782 // typedef void *(*kmpc_cctor)(void *, void *);
1783 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1784 auto *KmpcCopyCtorTy =
1785 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1788 // typedef void (*kmpc_dtor)(void *);
1790 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1792 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1793 KmpcCopyCtorTy, KmpcDtorTy};
1794 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1795 /*isVarArg*/ false);
1796 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1799 case OMPRTL__kmpc_end_critical: {
1800 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1801 // kmp_critical_name *crit);
1802 llvm::Type *TypeParams[] = {
1803 getIdentTyPointerTy(), CGM.Int32Ty,
1804 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1806 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1810 case OMPRTL__kmpc_cancel_barrier: {
1811 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1813 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1815 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1816 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1819 case OMPRTL__kmpc_barrier: {
1820 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1821 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1823 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1824 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1827 case OMPRTL__kmpc_for_static_fini: {
1828 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1829 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1831 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1832 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1835 case OMPRTL__kmpc_push_num_threads: {
1836 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1837 // kmp_int32 num_threads)
1838 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1841 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1842 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1845 case OMPRTL__kmpc_serialized_parallel: {
1846 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1848 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1850 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1851 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1854 case OMPRTL__kmpc_end_serialized_parallel: {
1855 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1857 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1859 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1860 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1863 case OMPRTL__kmpc_flush: {
1864 // Build void __kmpc_flush(ident_t *loc);
1865 llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1867 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1868 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1871 case OMPRTL__kmpc_master: {
1872 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1873 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1875 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1876 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1879 case OMPRTL__kmpc_end_master: {
1880 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1881 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1883 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1884 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1887 case OMPRTL__kmpc_omp_taskyield: {
1888 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1890 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1892 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1893 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1896 case OMPRTL__kmpc_single: {
1897 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1898 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1900 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1901 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1904 case OMPRTL__kmpc_end_single: {
1905 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1906 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1908 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1912 case OMPRTL__kmpc_omp_task_alloc: {
1913 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1914 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1915 // kmp_routine_entry_t *task_entry);
1916 assert(KmpRoutineEntryPtrTy != nullptr &&
1917 "Type kmp_routine_entry_t must be created.");
1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1919 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1920 // Return void * and then cast to particular kmp_task_t type.
1922 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1923 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1926 case OMPRTL__kmpc_omp_target_task_alloc: {
1927 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1928 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1929 // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1930 assert(KmpRoutineEntryPtrTy != nullptr &&
1931 "Type kmp_routine_entry_t must be created.");
1932 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1933 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1935 // Return void * and then cast to particular kmp_task_t type.
1937 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1938 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1941 case OMPRTL__kmpc_omp_task: {
1942 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1944 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1947 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1948 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1951 case OMPRTL__kmpc_copyprivate: {
1952 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1953 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1954 // kmp_int32 didit);
1955 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1957 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1958 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1959 CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1962 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1963 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1966 case OMPRTL__kmpc_reduce: {
1967 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1968 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1969 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1970 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1971 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1972 /*isVarArg=*/false);
1973 llvm::Type *TypeParams[] = {
1974 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1975 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1976 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1978 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1979 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1982 case OMPRTL__kmpc_reduce_nowait: {
1983 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1984 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1985 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1987 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1988 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1989 /*isVarArg=*/false);
1990 llvm::Type *TypeParams[] = {
1991 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1992 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1993 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1995 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1996 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1999 case OMPRTL__kmpc_end_reduce: {
2000 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2001 // kmp_critical_name *lck);
2002 llvm::Type *TypeParams[] = {
2003 getIdentTyPointerTy(), CGM.Int32Ty,
2004 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2006 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2010 case OMPRTL__kmpc_end_reduce_nowait: {
2011 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2012 // kmp_critical_name *lck);
2013 llvm::Type *TypeParams[] = {
2014 getIdentTyPointerTy(), CGM.Int32Ty,
2015 llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2017 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2019 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2022 case OMPRTL__kmpc_omp_task_begin_if0: {
2023 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2025 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2028 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2030 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2033 case OMPRTL__kmpc_omp_task_complete_if0: {
2034 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2036 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2039 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040 RTLFn = CGM.CreateRuntimeFunction(FnTy,
2041 /*Name=*/"__kmpc_omp_task_complete_if0");
2044 case OMPRTL__kmpc_ordered: {
2045 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2046 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2048 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2049 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2052 case OMPRTL__kmpc_end_ordered: {
2053 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2054 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2056 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2057 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2060 case OMPRTL__kmpc_omp_taskwait: {
2061 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2062 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2064 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2065 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2068 case OMPRTL__kmpc_taskgroup: {
2069 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2070 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2072 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2073 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2076 case OMPRTL__kmpc_end_taskgroup: {
2077 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2078 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2080 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2081 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2084 case OMPRTL__kmpc_push_proc_bind: {
2085 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2087 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2089 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2090 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2093 case OMPRTL__kmpc_omp_task_with_deps: {
2094 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2095 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2096 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2097 llvm::Type *TypeParams[] = {
2098 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2099 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
2101 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2103 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2106 case OMPRTL__kmpc_omp_wait_deps: {
2107 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2108 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2109 // kmp_depend_info_t *noalias_dep_list);
2110 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2111 CGM.Int32Ty, CGM.VoidPtrTy,
2112 CGM.Int32Ty, CGM.VoidPtrTy};
2114 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2115 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2118 case OMPRTL__kmpc_cancellationpoint: {
2119 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2120 // global_tid, kmp_int32 cncl_kind)
2121 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2123 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2124 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2127 case OMPRTL__kmpc_cancel: {
2128 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2129 // kmp_int32 cncl_kind)
2130 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2132 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2133 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2136 case OMPRTL__kmpc_push_num_teams: {
2137 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2138 // kmp_int32 num_teams, kmp_int32 num_threads)
2139 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2142 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2143 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2146 case OMPRTL__kmpc_fork_teams: {
2147 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2149 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2150 getKmpc_MicroPointerTy()};
2152 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2153 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2154 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2155 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2156 llvm::LLVMContext &Ctx = F->getContext();
2157 llvm::MDBuilder MDB(Ctx);
2158 // Annotate the callback behavior of the __kmpc_fork_teams:
2159 // - The callback callee is argument number 2 (microtask).
2160 // - The first two arguments of the callback callee are unknown (-1).
2161 // - All variadic arguments to the __kmpc_fork_teams are passed to the
2164 llvm::LLVMContext::MD_callback,
2165 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2167 /* VarArgsArePassed */ true)}));
2172 case OMPRTL__kmpc_taskloop: {
2173 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2174 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2175 // sched, kmp_uint64 grainsize, void *task_dup);
2176 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2180 CGM.Int64Ty->getPointerTo(),
2181 CGM.Int64Ty->getPointerTo(),
2188 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2189 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2192 case OMPRTL__kmpc_doacross_init: {
2193 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2194 // num_dims, struct kmp_dim *dims);
2195 llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2200 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2201 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2204 case OMPRTL__kmpc_doacross_fini: {
2205 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2206 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2208 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2209 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2212 case OMPRTL__kmpc_doacross_post: {
2213 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2215 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2216 CGM.Int64Ty->getPointerTo()};
2218 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2219 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2222 case OMPRTL__kmpc_doacross_wait: {
2223 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2225 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2226 CGM.Int64Ty->getPointerTo()};
2228 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2229 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2232 case OMPRTL__kmpc_task_reduction_init: {
2233 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2235 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2237 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2239 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2242 case OMPRTL__kmpc_task_reduction_get_th_data: {
2243 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2245 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2247 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2248 RTLFn = CGM.CreateRuntimeFunction(
2249 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2252 case OMPRTL__kmpc_alloc: {
2253 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2254 // al); omp_allocator_handle_t type is void *.
2255 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2257 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2258 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2261 case OMPRTL__kmpc_free: {
2262 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2263 // al); omp_allocator_handle_t type is void *.
2264 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2266 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2267 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2270 case OMPRTL__kmpc_push_target_tripcount: {
2271 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2273 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2274 llvm::FunctionType *FnTy =
2275 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2276 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2279 case OMPRTL__tgt_target: {
2280 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2281 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2283 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2288 CGM.Int64Ty->getPointerTo(),
2289 CGM.Int64Ty->getPointerTo()};
2291 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2292 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2295 case OMPRTL__tgt_target_nowait: {
2296 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2297 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2298 // int64_t *arg_types);
2299 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2304 CGM.Int64Ty->getPointerTo(),
2305 CGM.Int64Ty->getPointerTo()};
2307 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2308 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2311 case OMPRTL__tgt_target_teams: {
2312 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2313 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2314 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2315 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2320 CGM.Int64Ty->getPointerTo(),
2321 CGM.Int64Ty->getPointerTo(),
2325 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2326 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2329 case OMPRTL__tgt_target_teams_nowait: {
2330 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2331 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2332 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2333 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2338 CGM.Int64Ty->getPointerTo(),
2339 CGM.Int64Ty->getPointerTo(),
2343 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2344 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2347 case OMPRTL__tgt_register_requires: {
2348 // Build void __tgt_register_requires(int64_t flags);
2349 llvm::Type *TypeParams[] = {CGM.Int64Ty};
2351 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2352 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2355 case OMPRTL__tgt_register_lib: {
2356 // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2358 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2359 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2361 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2362 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2365 case OMPRTL__tgt_unregister_lib: {
2366 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2368 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2369 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2371 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2372 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2375 case OMPRTL__tgt_target_data_begin: {
2376 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2377 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2378 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2382 CGM.Int64Ty->getPointerTo(),
2383 CGM.Int64Ty->getPointerTo()};
2385 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2389 case OMPRTL__tgt_target_data_begin_nowait: {
2390 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2391 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2393 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2397 CGM.Int64Ty->getPointerTo(),
2398 CGM.Int64Ty->getPointerTo()};
2400 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2404 case OMPRTL__tgt_target_data_end: {
2405 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2406 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2407 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2411 CGM.Int64Ty->getPointerTo(),
2412 CGM.Int64Ty->getPointerTo()};
2414 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2415 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2418 case OMPRTL__tgt_target_data_end_nowait: {
2419 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2420 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2422 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2426 CGM.Int64Ty->getPointerTo(),
2427 CGM.Int64Ty->getPointerTo()};
2429 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2430 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2433 case OMPRTL__tgt_target_data_update: {
2434 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2435 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2436 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2440 CGM.Int64Ty->getPointerTo(),
2441 CGM.Int64Ty->getPointerTo()};
2443 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2444 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2447 case OMPRTL__tgt_target_data_update_nowait: {
2448 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2449 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2451 llvm::Type *TypeParams[] = {CGM.Int64Ty,
2455 CGM.Int64Ty->getPointerTo(),
2456 CGM.Int64Ty->getPointerTo()};
2458 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2459 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
// Every case must have produced a declaration; a null callee here means the
// enumerator was not handled above.
2463 assert(RTLFn && "Unable to find OpenMP runtime function");
// Return the declaration of the matching __kmpc_for_static_init_{4,4u,8,8u}
// entry point, selected by the loop induction variable's bit width (IVSize,
// must be 32 or 64) and signedness (IVSigned). The pointer parameters for
// lower/upper bounds use the induction-variable-sized integer type ITy.
2467 llvm::FunctionCallee
2468 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2469 assert((IVSize == 32 || IVSize == 64) &&
2470 "IV size is not compatible with the omp runtime");
2471 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2472 : "__kmpc_for_static_init_4u")
2473 : (IVSigned ? "__kmpc_for_static_init_8"
2474 : "__kmpc_for_static_init_8u");
2475 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2476 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2477 llvm::Type *TypeParams[] = {
2478 getIdentTyPointerTy(), // loc
2480 CGM.Int32Ty, // schedtype
2481 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2489 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490 return CGM.CreateRuntimeFunction(FnTy, Name);
// Return the declaration of the matching __kmpc_dispatch_init_{4,4u,8,8u}
// entry point for dynamically scheduled loops, selected by the induction
// variable's bit width (IVSize, must be 32 or 64) and signedness (IVSigned).
2493 llvm::FunctionCallee
2494 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2495 assert((IVSize == 32 || IVSize == 64) &&
2496 "IV size is not compatible with the omp runtime");
2499 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2500 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2501 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2502 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2504 CGM.Int32Ty, // schedtype
2511 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2512 return CGM.CreateRuntimeFunction(FnTy, Name);
// Return the declaration of the matching __kmpc_dispatch_fini_{4,4u,8,8u}
// entry point that finishes a dynamically scheduled loop chunk, selected by
// the induction variable's bit width (IVSize, must be 32 or 64) and
// signedness (IVSigned).
2515 llvm::FunctionCallee
2516 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2517 assert((IVSize == 32 || IVSize == 64) &&
2518 "IV size is not compatible with the omp runtime");
2521 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2522 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2523 llvm::Type *TypeParams[] = {
2524 getIdentTyPointerTy(), // loc
2528 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2529 return CGM.CreateRuntimeFunction(FnTy, Name);
// Return the declaration of the matching __kmpc_dispatch_next_{4,4u,8,8u}
// entry point that fetches the next dynamically scheduled chunk, selected by
// the induction variable's bit width (IVSize, must be 32 or 64) and
// signedness (IVSigned). Bound out-parameters use the IV-sized pointer PtrTy.
2532 llvm::FunctionCallee
2533 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2534 assert((IVSize == 32 || IVSize == 64) &&
2535 "IV size is not compatible with the omp runtime");
2538 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2539 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2540 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2541 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2542 llvm::Type *TypeParams[] = {
2543 getIdentTyPointerTy(), // loc
2545 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2551 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2552 return CGM.CreateRuntimeFunction(FnTy, Name);
/// For a 'declare target' variable that must be accessed indirectly — map-type
/// 'link', or 'to' when unified shared memory is required — return the address
/// of the generated "<mangled>_decl_tgt_ref_ptr" pointer; otherwise (or under
/// -fopenmp-simd) return an invalid Address so the caller uses the variable
/// directly.
2555 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
// Simd-only mode generates no offloading machinery.
2556 if (CGM.getLangOpts().OpenMPSimd)
2557 return Address::invalid();
2558 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2559 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2560 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2561 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2562 HasRequiresUnifiedSharedMemory))) {
// Build the name "<mangled>_decl_tgt_ref_ptr" for the indirection pointer.
2563 SmallString<64> PtrName;
2565 llvm::raw_svector_ostream OS(PtrName);
2566 OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
2568 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
// Lazily create the pointer-to-VD global if it does not exist yet.
2570 QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2571 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
// On the host, the reference pointer is externally visible and initialized
// to the variable's real address; it is kept alive via llvm.used and
// registered with the offloading tables.
2573 if (!CGM.getLangOpts().OpenMPIsDevice) {
2574 auto *GV = cast<llvm::GlobalVariable>(Ptr);
2575 GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2576 GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2578 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2579 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2581 return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2583 return Address::invalid();
/// Return (creating on first use) the per-variable "<mangled>.cache." global
/// handed to __kmpc_threadprivate_cached for threadprivate variable VD.
/// Only used when native TLS is not used/supported (see the assert).
// NOTE(review): the return-type line (2586) is missing from this listing.
2587 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2588 assert(!CGM.getLangOpts().OpenMPUseTLS ||
2589 !CGM.getContext().getTargetInfo().isTLSSupported());
2590 // Lookup the entry, lazily creating it if necessary.
2591 std::string Suffix = getName({"cache", ""});
2592 return getOrCreateInternalVariable(
2593 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
/// Return the address of the calling thread's copy of threadprivate variable
/// VD by emitting a call to __kmpc_threadprivate_cached(loc, gtid, &var,
/// size, &cache). When native TLS is in use the runtime call is skipped
/// (the early path at 2600-2601; its return statement is missing from this
/// listing — NOTE(review)).
2596 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2599 SourceLocation Loc) {
2600 if (CGM.getLangOpts().OpenMPUseTLS &&
2601 CGM.getContext().getTargetInfo().isTLSSupported())
2604 llvm::Type *VarTy = VDAddr.getElementType();
// Args: loc, gtid, &var (as i8*), sizeof(var), per-variable cache global.
2605 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2606 CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2608 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2609 getOrCreateThreadPrivateCache(VD)};
// The runtime returns the thread-local copy; original alignment is kept.
2610 return Address(CGF.EmitRuntimeCall(
2611 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2612 VDAddr.getAlignment());
/// Register the ctor/copy-ctor/dtor triple for a threadprivate variable with
/// the OpenMP runtime via __kmpc_threadprivate_register, after forcing
/// runtime initialization with __kmpc_global_thread_num.
2615 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2616 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2617 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2618 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2620 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2621 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2623 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2624 // to register constructor/destructor for variable.
2625 llvm::Value *Args[] = {
2626 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2627 Ctor, CopyCtor, Dtor};
2628 CGF.EmitRuntimeCall(
2629 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
/// Emit the runtime registration machinery for a '#pragma omp threadprivate'
/// variable definition: synthesized ctor/dtor wrappers (when needed), NULL
/// placeholders otherwise, and either an "__omp_threadprivate_init_" global
/// initializer function (returned, for registration at program start) or an
/// inline registration into the caller's CGF.
/// Skipped entirely when native TLS handles the variable.
// NOTE(review): this listing drops many lines (e.g. 2637-2638, 2675-2677,
// 2706-2711, 2721-2725, 2745-2746) — early returns, closing braces and the
// "needs init function" decision are not visible here.
2632 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2633 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2634 bool PerformInit, CodeGenFunction *CGF) {
// With real TLS support the runtime registration is unnecessary.
2635 if (CGM.getLangOpts().OpenMPUseTLS &&
2636 CGM.getContext().getTargetInfo().isTLSSupported())
2639 VD = VD->getDefinition(CGM.getContext());
// Emit once per mangled name; ThreadPrivateWithDefinition dedups.
2640 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2641 QualType ASTTy = VD->getType();
2643 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2644 const Expr *Init = VD->getAnyInitializer();
2645 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2646 // Generate function that re-emits the declaration's initializer into the
2647 // threadprivate copy of the variable VD
2648 CodeGenFunction CtorCGF(CGM);
2649 FunctionArgList Args;
// The wrapper takes the thread's copy as a single void* parameter.
2650 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2651 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2652 ImplicitParamDecl::Other);
2653 Args.push_back(&Dst);
2655 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2656 CGM.getContext().VoidPtrTy, Args);
2657 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2658 std::string Name = getName({"__kmpc_global_ctor_", ""});
2659 llvm::Function *Fn =
2660 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2661 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
// Load the destination pointer, cast it to the variable's type, and emit
// the variable's initializer into it.
2663 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2664 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2665 CGM.getContext().VoidPtrTy, Dst.getLocation());
2666 Address Arg = Address(ArgVal, VDAddr.getAlignment());
2667 Arg = CtorCGF.Builder.CreateElementBitCast(
2668 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2669 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2670 /*IsInitializer=*/true);
// The ctor wrapper returns the same pointer it received.
2671 ArgVal = CtorCGF.EmitLoadOfScalar(
2672 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2673 CGM.getContext().VoidPtrTy, Dst.getLocation());
2674 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2675 CtorCGF.FinishFunction();
2678 if (VD->getType().isDestructedType() != QualType::DK_none) {
2679 // Generate function that emits destructor call for the threadprivate copy
2680 // of the variable VD
2681 CodeGenFunction DtorCGF(CGM);
2682 FunctionArgList Args;
2683 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2684 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2685 ImplicitParamDecl::Other);
2686 Args.push_back(&Dst);
2688 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2689 CGM.getContext().VoidTy, Args);
2690 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2691 std::string Name = getName({"__kmpc_global_dtor_", ""});
2692 llvm::Function *Fn =
2693 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
// Suppress a real debug location for the prologue, then use an artificial
// one for the body.
2694 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2695 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2697 // Create a scope with an artificial location for the body of this function.
2698 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2699 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2700 DtorCGF.GetAddrOfLocalVar(&Dst),
2701 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2702 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2703 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2704 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2705 DtorCGF.FinishFunction();
2708 // Do not emit init function if it is not required.
// The runtime insists the copy-constructor slot be NULL (see comment below),
// and unneeded ctor/dtor slots are filled with typed null pointers.
2712 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2713 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2716 // Copying constructor for the threadprivate variable.
2717 // Must be NULL - reserved by runtime, but currently it requires that this
2718 // parameter is always NULL. Otherwise it fires assertion.
2719 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2720 if (Ctor == nullptr) {
2721 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2724 Ctor = llvm::Constant::getNullValue(CtorTy);
2726 if (Dtor == nullptr) {
2727 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2730 Dtor = llvm::Constant::getNullValue(DtorTy);
// No CGF supplied: synthesize a standalone "__omp_threadprivate_init_"
// function that performs the registration and return it to the caller.
2733 auto *InitFunctionTy =
2734 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2735 std::string Name = getName({"__omp_threadprivate_init_", ""});
2736 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2737 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2738 CodeGenFunction InitCGF(CGM);
2739 FunctionArgList ArgList;
2740 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2741 CGM.getTypes().arrangeNullaryFunction(), ArgList,
2743 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2744 InitCGF.FinishFunction();
2745 return InitFunction;
// Otherwise register inline in the provided CodeGenFunction.
2747 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2752 /// Obtain information that uniquely identifies a target entry. This
2753 /// consists of the file and device IDs as well as line number associated with
2754 /// the relevant entry source location.
2755 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2756 unsigned &DeviceID, unsigned &FileID,
2757 unsigned &LineNum) {
2758 SourceManager &SM = C.getSourceManager();
2760 // The loc should be always valid and have a file ID (the user cannot use
2761 // #pragma directives in macros)
2763 assert(Loc.isValid() && "Source location is expected to be always valid.");
2765 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2766 assert(PLoc.isValid() && "Source location is expected to be always valid.");
// Use the filesystem's (device, inode) pair as a stable file identity;
// diagnose if the file cannot be stat'ed.
2768 llvm::sys::fs::UniqueID ID;
2769 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2770 SM.getDiagnostics().Report(diag::err_cannot_open_file)
2771 << PLoc.getFilename() << EC.message();
2773 DeviceID = ID.getDevice();
2774 FileID = ID.getFile();
2775 LineNum = PLoc.getLine();
/// Emit device-side ctor/dtor offload entries for a 'declare target' variable
/// definition. For map-type 'to' variables needing dynamic initialization
/// and/or destruction, generates "<prefix>_ctor"/"<prefix>_dtor" functions on
/// the device (placeholder globals on the host) and registers them as
/// offload-entry regions keyed by (device-id, file-id, line).
/// Returns OpenMPIsDevice, i.e. "true" when compiling for the device.
// NOTE(review): this listing drops several lines (2790, 2797-2799, 2807,
// 2829-2830/2833/2838-2842, 2868-2869/2872/2877-2880, 2884, 2886) — the
// DeviceID/FileID/Line declarations, the ID declarations and the host-side
// ID assignments are not all visible here.
2778 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2779 llvm::GlobalVariable *Addr,
// Only 'to' without unified shared memory takes this path; 'link' (and 'to'
// with USM) is handled through getAddrOfDeclareTargetVar instead.
2781 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2782 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2783 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2784 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2785 HasRequiresUnifiedSharedMemory))
2786 return CGM.getLangOpts().OpenMPIsDevice;
2787 VD = VD->getDefinition(CGM.getContext());
// Emit once per mangled name; DeclareTargetWithDefinition dedups.
2788 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2789 return CGM.getLangOpts().OpenMPIsDevice;
2791 QualType ASTTy = VD->getType();
2793 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2794 // Produce the unique prefix to identify the new target regions. We use
2795 // the source location of the variable declaration which we know to not
2796 // conflict with any target region.
2800 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2801 SmallString<128> Buffer, Out;
2803 llvm::raw_svector_ostream OS(Buffer);
2804 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2805 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2808 const Expr *Init = VD->getAnyInitializer();
2809 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2810 llvm::Constant *Ctor;
2812 if (CGM.getLangOpts().OpenMPIsDevice) {
2813 // Generate function that re-emits the declaration's initializer into
2814 // the threadprivate copy of the variable VD
2815 CodeGenFunction CtorCGF(CGM);
2817 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2818 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2819 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2820 FTy, Twine(Buffer, "_ctor"), FI, Loc);
2821 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2822 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2823 FunctionArgList(), Loc, Loc);
2824 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2825 CtorCGF.EmitAnyExprToMem(Init,
2826 Address(Addr, CGM.getContext().getDeclAlign(VD)),
2827 Init->getType().getQualifiers(),
2828 /*IsInitializer=*/true);
2829 CtorCGF.FinishFunction();
// The entry ID is the ctor function itself (as i8*); keep the ctor alive.
2831 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2832 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
// Host side: emit only a private placeholder global for the entry table.
2834 Ctor = new llvm::GlobalVariable(
2835 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2836 llvm::GlobalValue::PrivateLinkage,
2837 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2841 // Register the information for the entry associated with the constructor.
2843 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2844 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2845 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2847 if (VD->getType().isDestructedType() != QualType::DK_none) {
2848 llvm::Constant *Dtor;
2850 if (CGM.getLangOpts().OpenMPIsDevice) {
2851 // Generate function that emits destructor call for the threadprivate
2852 // copy of the variable VD
2853 CodeGenFunction DtorCGF(CGM);
2855 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2856 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2857 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2858 FTy, Twine(Buffer, "_dtor"), FI, Loc);
2859 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2860 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2861 FunctionArgList(), Loc, Loc);
2862 // Create a scope with an artificial location for the body of this
2864 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2865 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2866 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2867 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2868 DtorCGF.FinishFunction();
// The entry ID is the dtor function itself (as i8*); keep the dtor alive.
2870 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2871 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
// Host side: emit only a private placeholder global for the entry table.
2873 Dtor = new llvm::GlobalVariable(
2874 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2875 llvm::GlobalValue::PrivateLinkage,
2876 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2879 // Register the information for the entry associated with the destructor.
2881 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2882 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2883 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2885 return CGM.getLangOpts().OpenMPIsDevice;
/// Return the address of a compiler-generated ("artificial") threadprivate
/// value identified by Name: a "<Name>.artificial." global plus a
/// "...cache." global are created lazily and handed to
/// __kmpc_threadprivate_cached, whose result is cast to VarType's pointer.
// NOTE(review): the Name/VarType parameter lines (2889-2890) and the
// 'return' keyword line (2904) are missing from this listing.
2888 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2891 std::string Suffix = getName({"artificial", ""});
2892 std::string CacheSuffix = getName({"cache", ""});
2893 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2894 llvm::Value *GAddr =
2895 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
// Args: loc, gtid, &global (i8*), sizeof(VarType), per-name cache global.
2896 llvm::Value *Args[] = {
2897 emitUpdateLocation(CGF, SourceLocation()),
2898 getThreadID(CGF, SourceLocation()),
2899 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2900 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2901 /*isSigned=*/false),
2902 getOrCreateInternalVariable(
2903 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
// Cast the runtime's i8* result back to a typed pointer.
2905 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2906 CGF.EmitRuntimeCall(
2907 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2908 VarLVType->getPointerTo(/*AddrSpace=*/0)),
2909 CGM.getPointerAlign());
/// Emit "if (Cond) ThenGen(); else ElseGen();" for an OpenMP 'if' clause,
/// constant-folding the condition when possible to emit only the live arm.
// NOTE(review): lines 2916, 2919, 2921-2927, 2934, 2937, 2943 are missing
// from this listing — the constant-fold fast path's bodies and the RegionCodeGenTy
// invocations are not visible here.
2912 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2913 const RegionCodeGenTy &ThenGen,
2914 const RegionCodeGenTy &ElseGen) {
2915 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2917 // If the condition constant folds and can be elided, try to avoid emitting
2918 // the condition and the dead arm of the if/else.
2920 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2928 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2929 // emit the conditional branch.
2930 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2931 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2932 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2933 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2935 // Emit the 'then' code.
2936 CGF.EmitBlock(ThenBlock);
2938 CGF.EmitBranch(ContBlock);
2939 // Emit the 'else' code if present.
2940 // There is no need to emit line number for unconditional branch.
2941 (void)ApplyDebugLocation::CreateEmpty(CGF);
2942 CGF.EmitBlock(ElseBlock);
2944 // There is no need to emit line number for unconditional branch.
2945 (void)ApplyDebugLocation::CreateEmpty(CGF);
2946 CGF.EmitBranch(ContBlock);
2947 // Emit the continuation block for code after the if.
2948 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
/// Emit the call machinery for '#pragma omp parallel': either a
/// __kmpc_fork_call with the outlined microtask (parallel path), or a
/// __kmpc_serialized_parallel / direct-call / __kmpc_end_serialized_parallel
/// sequence (serial path), selected at runtime by the 'if' clause when
/// IfCond is non-null.
// NOTE(review): lines 2956, 2963, 2969, 2973, 2978, 2983, 2994, 2999-3001,
// 3003, 3005-3006 are missing from this listing — the early return, the RTLoc
// argument entry, and the no-IfCond ThenRCG(CGF) invocation are not all
// visible here.
2951 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2952 llvm::Function *OutlinedFn,
2953 ArrayRef<llvm::Value *> CapturedVars,
2954 const Expr *IfCond) {
2955 if (!CGF.HaveInsertPoint())
2957 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2958 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2959 PrePostActionTy &) {
2960 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2961 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2962 llvm::Value *Args[] = {
2964 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2965 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
// Fixed args first, then the captured variables as varargs.
2966 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2967 RealArgs.append(std::begin(Args), std::end(Args));
2968 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2970 llvm::FunctionCallee RTLFn =
2971 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2972 CGF.EmitRuntimeCall(RTLFn, RealArgs);
// Serial fallback: run the outlined body on the current thread between
// (end_)serialized_parallel runtime calls.
2974 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2975 PrePostActionTy &) {
2976 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2977 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2979 // __kmpc_serialized_parallel(&Loc, GTid);
2980 llvm::Value *Args[] = {RTLoc, ThreadID};
2981 CGF.EmitRuntimeCall(
2982 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2984 // OutlinedFn(&gtid, &zero, CapturedStruct);
2985 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2986 /*Name*/ ".zero.addr");
2987 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2988 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2989 // ThreadId for serialized parallels is 0.
2990 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2991 OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2992 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2993 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2995 // __kmpc_end_serialized_parallel(&Loc, GTid);
2996 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2997 CGF.EmitRuntimeCall(
2998 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
// With an 'if' clause, branch between the two generators at runtime;
// otherwise only the fork path is emitted.
3002 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3004 RegionCodeGenTy ThenRCG(ThenGen);
3009 // If we're inside an (outlined) parallel region, use the region info's
3010 // thread-ID variable (it is passed in a first argument of the outlined function
3011 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
3012 // regular serial code region, get thread ID by calling kmp_int32
3013 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
3014 // return the address of that temp.
3015 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
3016 SourceLocation Loc) {
// Fast path: reuse the gtid parameter of the enclosing outlined region.
3017 if (auto *OMPRegionInfo =
3018 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3019 if (OMPRegionInfo->getThreadIDVariable())
3020 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
// Slow path: materialize the thread ID into a ".threadid_temp." alloca.
3022 llvm::Value *ThreadID = getThreadID(CGF, Loc);
3024 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3025 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3026 CGF.EmitStoreOfScalar(ThreadID,
3027 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3029 return ThreadIDTemp;
/// Return a module-level global with the given type and name, creating it
/// (common linkage, zero-initialized) on first request and caching it in
/// InternalVars; asserts the type matches on subsequent lookups.
// NOTE(review): line 3036 (writing the name into Out) and the "if (Elem.second)"
// guard around the assert (3039) are missing from this listing.
3032 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
3033 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3034 SmallString<256> Buffer;
3035 llvm::raw_svector_ostream Out(Buffer);
3037 StringRef RuntimeName = Out.str();
// try_emplace gives us the cache slot whether or not it existed before.
3038 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3040 assert(Elem.second->getType()->getPointerElementType() == Ty &&
3041 "OMP internal variable has different type than requested");
3042 return &*Elem.second;
// Cache miss: create the zero-initialized common-linkage global.
3045 return Elem.second = new llvm::GlobalVariable(
3046 CGM.getModule(), Ty, /*IsConstant*/ false,
3047 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3048 Elem.first(), /*InsertBefore=*/nullptr,
3049 llvm::GlobalValue::NotThreadLocal, AddressSpace);
/// Return the named lock global ("gomp_critical_user_<name>.var") used by
/// __kmpc_critical for the given critical-section name; created lazily.
3052 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
3053 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3054 std::string Name = getName({Prefix, "var"});
3055 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3059 /// Common pre(post)-action for different OpenMP constructs.
// Wraps a region with an "enter" runtime call before it and an "exit" call
// after it. In Conditional mode the enter call's nonzero result gates the
// region (e.g. __kmpc_master / __kmpc_single).
// NOTE(review): the Conditional member declaration, public:/private: labels
// and several conditional guards (lines 3065, 3067-3068, 3077, 3084-3085,
// 3090, 3093-3094) are missing from this listing.
3060 class CommonActionTy final : public PrePostActionTy {
3061 llvm::FunctionCallee EnterCallee;
3062 ArrayRef<llvm::Value *> EnterArgs;
3063 llvm::FunctionCallee ExitCallee;
3064 ArrayRef<llvm::Value *> ExitArgs;
3066 llvm::BasicBlock *ContBlock = nullptr;
3069 CommonActionTy(llvm::FunctionCallee EnterCallee,
3070 ArrayRef<llvm::Value *> EnterArgs,
3071 llvm::FunctionCallee ExitCallee,
3072 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3073 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3074 ExitArgs(ExitArgs), Conditional(Conditional) {}
// Emit the enter call; in Conditional mode branch on its result so the
// region body only runs when the runtime says this thread should execute.
3075 void Enter(CodeGenFunction &CGF) override {
3076 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3078 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3079 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3080 ContBlock = CGF.createBasicBlock("omp_if.end");
3081 // Generate the branch (If-stmt)
3082 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3083 CGF.EmitBlock(ThenBlock);
// Close the conditional region opened in Enter().
3086 void Done(CodeGenFunction &CGF) {
3087 // Emit the rest of blocks/branches
3088 CGF.EmitBranch(ContBlock);
3089 CGF.EmitBlock(ContBlock, true);
// Emit the exit runtime call after the region body.
3091 void Exit(CodeGenFunction &CGF) override {
3092 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3095 } // anonymous namespace
/// Emit '#pragma omp critical [name] [hint(...)]' as a region bracketed by
/// __kmpc_critical (or __kmpc_critical_with_hint when Hint is given) and
/// __kmpc_end_critical, both taking the per-name lock global.
3097 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
3098 StringRef CriticalName,
3099 const RegionCodeGenTy &CriticalOpGen,
3100 SourceLocation Loc, const Expr *Hint) {
3101 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3103 // __kmpc_end_critical(ident_t *, gtid, Lock);
3104 // Prepare arguments and build a call to __kmpc_critical
3105 if (!CGF.HaveInsertPoint())
3107 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3108 getCriticalRegionLock(CriticalName)};
3109 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
// The hint value is appended only to the enter call's argument list.
3112 EnterArgs.push_back(CGF.Builder.CreateIntCast(
3113 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3115 CommonActionTy Action(
3116 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
3117 : OMPRTL__kmpc_critical),
3118 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
3119 CriticalOpGen.setAction(Action);
3120 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
/// Emit '#pragma omp master': the region body is guarded by the i32 result
/// of __kmpc_master (Conditional CommonActionTy) and closed with
/// __kmpc_end_master.
3123 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
3124 const RegionCodeGenTy &MasterOpGen,
3125 SourceLocation Loc) {
3126 if (!CGF.HaveInsertPoint())
3128 // if(__kmpc_master(ident_t *, gtid)) {
3130 // __kmpc_end_master(ident_t *, gtid);
3132 // Prepare arguments and build a call to __kmpc_master
3133 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3134 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3135 createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
3136 /*Conditional=*/true);
3137 MasterOpGen.setAction(Action);
3138 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
/// Emit '#pragma omp taskyield' as __kmpc_omp_taskyield(loc, gtid, 0), then
/// let an enclosing untied-task region resume at the next part-id if needed.
3142 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
3143 SourceLocation Loc) {
3144 if (!CGF.HaveInsertPoint())
3146 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3147 llvm::Value *Args[] = {
3148 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3149 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3150 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
// Untied tasks switch to the next re-entry point after a yield.
3151 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3152 Region->emitUntiedSwitch(CGF);
/// Emit '#pragma omp taskgroup': the region is bracketed by unconditional
/// __kmpc_taskgroup / __kmpc_end_taskgroup runtime calls.
3155 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3156 const RegionCodeGenTy &TaskgroupOpGen,
3157 SourceLocation Loc) {
3158 if (!CGF.HaveInsertPoint())
3160 // __kmpc_taskgroup(ident_t *, gtid);
3161 // TaskgroupOpGen();
3162 // __kmpc_end_taskgroup(ident_t *, gtid);
3163 // Prepare arguments and build a call to __kmpc_taskgroup
3164 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3165 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3166 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3168 TaskgroupOpGen.setAction(Action);
3169 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3172 /// Given an array of pointers to variables, project the address of a
// (rest of the doc comment, line 3173, is missing from this listing.)
3174 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3175 unsigned Index, const VarDecl *Var) {
3176 // Pull out the pointer to the variable.
3177 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3178 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
// Re-type the raw pointer with the variable's declared alignment and type.
3180 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3181 Addr = CGF.Builder.CreateElementBitCast(
3182 Addr, CGF.ConvertTypeForMem(Var->getType()));
/// Synthesize the "omp.copyprivate.copy_func" helper passed to
/// __kmpc_copyprivate: void(void *LHSArg, void *RHSArg) which casts both
/// arguments to arrays of pointers (ArgsType) and performs the per-variable
/// copy-assignments listed in AssignmentOps.
// NOTE(review): the 'return Fn;' line (3237) and several declaration
// fragments (e.g. the CGFI / Name lines 3200/3202) are truncated in this
// listing.
3186 static llvm::Value *emitCopyprivateCopyFunction(
3187 CodeGenModule &CGM, llvm::Type *ArgsType,
3188 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3189 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3190 SourceLocation Loc) {
3191 ASTContext &C = CGM.getContext();
3192 // void copy_func(void *LHSArg, void *RHSArg);
3193 FunctionArgList Args;
3194 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3195 ImplicitParamDecl::Other);
3196 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3197 ImplicitParamDecl::Other);
3198 Args.push_back(&LHSArg);
3199 Args.push_back(&RHSArg);
3201 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3203 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3204 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3205 llvm::GlobalValue::InternalLinkage, Name,
3207 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3208 Fn->setDoesNotRecurse();
3209 CodeGenFunction CGF(CGM);
3210 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3211 // Dest = (void*[n])(LHSArg);
3212 // Src = (void*[n])(RHSArg);
3213 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3214 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3215 ArgsType), CGF.getPointerAlign());
3216 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3217 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3218 ArgsType), CGF.getPointerAlign());
3219 // *(Type0*)Dst[0] = *(Type0*)Src[0];
3220 // *(Type1*)Dst[1] = *(Type1*)Src[1];
3222 // *(Typen*)Dst[n] = *(Typen*)Src[n];
// One copy-assignment per copyprivate variable, addressed out of the
// pointer arrays by index.
3223 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3224 const auto *DestVar =
3225 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3226 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3228 const auto *SrcVar =
3229 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3230 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3232 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3233 QualType Type = VD->getType();
3234 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3236 CGF.FinishFunction();
/// Emit '#pragma omp single [copyprivate(...)]': the region body is guarded
/// by __kmpc_single / __kmpc_end_single; when copyprivate variables are
/// present, a did_it flag plus a pointer array and generated copy function
/// are passed to __kmpc_copyprivate so the executing thread's values are
/// broadcast to the team.
// NOTE(review): several lines are missing from this listing (e.g. 3248,
// 3255/3257-3258/3261, 3269, 3278/3280-3281, 3297-3298, 3305/3307, 3316),
// including the early return, the DidIt=1 store guard and the Elem store's
// closing argument.
3240 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3241 const RegionCodeGenTy &SingleOpGen,
3243 ArrayRef<const Expr *> CopyprivateVars,
3244 ArrayRef<const Expr *> SrcExprs,
3245 ArrayRef<const Expr *> DstExprs,
3246 ArrayRef<const Expr *> AssignmentOps) {
3247 if (!CGF.HaveInsertPoint())
3249 assert(CopyprivateVars.size() == SrcExprs.size() &&
3250 CopyprivateVars.size() == DstExprs.size() &&
3251 CopyprivateVars.size() == AssignmentOps.size());
3252 ASTContext &C = CGM.getContext();
3253 // int32 did_it = 0;
3254 // if(__kmpc_single(ident_t *, gtid)) {
3256 // __kmpc_end_single(ident_t *, gtid);
3259 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3260 // <copy_func>, did_it);
3262 Address DidIt = Address::invalid();
3263 if (!CopyprivateVars.empty()) {
3264 // int32 did_it = 0;
3265 QualType KmpInt32Ty =
3266 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3267 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3268 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3270 // Prepare arguments and build a call to __kmpc_single
3271 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3272 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3273 createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3274 /*Conditional=*/true);
3275 SingleOpGen.setAction(Action);
3276 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
// The thread that executed the single region records did_it = 1.
3277 if (DidIt.isValid()) {
3279 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3282 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3283 // <copy_func>, did_it);
3284 if (DidIt.isValid()) {
// Build a void*[n] array of addresses of the copyprivate variables.
3285 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3286 QualType CopyprivateArrayTy =
3287 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3288 /*IndexTypeQuals=*/0);
3289 // Create a list of all private variables for copyprivate.
3290 Address CopyprivateList =
3291 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3292 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3293 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3294 CGF.Builder.CreateStore(
3295 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3296 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3299 // Build function that copies private values from single region to all other
3300 // threads in the corresponding parallel region.
3301 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3302 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3303 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3304 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3306 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3308 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3309 llvm::Value *Args[] = {
3310 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3311 getThreadID(CGF, Loc), // i32 <gtid>
3312 BufSize, // size_t <buf_size>
3313 CL.getPointer(), // void *<copyprivate list>
3314 CpyFn, // void (*) (void *, void *) <copy_func>
3315 DidItVal // i32 did_it
3317 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
/// Emit '#pragma omp ordered': when IsThreads, bracket the region with
/// __kmpc_ordered / __kmpc_end_ordered; the trailing emitInlinedDirective at
/// 3339 is presumably the non-threads fallback (its guarding else is not
/// visible in this listing).
3321 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3322 const RegionCodeGenTy &OrderedOpGen,
3323 SourceLocation Loc, bool IsThreads) {
3324 if (!CGF.HaveInsertPoint())
3326 // __kmpc_ordered(ident_t *, gtid);
3328 // __kmpc_end_ordered(ident_t *, gtid);
3329 // Prepare arguments and build a call to __kmpc_ordered
3331 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3332 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3333 createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3335 OrderedOpGen.setAction(Action);
3336 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3339 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
/// Map a directive kind to the OMP_IDENT_BARRIER_* flag encoded in the
/// ident_t location passed to barrier runtime calls: implicit-for/sections/
/// single barriers, the explicit 'barrier' directive, or the generic
/// implicit barrier for everything else.
// NOTE(review): the Flags declaration (3343) and the return statement are
// not visible in this listing.
3342 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
3344 if (Kind == OMPD_for)
3345 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3346 else if (Kind == OMPD_sections)
3347 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3348 else if (Kind == OMPD_single)
3349 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3350 else if (Kind == OMPD_barrier)
3351 Flags = OMP_IDENT_BARRIER_EXPL;
3353 Flags = OMP_IDENT_BARRIER_IMPL;
/// Choose the default schedule for a loop directive: for doacross loops
/// (an 'ordered' clause with a loop count) force schedule(static, 1) by
/// synthesizing an unsigned-32-bit IntegerLiteral chunk of 1; otherwise the
/// outputs are left untouched.
3357 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
3358 CodeGenFunction &CGF, const OMPLoopDirective &S,
3359 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3360 // Check if the loop directive is actually a doacross loop directive. In this
3361 // case choose static, 1 schedule.
// An ordered clause with a nonzero loop-count expression marks a doacross.
3363 S.getClausesOfKind<OMPOrderedClause>(),
3364 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3365 ScheduleKind = OMPC_SCHEDULE_static;
3366 // Chunk size is 1 in this case.
3367 llvm::APInt ChunkSize(32, 1);
3368 ChunkExpr = IntegerLiteral::Create(
3369 CGF.getContext(), ChunkSize,
3370 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3375 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3376 OpenMPDirectiveKind Kind, bool EmitChecks,
3377 bool ForceSimpleCall) {
3378 if (!CGF.HaveInsertPoint())
3380 // Build call __kmpc_cancel_barrier(loc, thread_id);
3381 // Build call __kmpc_barrier(loc, thread_id);
3382 unsigned Flags = getDefaultFlagsForBarriers(Kind);
3383 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3385 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3386 getThreadID(CGF, Loc)};
3387 if (auto *OMPRegionInfo =
3388 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3389 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3390 llvm::Value *Result = CGF.EmitRuntimeCall(
3391 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3393 // if (__kmpc_cancel_barrier()) {
3394 // exit from construct;
3396 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3397 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3398 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3399 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3400 CGF.EmitBlock(ExitBB);
3401 // exit from construct;
3402 CodeGenFunction::JumpDest CancelDestination =
3403 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3404 CGF.EmitBranchThroughCleanup(CancelDestination);
3405 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3410 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3413 /// Map the OpenMP loop schedule to the runtime enumeration.
3414 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3415 bool Chunked, bool Ordered) {
3416 switch (ScheduleKind) {
3417 case OMPC_SCHEDULE_static:
3418 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3419 : (Ordered ? OMP_ord_static : OMP_sch_static);
3420 case OMPC_SCHEDULE_dynamic:
3421 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3422 case OMPC_SCHEDULE_guided:
3423 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3424 case OMPC_SCHEDULE_runtime:
3425 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3426 case OMPC_SCHEDULE_auto:
3427 return Ordered ? OMP_ord_auto : OMP_sch_auto;
3428 case OMPC_SCHEDULE_unknown:
3429 assert(!Chunked && "chunk was specified but schedule kind not known");
3430 return Ordered ? OMP_ord_static : OMP_sch_static;
3432 llvm_unreachable("Unexpected runtime schedule");
3435 /// Map the OpenMP distribute schedule to the runtime enumeration.
3436 static OpenMPSchedType
3437 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3438 // only static is allowed for dist_schedule
3439 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3442 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3443 bool Chunked) const {
3444 OpenMPSchedType Schedule =
3445 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3446 return Schedule == OMP_sch_static;
3449 bool CGOpenMPRuntime::isStaticNonchunked(
3450 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3451 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3452 return Schedule == OMP_dist_sch_static;
3455 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
3456 bool Chunked) const {
3457 OpenMPSchedType Schedule =
3458 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3459 return Schedule == OMP_sch_static_chunked;
3462 bool CGOpenMPRuntime::isStaticChunked(
3463 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3464 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3465 return Schedule == OMP_dist_sch_static_chunked;
3468 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3469 OpenMPSchedType Schedule =
3470 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3471 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3472 return Schedule != OMP_sch_static;
3475 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3476 OpenMPScheduleClauseModifier M1,
3477 OpenMPScheduleClauseModifier M2) {
3480 case OMPC_SCHEDULE_MODIFIER_monotonic:
3481 Modifier = OMP_sch_modifier_monotonic;
3483 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3484 Modifier = OMP_sch_modifier_nonmonotonic;
3486 case OMPC_SCHEDULE_MODIFIER_simd:
3487 if (Schedule == OMP_sch_static_chunked)
3488 Schedule = OMP_sch_static_balanced_chunked;
3490 case OMPC_SCHEDULE_MODIFIER_last:
3491 case OMPC_SCHEDULE_MODIFIER_unknown:
3495 case OMPC_SCHEDULE_MODIFIER_monotonic:
3496 Modifier = OMP_sch_modifier_monotonic;
3498 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3499 Modifier = OMP_sch_modifier_nonmonotonic;
3501 case OMPC_SCHEDULE_MODIFIER_simd:
3502 if (Schedule == OMP_sch_static_chunked)
3503 Schedule = OMP_sch_static_balanced_chunked;
3505 case OMPC_SCHEDULE_MODIFIER_last:
3506 case OMPC_SCHEDULE_MODIFIER_unknown:
3509 return Schedule | Modifier;
3512 void CGOpenMPRuntime::emitForDispatchInit(
3513 CodeGenFunction &CGF, SourceLocation Loc,
3514 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3515 bool Ordered, const DispatchRTInput &DispatchValues) {
3516 if (!CGF.HaveInsertPoint())
3518 OpenMPSchedType Schedule = getRuntimeSchedule(
3519 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3521 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3522 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3523 Schedule != OMP_sch_static_balanced_chunked));
3524 // Call __kmpc_dispatch_init(
3525 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3526 // kmp_int[32|64] lower, kmp_int[32|64] upper,
3527 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3529 // If the Chunk was not specified in the clause - use default value 1.
3530 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3531 : CGF.Builder.getIntN(IVSize, 1);
3532 llvm::Value *Args[] = {
3533 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3534 CGF.Builder.getInt32(addMonoNonMonoModifier(
3535 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3536 DispatchValues.LB, // Lower
3537 DispatchValues.UB, // Upper
3538 CGF.Builder.getIntN(IVSize, 1), // Stride
3541 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3544 static void emitForStaticInitCall(
3545 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3546 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3547 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3548 const CGOpenMPRuntime::StaticRTInput &Values) {
3549 if (!CGF.HaveInsertPoint())
3552 assert(!Values.Ordered);
3553 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3554 Schedule == OMP_sch_static_balanced_chunked ||
3555 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3556 Schedule == OMP_dist_sch_static ||
3557 Schedule == OMP_dist_sch_static_chunked);
3559 // Call __kmpc_for_static_init(
3560 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3561 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3562 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3563 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3564 llvm::Value *Chunk = Values.Chunk;
3565 if (Chunk == nullptr) {
3566 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3567 Schedule == OMP_dist_sch_static) &&
3568 "expected static non-chunked schedule");
3569 // If the Chunk was not specified in the clause - use default value 1.
3570 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3572 assert((Schedule == OMP_sch_static_chunked ||
3573 Schedule == OMP_sch_static_balanced_chunked ||
3574 Schedule == OMP_ord_static_chunked ||
3575 Schedule == OMP_dist_sch_static_chunked) &&
3576 "expected static chunked schedule");
3578 llvm::Value *Args[] = {
3581 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3582 M2)), // Schedule type
3583 Values.IL.getPointer(), // &isLastIter
3584 Values.LB.getPointer(), // &LB
3585 Values.UB.getPointer(), // &UB
3586 Values.ST.getPointer(), // &Stride
3587 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3590 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3593 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3595 OpenMPDirectiveKind DKind,
3596 const OpenMPScheduleTy &ScheduleKind,
3597 const StaticRTInput &Values) {
3598 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3599 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3600 assert(isOpenMPWorksharingDirective(DKind) &&
3601 "Expected loop-based or sections-based directive.");
3602 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3603 isOpenMPLoopDirective(DKind)
3604 ? OMP_IDENT_WORK_LOOP
3605 : OMP_IDENT_WORK_SECTIONS);
3606 llvm::Value *ThreadId = getThreadID(CGF, Loc);
3607 llvm::FunctionCallee StaticInitFunction =
3608 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3609 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3610 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3613 void CGOpenMPRuntime::emitDistributeStaticInit(
3614 CodeGenFunction &CGF, SourceLocation Loc,
3615 OpenMPDistScheduleClauseKind SchedKind,
3616 const CGOpenMPRuntime::StaticRTInput &Values) {
3617 OpenMPSchedType ScheduleNum =
3618 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3619 llvm::Value *UpdatedLocation =
3620 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3621 llvm::Value *ThreadId = getThreadID(CGF, Loc);
3622 llvm::FunctionCallee StaticInitFunction =
3623 createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3624 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3625 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3626 OMPC_SCHEDULE_MODIFIER_unknown, Values);
3629 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3631 OpenMPDirectiveKind DKind) {
3632 if (!CGF.HaveInsertPoint())
3634 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3635 llvm::Value *Args[] = {
3636 emitUpdateLocation(CGF, Loc,
3637 isOpenMPDistributeDirective(DKind)
3638 ? OMP_IDENT_WORK_DISTRIBUTE
3639 : isOpenMPLoopDirective(DKind)
3640 ? OMP_IDENT_WORK_LOOP
3641 : OMP_IDENT_WORK_SECTIONS),
3642 getThreadID(CGF, Loc)};
3643 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3647 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3651 if (!CGF.HaveInsertPoint())
3653 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3654 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3655 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3658 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3659 SourceLocation Loc, unsigned IVSize,
3660 bool IVSigned, Address IL,
3661 Address LB, Address UB,
3663 // Call __kmpc_dispatch_next(
3664 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3665 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3666 // kmp_int[32|64] *p_stride);
3667 llvm::Value *Args[] = {
3668 emitUpdateLocation(CGF, Loc),
3669 getThreadID(CGF, Loc),
3670 IL.getPointer(), // &isLastIter
3671 LB.getPointer(), // &Lower
3672 UB.getPointer(), // &Upper
3673 ST.getPointer() // &Stride
3676 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3677 return CGF.EmitScalarConversion(
3678 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3679 CGF.getContext().BoolTy, Loc);
3682 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3683 llvm::Value *NumThreads,
3684 SourceLocation Loc) {
3685 if (!CGF.HaveInsertPoint())
3687 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3688 llvm::Value *Args[] = {
3689 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3690 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3691 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3695 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3696 OpenMPProcBindClauseKind ProcBind,
3697 SourceLocation Loc) {
3698 if (!CGF.HaveInsertPoint())
3700 // Constants for proc bind value accepted by the runtime.
3711 case OMPC_PROC_BIND_master:
3712 RuntimeProcBind = ProcBindMaster;
3714 case OMPC_PROC_BIND_close:
3715 RuntimeProcBind = ProcBindClose;
3717 case OMPC_PROC_BIND_spread:
3718 RuntimeProcBind = ProcBindSpread;
3720 case OMPC_PROC_BIND_unknown:
3721 llvm_unreachable("Unsupported proc_bind value.");
3723 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3724 llvm::Value *Args[] = {
3725 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3726 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3727 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3730 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3731 SourceLocation Loc) {
3732 if (!CGF.HaveInsertPoint())
3734 // Build call void __kmpc_flush(ident_t *loc)
3735 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3736 emitUpdateLocation(CGF, Loc));
namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
3765 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3766 return OffloadEntriesTargetRegion.empty() &&
3767 OffloadEntriesDeviceGlobalVar.empty();
3770 /// Initialize target region entry.
3771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3772 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3773 StringRef ParentName, unsigned LineNum,
3775 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3776 "only required for the device "
3777 "code generation.");
3778 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3779 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3780 OMPTargetRegionEntryTargetRegion);
3781 ++OffloadingEntriesNum;
3784 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3785 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3786 StringRef ParentName, unsigned LineNum,
3787 llvm::Constant *Addr, llvm::Constant *ID,
3788 OMPTargetRegionEntryKind Flags) {
3789 // If we are emitting code for a target, the entry is already initialized,
3790 // only has to be registered.
3791 if (CGM.getLangOpts().OpenMPIsDevice) {
3792 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3793 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3794 DiagnosticsEngine::Error,
3795 "Unable to find target region on line '%0' in the device code.");
3796 CGM.getDiags().Report(DiagID) << LineNum;
3800 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3801 assert(Entry.isValid() && "Entry not initialized!");
3802 Entry.setAddress(Addr);
3804 Entry.setFlags(Flags);
3806 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3807 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3808 ++OffloadingEntriesNum;
3812 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3813 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3814 unsigned LineNum) const {
3815 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3816 if (PerDevice == OffloadEntriesTargetRegion.end())
3818 auto PerFile = PerDevice->second.find(FileID);
3819 if (PerFile == PerDevice->second.end())
3821 auto PerParentName = PerFile->second.find(ParentName);
3822 if (PerParentName == PerFile->second.end())
3824 auto PerLine = PerParentName->second.find(LineNum);
3825 if (PerLine == PerParentName->second.end())
3827 // Fail if this entry is already registered.
3828 if (PerLine->second.getAddress() || PerLine->second.getID())
3833 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3834 const OffloadTargetRegionEntryInfoActTy &Action) {
3835 // Scan all target region entries and perform the provided action.
3836 for (const auto &D : OffloadEntriesTargetRegion)
3837 for (const auto &F : D.second)
3838 for (const auto &P : F.second)
3839 for (const auto &L : P.second)
3840 Action(D.first, F.first, P.first(), L.first, L.second);
3843 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3844 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3845 OMPTargetGlobalVarEntryKind Flags,
3847 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3848 "only required for the device "
3849 "code generation.");
3850 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3851 ++OffloadingEntriesNum;
3854 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3855 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3857 OMPTargetGlobalVarEntryKind Flags,
3858 llvm::GlobalValue::LinkageTypes Linkage) {
3859 if (CGM.getLangOpts().OpenMPIsDevice) {
3860 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3861 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3862 "Entry not initialized!");
3863 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3864 "Resetting with the new address.");
3865 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3866 if (Entry.getVarSize().isZero()) {
3867 Entry.setVarSize(VarSize);
3868 Entry.setLinkage(Linkage);
3872 Entry.setVarSize(VarSize);
3873 Entry.setLinkage(Linkage);
3874 Entry.setAddress(Addr);
3876 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3877 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3878 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3879 "Entry not initialized!");
3880 assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3881 "Resetting with the new address.");
3882 if (Entry.getVarSize().isZero()) {
3883 Entry.setVarSize(VarSize);
3884 Entry.setLinkage(Linkage);
3888 OffloadEntriesDeviceGlobalVar.try_emplace(
3889 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3890 ++OffloadingEntriesNum;
3894 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3895 actOnDeviceGlobalVarEntriesInfo(
3896 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3897 // Scan all target region entries and perform the provided action.
3898 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3899 Action(E.getKey(), E.getValue());
3903 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3904 // If we don't have entries or if we are emitting code for the device, we
3905 // don't need to do anything.
3906 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3909 llvm::Module &M = CGM.getModule();
3910 ASTContext &C = CGM.getContext();
3912 // Get list of devices we care about
3913 const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3915 // We should be creating an offloading descriptor only if there are devices
3917 assert(!Devices.empty() && "No OpenMP offloading devices??");
3919 // Create the external variables that will point to the begin and end of the
3920 // host entries section. These will be defined by the linker.
3921 llvm::Type *OffloadEntryTy =
3922 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3923 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3924 auto *HostEntriesBegin = new llvm::GlobalVariable(
3925 M, OffloadEntryTy, /*isConstant=*/true,
3926 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3928 std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3929 auto *HostEntriesEnd =
3930 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3931 llvm::GlobalValue::ExternalLinkage,
3932 /*Initializer=*/nullptr, EntriesEndName);
3934 // Create all device images
3935 auto *DeviceImageTy = cast<llvm::StructType>(
3936 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3937 ConstantInitBuilder DeviceImagesBuilder(CGM);
3938 ConstantArrayBuilder DeviceImagesEntries =
3939 DeviceImagesBuilder.beginArray(DeviceImageTy);
3941 for (const llvm::Triple &Device : Devices) {
3942 StringRef T = Device.getTriple();
3943 std::string BeginName = getName({"omp_offloading", "img_start", ""});
3944 auto *ImgBegin = new llvm::GlobalVariable(
3945 M, CGM.Int8Ty, /*isConstant=*/true,
3946 llvm::GlobalValue::ExternalWeakLinkage,
3947 /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3948 std::string EndName = getName({"omp_offloading", "img_end", ""});
3949 auto *ImgEnd = new llvm::GlobalVariable(
3950 M, CGM.Int8Ty, /*isConstant=*/true,
3951 llvm::GlobalValue::ExternalWeakLinkage,
3952 /*Initializer=*/nullptr, Twine(EndName).concat(T));
3954 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3956 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3957 DeviceImagesEntries);
3960 // Create device images global array.
3961 std::string ImagesName = getName({"omp_offloading", "device_images"});
3962 llvm::GlobalVariable *DeviceImages =
3963 DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3964 CGM.getPointerAlign(),
3965 /*isConstant=*/true);
3966 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3968 // This is a Zero array to be used in the creation of the constant expressions
3969 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3970 llvm::Constant::getNullValue(CGM.Int32Ty)};
3972 // Create the target region descriptor.
3973 llvm::Constant *Data[] = {
3974 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3975 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3976 DeviceImages, Index),
3977 HostEntriesBegin, HostEntriesEnd};
3978 std::string Descriptor = getName({"omp_offloading", "descriptor"});
3979 llvm::GlobalVariable *Desc = createGlobalStruct(
3980 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3982 // Emit code to register or unregister the descriptor at execution
3983 // startup or closing, respectively.
3985 llvm::Function *UnRegFn;
3987 FunctionArgList Args;
3988 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3989 Args.push_back(&DummyPtr);
3991 CodeGenFunction CGF(CGM);
3992 // Disable debug info for global (de-)initializer because they are not part
3993 // of some particular construct.
3994 CGF.disableDebugInfo();
3996 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3997 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3998 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3999 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
4000 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
4001 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
4003 CGF.FinishFunction();
4005 llvm::Function *RegFn;
4007 CodeGenFunction CGF(CGM);
4008 // Disable debug info for global (de-)initializer because they are not part
4009 // of some particular construct.
4010 CGF.disableDebugInfo();
4011 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
4012 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4014 // Encode offload target triples into the registration function name. It
4015 // will serve as a comdat key for the registration/unregistration code for
4016 // this particular combination of offloading targets.
4017 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
4018 RegFnNameParts[0] = "omp_offloading";
4019 RegFnNameParts[1] = "descriptor_reg";
4020 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
4021 [](const llvm::Triple &T) -> const std::string& {
4022 return T.getTriple();
4024 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
4025 std::string Descriptor = getName(RegFnNameParts);
4026 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
4027 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
4028 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
4029 // Create a variable to drive the registration and unregistration of the
4030 // descriptor, so we can reuse the logic that emits Ctors and Dtors.
4031 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
4032 SourceLocation(), nullptr, C.CharTy,
4033 ImplicitParamDecl::Other);
4034 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
4035 CGF.FinishFunction();
4037 if (CGM.supportsCOMDAT()) {
4038 // It is sufficient to call registration function only once, so create a
4039 // COMDAT group for registration/unregistration functions and associated
4040 // data. That would reduce startup time and code size. Registration
4041 // function serves as a COMDAT group key.
4042 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
4043 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
4044 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
4045 RegFn->setComdat(ComdatKey);
4046 UnRegFn->setComdat(ComdatKey);
4047 DeviceImages->setComdat(ComdatKey);
4048 Desc->setComdat(ComdatKey);
4053 void CGOpenMPRuntime::createOffloadEntry(
4054 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4055 llvm::GlobalValue::LinkageTypes Linkage) {
4056 StringRef Name = Addr->getName();
4057 llvm::Module &M = CGM.getModule();
4058 llvm::LLVMContext &C = M.getContext();
4060 // Create constant string with the name.
4061 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4063 std::string StringName = getName({"omp_offloading", "entry_name"});
4064 auto *Str = new llvm::GlobalVariable(
4065 M, StrPtrInit->getType(), /*isConstant=*/true,
4066 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4067 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4069 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4070 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4071 llvm::ConstantInt::get(CGM.SizeTy, Size),
4072 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4073 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4074 std::string EntryName = getName({"omp_offloading", "entry", ""});
4075 llvm::GlobalVariable *Entry = createGlobalStruct(
4076 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4077 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4079 // The entry has to be created in the section the linker expects it to be.
4080 std::string Section = getName({"omp_offloading", "entries"});
4081 Entry->setSection(Section);
4084 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
4085 // Emit the offloading entries and metadata so that the device codegen side
4086 // can easily figure out what to emit. The produced metadata looks like
4089 // !omp_offload.info = !{!1, ...}
4091 // Right now we only generate metadata for function that contain target
4094 // If we do not have entries, we don't need to do anything.
4095 if (OffloadEntriesInfoManager.empty())
4098 llvm::Module &M = CGM.getModule();
4099 llvm::LLVMContext &C = M.getContext();
4100 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
4101 OrderedEntries(OffloadEntriesInfoManager.size());
4102 llvm::SmallVector<StringRef, 16> ParentFunctions(
4103 OffloadEntriesInfoManager.size());
4105 // Auxiliary methods to create metadata values and strings.
4106 auto &&GetMDInt = [this](unsigned V) {
4107 return llvm::ConstantAsMetadata::get(
4108 llvm::ConstantInt::get(CGM.Int32Ty, V));
4111 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4113 // Create the offloading info metadata node.
4114 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4116 // Create function that emits metadata for each target region entry;
4117 auto &&TargetRegionMetadataEmitter =
4118 [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4119 unsigned DeviceID, unsigned FileID, StringRef ParentName,
4121 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
4122 // Generate metadata for target regions. Each entry of this metadata
4124 // - Entry 0 -> Kind of this type of metadata (0).
4125 // - Entry 1 -> Device ID of the file where the entry was identified.
4126 // - Entry 2 -> File ID of the file where the entry was identified.
4127 // - Entry 3 -> Mangled name of the function where the entry was
4129 // - Entry 4 -> Line in the file where the entry was identified.
4130 // - Entry 5 -> Order the entry was created.
4131 // The first element of the metadata node is the kind.
4132 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4133 GetMDInt(FileID), GetMDString(ParentName),
4134 GetMDInt(Line), GetMDInt(E.getOrder())};
4136 // Save this entry in the right position of the ordered entries array.
4137 OrderedEntries[E.getOrder()] = &E;
4138 ParentFunctions[E.getOrder()] = ParentName;
4140 // Add metadata to the named metadata node.
4141 MD->addOperand(llvm::MDNode::get(C, Ops));
4144 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4145 TargetRegionMetadataEmitter);
4147 // Create function that emits metadata for each device global variable entry;
4148 auto &&DeviceGlobalVarMetadataEmitter =
4149 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4150 MD](StringRef MangledName,
4151 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4153 // Generate metadata for global variables. Each entry of this metadata
4155 // - Entry 0 -> Kind of this type of metadata (1).
4156 // - Entry 1 -> Mangled name of the variable.
4157 // - Entry 2 -> Declare target kind.
4158 // - Entry 3 -> Order the entry was created.
4159 // The first element of the metadata node is the kind.
4160 llvm::Metadata *Ops[] = {
4161 GetMDInt(E.getKind()), GetMDString(MangledName),
4162 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4164 // Save this entry in the right position of the ordered entries array.
4165 OrderedEntries[E.getOrder()] = &E;
4167 // Add metadata to the named metadata node.
4168 MD->addOperand(llvm::MDNode::get(C, Ops));
4171 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4172 DeviceGlobalVarMetadataEmitter);
4174 for (const auto *E : OrderedEntries) {
4175 assert(E && "All ordered entries must exist!");
4176 if (const auto *CE =
4177 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4179 if (!CE->getID() || !CE->getAddress()) {
4180 // Do not blame the entry if the parent funtion is not emitted.
4181 StringRef FnName = ParentFunctions[CE->getOrder()];
4182 if (!CGM.GetGlobalValue(FnName))
4184 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4185 DiagnosticsEngine::Error,
4186 "Offloading entry for target region is incorrect: either the "
4187 "address or the ID is invalid.");
4188 CGM.getDiags().Report(DiagID);
4191 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4192 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4193 } else if (const auto *CE =
4194 dyn_cast<OffloadEntriesInfoManagerTy::
4195 OffloadEntryInfoDeviceGlobalVar>(E)) {
4196 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4197 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4200 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4201 if (CGM.getLangOpts().OpenMPIsDevice &&
4202 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4204 if (!CE->getAddress()) {
4205 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4206 DiagnosticsEngine::Error,
4207 "Offloading entry for declare target variable is incorrect: the "
4208 "address is invalid.");
4209 CGM.getDiags().Report(DiagID);
4212 // The vaiable has no definition - no need to add the entry.
4213 if (CE->getVarSize().isZero())
4217 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4218 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4219 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4220 "Declaret target link address is set.");
4221 if (CGM.getLangOpts().OpenMPIsDevice)
4223 if (!CE->getAddress()) {
4224 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4225 DiagnosticsEngine::Error,
4226 "Offloading entry for declare target variable is incorrect: the "
4227 "address is invalid.");
4228 CGM.getDiags().Report(DiagID);
4233 createOffloadEntry(CE->getAddress(), CE->getAddress(),
4234 CE->getVarSize().getQuantity(), Flags,
4237 llvm_unreachable("Unsupported entry kind.");
4242 /// Loads all the offload entries information from the host IR
// so that the device compilation can assign matching entry orders.
// No-op unless compiling for a device and a host IR file was provided.
4244 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4245 // If we are in target mode, load the metadata from the host IR. This code has
4246 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4248 if (!CGM.getLangOpts().OpenMPIsDevice)
4251 if (CGM.getLangOpts().OMPHostIRFile.empty())
// Read the host IR file named by the OMPHostIRFile language option and
// report a diagnostic if it cannot be opened.
4254 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4255 if (auto EC = Buf.getError()) {
4256 CGM.getDiags().Report(diag::err_cannot_open_file)
4257 << CGM.getLangOpts().OMPHostIRFile << EC.message();
// Parse the host bitcode into a temporary LLVM context; parse failures
// are surfaced as a custom diagnostic rather than a crash.
4261 llvm::LLVMContext C;
4262 auto ME = expectedToErrorOrAndEmitErrors(
4263 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4265 if (auto EC = ME.getError()) {
4266 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4267 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4268 CGM.getDiags().Report(DiagID)
4269 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4273 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
// Each operand of "omp_offload.info" describes one offload entry.
4277 for (llvm::MDNode *MN : MD->operands()) {
// Helper: decode operand Idx of the node as an integer constant.
4278 auto &&GetMDInt = [MN](unsigned Idx) {
4279 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4280 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
// Helper: decode operand Idx of the node as a string.
4283 auto &&GetMDString = [MN](unsigned Idx) {
4284 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4285 return V->getString();
// Operand 0 is the entry kind; dispatch to the matching initializer.
// The operand layout must match the emission side exactly.
4288 switch (GetMDInt(0)) {
4290 llvm_unreachable("Unexpected metadata!");
4292 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4293 OffloadingEntryInfoTargetRegion:
4294 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4295 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4296 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4297 /*Order=*/GetMDInt(5));
4299 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4300 OffloadingEntryInfoDeviceGlobalVar:
4301 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4302 /*MangledName=*/GetMDString(1),
4303 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4304 /*Flags=*/GetMDInt(2)),
4305 /*Order=*/GetMDInt(3));
// Lazily builds the kmp_routine_entry_t type used for task entry points and
// caches both the QualType (KmpRoutineEntryPtrQTy) and the converted LLVM
// type (KmpRoutineEntryPtrTy). Subsequent calls are no-ops.
4311 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4312 if (!KmpRoutineEntryPtrTy) {
4313 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4314 ASTContext &C = CGM.getContext();
4315 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4316 FunctionProtoType::ExtProtoInfo EPI;
4317 KmpRoutineEntryPtrQTy = C.getPointerType(
4318 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4319 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy)
// Returns (building and caching on first use) the QualType for the packed
// __tgt_offload_entry descriptor shared with the offload runtime library.
4323 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4324 // Make sure the type of the entry is already created. This is the type we
4326 // struct __tgt_offload_entry{
4327 // void *addr; // Pointer to the offload entry info.
4328 // // (function or global)
4329 // char *name; // Name of the function or global.
4330 // size_t size; // Size of the entry info (0 if it is a function).
4331 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4332 // int32_t reserved; // Reserved, to use by the runtime library.
4334 if (TgtOffloadEntryQTy.isNull()) {
4335 ASTContext &C = CGM.getContext();
4336 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4337 RD->startDefinition();
// Fields are added in the exact order of the struct layout above.
4338 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4339 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4340 addFieldToRecordDecl(C, RD, C.getSizeType());
4341 addFieldToRecordDecl(
4342 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4343 addFieldToRecordDecl(
4344 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4345 RD->completeDefinition();
// Packed so the layout matches the runtime's expectation exactly,
// with no implicit padding between fields.
4346 RD->addAttr(PackedAttr::CreateImplicit(C));
4347 TgtOffloadEntryQTy = C.getRecordType(RD);
4349 return TgtOffloadEntryQTy;
// Returns (building and caching on first use) the QualType for the
// __tgt_device_image descriptor: the [start, end) range of target code plus
// the [begin, end) range of its offload entry table.
4352 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4353 // These are the types we need to build:
4354 // struct __tgt_device_image{
4355 // void *ImageStart; // Pointer to the target code start.
4356 // void *ImageEnd; // Pointer to the target code end.
4357 // // We also add the host entries to the device image, as it may be useful
4358 // // for the target runtime to have access to that information.
4359 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
4361 // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4362 // // entries (non inclusive).
4364 if (TgtDeviceImageQTy.isNull()) {
4365 ASTContext &C = CGM.getContext();
4366 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4367 RD->startDefinition();
// Fields in the exact order of the struct layout above.
4368 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4369 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4370 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4371 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4372 RD->completeDefinition();
4373 TgtDeviceImageQTy = C.getRecordType(RD);
4375 return TgtDeviceImageQTy;
// Returns (building and caching on first use) the QualType for the
// top-level __tgt_bin_desc descriptor handed to the offload runtime:
// device count, per-device images, and the host entry table range.
4378 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4379 // struct __tgt_bin_desc{
4380 // int32_t NumDevices; // Number of devices supported.
4381 // __tgt_device_image *DeviceImages; // Arrays of device images
4382 // // (one per device).
4383 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
4385 // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4386 // // entries (non inclusive).
4388 if (TgtBinaryDescriptorQTy.isNull()) {
4389 ASTContext &C = CGM.getContext();
4390 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4391 RD->startDefinition();
// Fields in the exact order of the struct layout above.
4392 addFieldToRecordDecl(
4393 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4394 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4395 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4396 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4397 RD->completeDefinition();
4398 TgtBinaryDescriptorQTy = C.getRecordType(RD);
4400 return TgtBinaryDescriptorQTy;
// Groups the three declarations involved in privatizing one variable in a
// task-based region: the original variable, its private copy, and (for
// firstprivate) the element used to reference the source during init.
4404 struct PrivateHelpersTy {
4405 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4406 const VarDecl *PrivateElemInit)
4407 : Original(Original), PrivateCopy(PrivateCopy),
4408 PrivateElemInit(PrivateElemInit) {}
// Variable as written in the source program.
4409 const VarDecl *Original;
// The task-local copy that replaces it inside the region.
4410 const VarDecl *PrivateCopy;
// Init helper for firstprivate copies; nullptr for private/lastprivate.
4411 const VarDecl *PrivateElemInit;
// Private-copy descriptor paired with its required alignment; the alignment
// is used to sort privates before laying out .kmp_privates.t.
4413 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4414 } // anonymous namespace
// Builds the implicit record .kmp_privates.t holding one field per private
// variable (in the order given by Privates). Alignment attributes on the
// original variables are propagated to the corresponding fields.
4417 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4418 if (!Privates.empty()) {
4419 ASTContext &C = CGM.getContext();
4420 // Build struct .kmp_privates_t. {
4421 // /* private vars */
4423 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4424 RD->startDefinition();
4425 for (const auto &Pair : Privates) {
4426 const VarDecl *VD = Pair.second.Original;
// Store by value even if the original is a reference.
4427 QualType Type = VD->getType().getNonReferenceType();
4428 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
// Copy any AlignedAttr from the variable onto the new field.
4429 if (VD->hasAttrs()) {
4430 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4431 E(VD->getAttrs().end());
4436 RD->completeDefinition();
// Builds the implicit kmp_task_t record (and its kmp_cmplrdata_t union
// member) matching the libomp runtime layout. Taskloop directives get five
// extra trailing fields (lb, ub, st, liter, reductions).
4443 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4444 QualType KmpInt32Ty,
4445 QualType KmpRoutineEntryPointerQTy) {
4446 ASTContext &C = CGM.getContext();
4447 // Build struct kmp_task_t {
4449 // kmp_routine_entry_t routine;
4450 // kmp_int32 part_id;
4451 // kmp_cmplrdata_t data1;
4452 // kmp_cmplrdata_t data2;
4453 // For taskloops additional fields:
4458 // void * reductions;
// kmp_cmplrdata_t is a union of a kmp_int32 and a routine-entry pointer.
4460 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4461 UD->startDefinition();
4462 addFieldToRecordDecl(C, UD, KmpInt32Ty);
4463 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4464 UD->completeDefinition();
4465 QualType KmpCmplrdataTy = C.getRecordType(UD);
4466 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4467 RD->startDefinition();
// Fields in the exact order of the struct comment above; the first void*
// is the shareds pointer.
4468 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4469 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4470 addFieldToRecordDecl(C, RD, KmpInt32Ty);
4471 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4472 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4473 if (isOpenMPTaskLoopDirective(Kind)) {
4474 QualType KmpUInt64Ty =
4475 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4476 QualType KmpInt64Ty =
4477 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
// Taskloop tail: lower bound, upper bound, stride, last-iter flag,
// reductions pointer.
4478 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4479 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4480 addFieldToRecordDecl(C, RD, KmpInt64Ty);
4481 addFieldToRecordDecl(C, RD, KmpInt32Ty);
4482 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4484 RD->completeDefinition();
// Builds the record that is actually allocated for a task: the runtime's
// kmp_task_t followed (only when there are privates) by the generated
// .kmp_privates.t record.
4489 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4490 ArrayRef<PrivateDataTy> Privates) {
4491 ASTContext &C = CGM.getContext();
4492 // Build struct kmp_task_t_with_privates {
4493 // kmp_task_t task_data;
4494 // .kmp_privates_t. privates;
4496 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4497 RD->startDefinition();
4498 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
// The privates field is omitted entirely when Privates is empty.
4499 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4500 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4501 RD->completeDefinition();
4505 /// Emit a proxy function which accepts kmp_task_t as the second
// argument (the runtime's task-entry signature) and forwards the fields of
// the task descriptor to the real outlined TaskFunction:
4508 /// kmp_int32 .omp_task_entry.(kmpp_int32 gtid, kmp_task_t *tt) {
4509 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4511 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4512 /// tt->reductions, tt->shareds);
4516 static llvm::Function *
4517 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4518 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4519 QualType KmpTaskTWithPrivatesPtrQTy,
4520 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4521 QualType SharedsPtrTy, llvm::Function *TaskFunction,
4522 llvm::Value *TaskPrivatesMap) {
4523 ASTContext &C = CGM.getContext();
// The proxy takes (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
4524 FunctionArgList Args;
4525 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4526 ImplicitParamDecl::Other);
4527 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4528 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4529 ImplicitParamDecl::Other);
4530 Args.push_back(&GtidArg);
4531 Args.push_back(&TaskTypeArg);
4532 const auto &TaskEntryFnInfo =
4533 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4534 llvm::FunctionType *TaskEntryTy =
4535 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4536 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4537 auto *TaskEntry = llvm::Function::Create(
4538 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4539 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4540 TaskEntry->setDoesNotRecurse();
4541 CodeGenFunction CGF(CGM);
4542 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4545 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4548 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4549 // tt->task_data.shareds);
4550 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4551 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
// TDBase is the task descriptor; Base is its kmp_task_t head (field 0).
4552 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4553 CGF.GetAddrOfLocalVar(&TaskTypeArg),
4554 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4555 const auto *KmpTaskTWithPrivatesQTyRD =
4556 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4558 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4559 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
// part_id is passed by address, not by value.
4560 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4561 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4562 llvm::Value *PartidParam = PartIdLVal.getPointer();
// Load the shareds pointer and cast it to the concrete shareds type.
4564 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4565 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4566 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4567 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4568 CGF.ConvertTypeForMem(SharedsPtrTy));
// &tt->privates if the record has a privates field, otherwise null.
4570 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4571 llvm::Value *PrivatesParam;
4572 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4573 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4574 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4575 PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4577 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
// Arguments common to task and taskloop entry points.
4580 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4583 .CreatePointerBitCastOrAddrSpaceCast(
4584 TDBase.getAddress(), CGF.VoidPtrTy)
4586 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4587 std::end(CommonArgs));
// Taskloop entries additionally receive lb, ub, st, liter and reductions
// loaded from the kmp_task_t tail fields.
4588 if (isOpenMPTaskLoopDirective(Kind)) {
4589 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4590 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4591 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4592 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4593 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4594 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4595 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4596 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4597 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4598 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4599 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4600 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4601 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4602 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4603 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4604 CallArgs.push_back(LBParam);
4605 CallArgs.push_back(UBParam);
4606 CallArgs.push_back(StParam);
4607 CallArgs.push_back(LIParam);
4608 CallArgs.push_back(RParam);
4610 CallArgs.push_back(SharedsParam);
4612 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
// The proxy always returns 0 to the runtime.
4614 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4615 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4616 CGF.FinishFunction();
// Emits the task destructor thunk registered with the runtime: it walks the
// fields of the task's privates record and pushes a destroy cleanup for each
// field whose type requires destruction.
4620 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4622 QualType KmpInt32Ty,
4623 QualType KmpTaskTWithPrivatesPtrQTy,
4624 QualType KmpTaskTWithPrivatesQTy) {
4625 ASTContext &C = CGM.getContext();
// Same (gtid, task) signature as the task entry proxy.
4626 FunctionArgList Args;
4627 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4628 ImplicitParamDecl::Other);
4629 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4630 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4631 ImplicitParamDecl::Other);
4632 Args.push_back(&GtidArg);
4633 Args.push_back(&TaskTypeArg);
4634 const auto &DestructorFnInfo =
4635 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4636 llvm::FunctionType *DestructorFnTy =
4637 CGM.getTypes().GetFunctionType(DestructorFnInfo);
4639 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4640 auto *DestructorFn =
4641 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4642 Name, &CGM.getModule());
4643 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4645 DestructorFn->setDoesNotRecurse();
4646 CodeGenFunction CGF(CGM);
4647 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
// Navigate from the task pointer to its privates member (field 1).
4650 LValue Base = CGF.EmitLoadOfPointerLValue(
4651 CGF.GetAddrOfLocalVar(&TaskTypeArg),
4652 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4653 const auto *KmpTaskTWithPrivatesQTyRD =
4654 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4655 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4656 Base = CGF.EmitLValueForField(Base, *FI);
// Push a destructor cleanup for every private field that needs one; the
// cleanups run when the function scope is finished below.
4657 for (const auto *Field :
4658 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4659 if (QualType::DestructionKind DtorKind =
4660 Field->getType().isDestructedType()) {
4661 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4662 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4665 CGF.FinishFunction();
4666 return DestructorFn;
4669 /// Emit a privates mapping function for correct handling of private and
4670 /// firstprivate variables.
4672 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4673 /// **noalias priv1,..., <tyn> **noalias privn) {
4674 /// *priv1 = &.privates.priv1;
4676 /// *privn = &.privates.privn;
4679 static llvm::Value *
4680 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4681 ArrayRef<const Expr *> PrivateVars,
4682 ArrayRef<const Expr *> FirstprivateVars,
4683 ArrayRef<const Expr *> LastprivateVars,
4684 QualType PrivatesQTy,
4685 ArrayRef<PrivateDataTy> Privates) {
4686 ASTContext &C = CGM.getContext();
// First parameter: const pointer to the task's privates record.
4687 FunctionArgList Args;
4688 ImplicitParamDecl TaskPrivatesArg(
4689 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4690 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4691 ImplicitParamDecl::Other);
4692 Args.push_back(&TaskPrivatesArg);
// Remember the argument index for each privatized variable so the field
// loop below can find its matching out-parameter. Counter starts at 1
// because argument 0 is the privates record itself.
4693 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4694 unsigned Counter = 1;
// One <ty> ** out-parameter per private variable.
4695 for (const Expr *E : PrivateVars) {
4696 Args.push_back(ImplicitParamDecl::Create(
4697 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4698 C.getPointerType(C.getPointerType(E->getType()))
4701 ImplicitParamDecl::Other));
4702 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4703 PrivateVarsPos[VD] = Counter;
// Same for firstprivate variables.
4706 for (const Expr *E : FirstprivateVars) {
4707 Args.push_back(ImplicitParamDecl::Create(
4708 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4709 C.getPointerType(C.getPointerType(E->getType()))
4712 ImplicitParamDecl::Other));
4713 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4714 PrivateVarsPos[VD] = Counter;
// Same for lastprivate variables.
4717 for (const Expr *E : LastprivateVars) {
4718 Args.push_back(ImplicitParamDecl::Create(
4719 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4720 C.getPointerType(C.getPointerType(E->getType()))
4723 ImplicitParamDecl::Other));
4724 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4725 PrivateVarsPos[VD] = Counter;
4728 const auto &TaskPrivatesMapFnInfo =
4729 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4730 llvm::FunctionType *TaskPrivatesMapTy =
4731 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4733 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4734 auto *TaskPrivatesMap = llvm::Function::Create(
4735 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4737 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4738 TaskPrivatesMapFnInfo);
// In optimized builds force the mapping thunk to be inlined away.
4739 if (CGM.getLangOpts().Optimize) {
4740 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4741 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4742 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4744 CodeGenFunction CGF(CGM);
4745 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4746 TaskPrivatesMapFnInfo, Args, Loc, Loc);
4748 // *privi = &.privates.privi;
4749 LValue Base = CGF.EmitLoadOfPointerLValue(
4750 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4751 TaskPrivatesArg.getType()->castAs<PointerType>());
4752 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
// Walk the privates record in field order; Privates is in the same order,
// so PrivateVarsPos maps each field to its out-parameter.
4754 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4755 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4756 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4758 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4759 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4760 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4761 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4764 CGF.FinishFunction();
4765 return TaskPrivatesMap;
4768 /// Emit initialization for private variables in task-based directives.
// KmpTaskSharedsPtr points at the captured shareds; TDBase is the task
// descriptor. When ForDup is true this runs inside the task_dup thunk, so
// only non-trivial (constructor) initialization is re-emitted.
4769 static void emitPrivatesInit(CodeGenFunction &CGF,
4770 const OMPExecutableDirective &D,
4771 Address KmpTaskSharedsPtr, LValue TDBase,
4772 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4773 QualType SharedsTy, QualType SharedsPtrTy,
4774 const OMPTaskDataTy &Data,
4775 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4776 ASTContext &C = CGF.getContext();
// The privates record is field 1 of kmp_task_t_with_privates.
4777 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4778 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4779 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4782 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4783 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4786 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4787 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4788 // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4789 // PointersArray and SizesArray. The original variables for these arrays are
4790 // not captured and we get their addresses explicitly.
4791 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4792 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4793 SrcBase = CGF.MakeAddrLValue(
4794 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4795 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
// Walk the privates record fields in lock-step with the Privates array.
4798 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4799 for (const PrivateDataTy &Pair : Privates) {
4800 const VarDecl *VD = Pair.second.PrivateCopy;
4801 const Expr *Init = VD->getAnyInitializer();
// In the task_dup path (ForDup) only re-run non-trivial constructors;
// trivially-initialized data was already copied by the runtime.
4802 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4803 !CGF.isTrivialInitializer(Init)))) {
4804 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
// PrivateElemInit set => firstprivate: initialize from the shared copy.
4805 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4806 const VarDecl *OriginalVD = Pair.second.Original;
4807 // Check if the variable is the target-based BasePointersArray,
4808 // PointersArray or SizesArray.
4809 LValue SharedRefLValue;
4810 QualType Type = PrivateLValue.getType();
4811 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4812 if (IsTargetTask && !SharedField) {
// Artificial target-data variable: not captured, read directly
// from its local address.
4813 assert(isa<ImplicitParamDecl>(OriginalVD) &&
4814 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4815 cast<CapturedDecl>(OriginalVD->getDeclContext())
4816 ->getNumParams() == 0 &&
4817 isa<TranslationUnitDecl>(
4818 cast<CapturedDecl>(OriginalVD->getDeclContext())
4819 ->getDeclContext()) &&
4820 "Expected artificial target data variable.");
4822 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4824 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
// Re-wrap with the declared alignment of the original variable.
4825 SharedRefLValue = CGF.MakeAddrLValue(
4826 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4827 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4828 SharedRefLValue.getTBAAInfo());
4830 if (Type->isArrayType()) {
4831 // Initialize firstprivate array.
4832 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4833 // Perform simple memcpy.
4834 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4836 // Initialize firstprivate array using element-by-element
4838 CGF.EmitOMPAggregateAssign(
4839 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4840 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4841 Address SrcElement) {
4842 // Clean up any temporaries needed by the initialization.
4843 CodeGenFunction::OMPPrivateScope InitScope(CGF);
// Bind the init element to the current source element so the
// init expression reads from the right slot.
4844 InitScope.addPrivate(
4845 Elem, [SrcElement]() -> Address { return SrcElement; });
4846 (void)InitScope.Privatize();
4847 // Emit initialization for single element.
4848 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4849 CGF, &CapturesInfo);
4850 CGF.EmitAnyExprToMem(Init, DestElement,
4851 Init->getType().getQualifiers(),
4852 /*IsInitializer=*/false);
// Scalar/class firstprivate: run the init expression with the init
// element bound to the shared copy's address.
4856 CodeGenFunction::OMPPrivateScope InitScope(CGF);
4857 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4858 return SharedRefLValue.getAddress();
4860 (void)InitScope.Privatize();
4861 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4862 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4863 /*capturedByInit=*/false);
// Plain private/lastprivate copy: just run its default initializer.
4866 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4873 /// Check if duplication function is required for taskloops.
// Returns true if any private copy has a non-trivial constructor
// initializer, i.e. a memcpy of the task descriptor is not enough.
4874 static bool checkInitIsRequired(CodeGenFunction &CGF,
4875 ArrayRef<PrivateDataTy> Privates) {
4876 bool InitRequired = false;
4877 for (const PrivateDataTy &Pair : Privates) {
4878 const VarDecl *VD = Pair.second.PrivateCopy;
4879 const Expr *Init = VD->getAnyInitializer();
// Only a non-trivial CXXConstructExpr initializer forces a dup function.
4880 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4881 !CGF.isTrivialInitializer(Init));
4885 return InitRequired;
4889 /// Emit task_dup function (for initialization of
4890 /// private/firstprivate/lastprivate vars and last_iter flag)
4892 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4894 /// // setup lastprivate flag
4895 /// task_dst->last = lastpriv;
4896 /// // could be constructor calls here...
4899 static llvm::Value *
4900 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4901 const OMPExecutableDirective &D,
4902 QualType KmpTaskTWithPrivatesPtrQTy,
4903 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4904 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4905 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4906 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4907 ASTContext &C = CGM.getContext();
// Signature: (task_dst, task_src, lastpriv-flag).
4908 FunctionArgList Args;
4909 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4910 KmpTaskTWithPrivatesPtrQTy,
4911 ImplicitParamDecl::Other);
4912 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4913 KmpTaskTWithPrivatesPtrQTy,
4914 ImplicitParamDecl::Other);
4915 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4916 ImplicitParamDecl::Other);
4917 Args.push_back(&DstArg);
4918 Args.push_back(&SrcArg);
4919 Args.push_back(&LastprivArg);
4920 const auto &TaskDupFnInfo =
4921 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4922 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4923 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4924 auto *TaskDup = llvm::Function::Create(
4925 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4926 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4927 TaskDup->setDoesNotRecurse();
4928 CodeGenFunction CGF(CGM);
4929 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4932 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4933 CGF.GetAddrOfLocalVar(&DstArg),
4934 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4935 // task_dst->liter = lastpriv;
// Store the lastprivate flag into the destination task's liter field.
4937 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4938 LValue Base = CGF.EmitLValueForField(
4939 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4940 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4941 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4942 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4943 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4946 // Emit initial values for private copies (if any).
4947 assert(!Privates.empty());
4948 Address KmpTaskSharedsPtr = Address::invalid();
// Firstprivates read from the SOURCE task's shareds pointer.
4949 if (!Data.FirstprivateVars.empty()) {
4950 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4951 CGF.GetAddrOfLocalVar(&SrcArg),
4952 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4953 LValue Base = CGF.EmitLValueForField(
4954 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4955 KmpTaskSharedsPtr = Address(
4956 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4957 Base, *std::next(KmpTaskTQTyRD->field_begin(),
4960 CGF.getNaturalTypeAlignment(SharedsTy));
// Delegate the actual per-variable initialization; ForDup=true limits it
// to non-trivial constructors.
4962 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4963 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4964 CGF.FinishFunction();
4968 /// Checks if destructor function is required to be generated.
4969 /// \return true if cleanups are required, false otherwise.
4971 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4972 bool NeedsCleanup = false;
// Inspect the privates record (field 1 of kmp_task_t_with_privates).
4973 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4974 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
// Any field with a non-trivial destruction kind forces a destructor thunk.
4975 for (const FieldDecl *FD : PrivateRD->fields()) {
4976 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4980 return NeedsCleanup;
4983 CGOpenMPRuntime::TaskResultTy
4984 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4985 const OMPExecutableDirective &D,
4986 llvm::Function *TaskFunction, QualType SharedsTy,
4987 Address Shareds, const OMPTaskDataTy &Data) {
4988 ASTContext &C = CGM.getContext();
4989 llvm::SmallVector<PrivateDataTy, 4> Privates;
4990 // Aggregate privates and sort them by the alignment.
4991 auto I = Data.PrivateCopies.begin();
4992 for (const Expr *E : Data.PrivateVars) {
4993 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4994 Privates.emplace_back(
4996 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4997 /*PrivateElemInit=*/nullptr));
5000 I = Data.FirstprivateCopies.begin();
5001 auto IElemInitRef = Data.FirstprivateInits.begin();
5002 for (const Expr *E : Data.FirstprivateVars) {
5003 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5004 Privates.emplace_back(
5007 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5008 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
5012 I = Data.LastprivateCopies.begin();
5013 for (const Expr *E : Data.LastprivateVars) {
5014 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5015 Privates.emplace_back(
5017 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5018 /*PrivateElemInit=*/nullptr));
5021 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
5022 return L.first > R.first;
5024 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
5025 // Build type kmp_routine_entry_t (if not built yet).
5026 emitKmpRoutineEntryT(KmpInt32Ty);
5027 // Build type kmp_task_t (if not built yet).
5028 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
5029 if (SavedKmpTaskloopTQTy.isNull()) {
5030 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5031 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5033 KmpTaskTQTy = SavedKmpTaskloopTQTy;
5035 assert((D.getDirectiveKind() == OMPD_task ||
5036 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
5037 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
5038 "Expected taskloop, task or target directive");
5039 if (SavedKmpTaskTQTy.isNull()) {
5040 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
5041 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5043 KmpTaskTQTy = SavedKmpTaskTQTy;
5045 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5046 // Build particular struct kmp_task_t for the given task.
5047 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5048 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
5049 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5050 QualType KmpTaskTWithPrivatesPtrQTy =
5051 C.getPointerType(KmpTaskTWithPrivatesQTy);
5052 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5053 llvm::Type *KmpTaskTWithPrivatesPtrTy =
5054 KmpTaskTWithPrivatesTy->getPointerTo();
5055 llvm::Value *KmpTaskTWithPrivatesTySize =
5056 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5057 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5059 // Emit initial values for private copies (if any).
5060 llvm::Value *TaskPrivatesMap = nullptr;
5061 llvm::Type *TaskPrivatesMapTy =
5062 std::next(TaskFunction->arg_begin(), 3)->getType();
5063 if (!Privates.empty()) {
5064 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5065 TaskPrivatesMap = emitTaskPrivateMappingFunction(
5066 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5067 FI->getType(), Privates);
5068 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5069 TaskPrivatesMap, TaskPrivatesMapTy);
5071 TaskPrivatesMap = llvm::ConstantPointerNull::get(
5072 cast<llvm::PointerType>(TaskPrivatesMapTy));
5074 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5076 llvm::Function *TaskEntry = emitProxyTaskFunction(
5077 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5078 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5081 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5082 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5083 // kmp_routine_entry_t *task_entry);
5084 // Task flags. Format is taken from
5085 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5086 // description of kmp_tasking_flags struct.
5090 DestructorsFlag = 0x8,
5093 unsigned Flags = Data.Tied ? TiedFlag : 0;
5094 bool NeedsCleanup = false;
5095 if (!Privates.empty()) {
5096 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5098 Flags = Flags | DestructorsFlag;
5100 if (Data.Priority.getInt())
5101 Flags = Flags | PriorityFlag;
5102 llvm::Value *TaskFlags =
5103 Data.Final.getPointer()
5104 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5105 CGF.Builder.getInt32(FinalFlag),
5106 CGF.Builder.getInt32(/*C=*/0))
5107 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5108 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5109 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5110 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5111 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5112 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5113 TaskEntry, KmpRoutineEntryPtrTy)};
5114 llvm::Value *NewTask;
5115 if (D.hasClausesOfKind<OMPNowaitClause>()) {
5116 // Check if we have any device clause associated with the directive.
5117 const Expr *Device = nullptr;
5118 if (auto *C = D.getSingleClause<OMPDeviceClause>())
5119 Device = C->getDevice();
5120 // Emit device ID if any otherwise use default value.
5121 llvm::Value *DeviceID;
5123 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5124 CGF.Int64Ty, /*isSigned=*/true);
5126 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5127 AllocArgs.push_back(DeviceID);
5128 NewTask = CGF.EmitRuntimeCall(
5129 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5131 NewTask = CGF.EmitRuntimeCall(
5132 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5134 llvm::Value *NewTaskNewTaskTTy =
5135 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5136 NewTask, KmpTaskTWithPrivatesPtrTy);
5137 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5138 KmpTaskTWithPrivatesQTy);
5140 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5141 // Fill the data in the resulting kmp_task_t record.
5142 // Copy shareds if there are any.
5143 Address KmpTaskSharedsPtr = Address::invalid();
5144 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5146 Address(CGF.EmitLoadOfScalar(
5147 CGF.EmitLValueForField(
5148 TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5151 CGF.getNaturalTypeAlignment(SharedsTy));
5152 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5153 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5154 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5156 // Emit initial values for private copies (if any).
5157 TaskResultTy Result;
5158 if (!Privates.empty()) {
5159 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5160 SharedsTy, SharedsPtrTy, Data, Privates,
5162 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
5163 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5164 Result.TaskDupFn = emitTaskDupFunction(
5165 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5166 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5167 /*WithLastIter=*/!Data.LastprivateVars.empty());
5170 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5171 enum { Priority = 0, Destructors = 1 };
5172 // Provide pointer to function with destructors for privates.
5173 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5174 const RecordDecl *KmpCmplrdataUD =
5175 (*FI)->getType()->getAsUnionType()->getDecl();
5177 llvm::Value *DestructorFn = emitDestructorsFunction(
5178 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5179 KmpTaskTWithPrivatesQTy);
5180 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5181 LValue DestructorsLV = CGF.EmitLValueForField(
5182 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5183 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5184 DestructorFn, KmpRoutineEntryPtrTy),
5188 if (Data.Priority.getInt()) {
5189 LValue Data2LV = CGF.EmitLValueForField(
5190 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5191 LValue PriorityLV = CGF.EmitLValueForField(
5192 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5193 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5195 Result.NewTask = NewTask;
5196 Result.TaskEntry = TaskEntry;
5197 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5198 Result.TDBase = TDBase;
5199 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
// Emit code for an OpenMP task-generating construct. The task object itself
// is created by emitTaskInit(); this routine then materializes the dependence
// array (when 'depend' clauses are present) and emits either the deferred
// runtime sequence or the serialized "if0" sequence, selected at runtime via
// emitOMPIfClause on the 'if'-clause condition (IfCond).
//
// Deferred path (ThenCodeGen): zero part_id, then call
//   __kmpc_omp_task_with_deps(...) when dependences exist, else
//   __kmpc_omp_task(...).
// Serialized path (ElseCodeGen): __kmpc_omp_wait_deps(...) for any
//   dependences, then the proxy task entry is invoked inline, bracketed by
//   __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.
5203 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5204 const OMPExecutableDirective &D,
5205 llvm::Function *TaskFunction,
5206 QualType SharedsTy, Address Shareds,
5208 const OMPTaskDataTy &Data) {
// Nothing to emit if the IR builder has no valid insertion point.
5209 if (!CGF.HaveInsertPoint())
5212 TaskResultTy Result =
5213 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5214 llvm::Value *NewTask = Result.NewTask;
5215 llvm::Function *TaskEntry = Result.TaskEntry;
5216 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5217 LValue TDBase = Result.TDBase;
5218 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5219 ASTContext &C = CGM.getContext();
5220 // Process list of dependences.
5221 Address DependenciesArray = Address::invalid();
5222 unsigned NumDependencies = Data.Dependences.size();
5223 if (NumDependencies) {
5224 // Dependence kind for RTL.
// These flag values are an ABI contract with libomp's kmp_depend_info
// flags field; keep in sync with openmp/runtime/src/kmp.h.
5225 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
// Field indices of the kmp_depend_info record built below.
5226 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5227 RecordDecl *KmpDependInfoRD;
5229 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5230 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
// Lazily build the kmp_depend_info record type on first use; it is cached
// in KmpDependInfoTy for the whole module.
5231 if (KmpDependInfoTy.isNull()) {
5232 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5233 KmpDependInfoRD->startDefinition();
5234 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5235 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5236 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5237 KmpDependInfoRD->completeDefinition();
5238 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5240 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5242 // Define type kmp_depend_info[<Dependences.size()>];
5243 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5244 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5245 ArrayType::Normal, /*IndexTypeQuals=*/0);
5246 // kmp_depend_info[<Dependences.size()>] deps;
5248 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
// Fill one kmp_depend_info record (base_addr, len, flags) per 'depend'
// clause item.
5249 for (unsigned I = 0; I < NumDependencies; ++I) {
5250 const Expr *E = Data.Dependences[I].second;
5251 LValue Addr = CGF.EmitLValue(E);
5253 QualType Ty = E->getType();
// Array sections: byte length = address one past the section's last
// element minus the section's begin address.
5254 if (const auto *ASE =
5255 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5257 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5258 llvm::Value *UpAddr =
5259 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5260 llvm::Value *LowIntPtr =
5261 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5262 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5263 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5265 Size = CGF.getTypeSize(Ty);
5267 LValue Base = CGF.MakeAddrLValue(
5268 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5270 // deps[i].base_addr = &<Dependences[i].second>;
5271 LValue BaseAddrLVal = CGF.EmitLValueForField(
5272 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr))
5273 CGF.EmitStoreOfScalar(
5274 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5276 // deps[i].len = sizeof(<Dependences[i].second>);
5277 LValue LenLVal = CGF.EmitLValueForField(
5278 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5279 CGF.EmitStoreOfScalar(Size, LenLVal);
5280 // deps[i].flags = <Dependences[i].first>;
5281 RTLDependenceKindTy DepKind;
5282 switch (Data.Dependences[I].first) {
5283 case OMPC_DEPEND_in:
5286 // Out and InOut dependencies must use the same code.
5287 case OMPC_DEPEND_out:
5288 case OMPC_DEPEND_inout:
5291 case OMPC_DEPEND_mutexinoutset:
5292 DepKind = DepMutexInOutSet;
// source/sink belong to 'ordered depend' and cannot reach task codegen.
5294 case OMPC_DEPEND_source:
5295 case OMPC_DEPEND_sink:
5296 case OMPC_DEPEND_unknown:
5297 llvm_unreachable("Unknown task dependence type");
5299 LValue FlagsLVal = CGF.EmitLValueForField(
5300 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5301 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
// Decay deps[] to a void* at its first element for the runtime calls.
5304 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5305 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5308 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5310 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5311 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5312 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5313 // list is not empty
5314 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5315 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5316 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5317 llvm::Value *DepTaskArgs[7];
5318 if (NumDependencies) {
5319 DepTaskArgs[0] = UpLoc;
5320 DepTaskArgs[1] = ThreadID;
5321 DepTaskArgs[2] = NewTask;
5322 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5323 DepTaskArgs[4] = DependenciesArray.getPointer();
5324 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5325 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
// Deferred execution: hand the task to the runtime for (possibly) later
// execution by any thread of the team.
5327 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5329 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
// part_id is reset to 0 before enqueueing (used by untied task switching).
5331 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5332 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5333 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5335 if (NumDependencies) {
5336 CGF.EmitRuntimeCall(
5337 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
5339 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5342 // Check if parent region is untied and build return for untied task;
5344 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5345 Region->emitUntiedSwitch(CGF);
5348 llvm::Value *DepWaitTaskArgs[6];
5349 if (NumDependencies) {
5350 DepWaitTaskArgs[0] = UpLoc;
5351 DepWaitTaskArgs[1] = ThreadID;
5352 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5353 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5354 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5355 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
// Serialized execution ('if' clause false): run the task body inline on
// the encountering thread.
5357 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5358 NumDependencies, &DepWaitTaskArgs,
5359 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5360 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5361 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5362 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5363 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5364 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5366 if (NumDependencies)
5367 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5369 // Call proxy_task_entry(gtid, new_task);
5370 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5371 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5373 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5374 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5378 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5379 // kmp_task_t *new_task);
5380 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5381 // kmp_task_t *new_task);
// CommonActionTy brackets the inline call with begin_if0/complete_if0.
5382 RegionCodeGenTy RCG(CodeGen);
5383 CommonActionTy Action(
5384 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5385 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5386 RCG.setAction(Action);
// With an 'if' clause, select between the two paths at runtime; otherwise
// (elided control flow) the deferred path is emitted unconditionally.
5391 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5393 RegionCodeGenTy ThenRCG(ThenCodeGen);
// Emit code for an OpenMP 'taskloop' construct.  The task object is created
// by emitTaskInit(); this routine then seeds the kmp_task_t lower-bound,
// upper-bound and stride fields from the loop-bound variables' initializers,
// stores the reduction-data pointer (or null), and calls __kmpc_taskloop()
// with scheduling arguments derived from Data.Schedule.
5398 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5399 const OMPLoopDirective &D,
5400 llvm::Function *TaskFunction,
5401 QualType SharedsTy, Address Shareds,
5403 const OMPTaskDataTy &Data) {
// Nothing to emit if the IR builder has no valid insertion point.
5404 if (!CGF.HaveInsertPoint())
5406 TaskResultTy Result =
5407 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5408 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5410 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5411 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5412 // sched, kmp_uint64 grainsize, void *task_dup);
5413 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5414 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
// if_val: runtime-evaluated 'if' clause condition, or constant 1 when absent.
5417 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5420 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
// Initialize kmp_task_t::lb/ub/st in the task record from the captured
// loop-bound variables' initializer expressions.
5423 LValue LBLVal = CGF.EmitLValueForField(
5425 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5427 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5428 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5429 /*IsInitializer=*/true);
5430 LValue UBLVal = CGF.EmitLValueForField(
5432 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5434 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5435 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5436 /*IsInitializer=*/true);
5437 LValue StLVal = CGF.EmitLValueForField(
5439 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5441 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5442 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5443 /*IsInitializer=*/true);
5444 // Store reductions address.
5445 LValue RedLVal = CGF.EmitLValueForField(
5447 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5448 if (Data.Reductions) {
5449 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5451 CGF.EmitNullInitialization(RedLVal.getAddress(),
5452 CGF.getContext().VoidPtrTy);
// Scheduling encoding for the 'sched' argument of __kmpc_taskloop.
5454 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5455 llvm::Value *TaskArgs[] = {
5460 LBLVal.getPointer(),
5461 UBLVal.getPointer(),
5462 CGF.EmitLoadOfScalar(StLVal, Loc),
5463 llvm::ConstantInt::getSigned(
5464 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
// Data.Schedule: the pointer is the chunk expression; the int selects
// num_tasks (true) vs. grainsize (false).  No pointer means NoSchedule.
5465 llvm::ConstantInt::getSigned(
5466 CGF.IntTy, Data.Schedule.getPointer()
5467 ? Data.Schedule.getInt() ? NumTasks : Grainsize
5469 Data.Schedule.getPointer()
5470 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5472 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
// task_dup: duplication helper for lastprivates/initialized privates,
// or null when emitTaskInit produced none.
5473 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5474 Result.TaskDupFn, CGF.VoidPtrTy)
5475 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5476 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
5479 /// Emit reduction operation for each element of array (required for
5480 /// array sections) LHS op = RHS.
5481 /// \param Type Type of array.
5482 /// \param LHSVar Variable on the left side of the reduction operation
5483 /// (references element of array in original variable).
5484 /// \param RHSVar Variable on the right side of the reduction operation
5485 /// (references element of array in original variable).
5486 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \a RedOpGen on every element iteration (default nullptr); callers may use
/// them to thread per-element update information into the generator.
5488 static void EmitOMPAggregateReduction(
5489 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5490 const VarDecl *RHSVar,
5491 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5492 const Expr *, const Expr *)> &RedOpGen,
5493 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5494 const Expr *UpExpr = nullptr) {
5495 // Perform element-by-element initialization.
5497 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5498 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5500 // Drill down to the base element type on both arrays.
5501 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5502 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5504 llvm::Value *RHSBegin = RHSAddr.getPointer();
5505 llvm::Value *LHSBegin = LHSAddr.getPointer();
5506 // Cast from pointer to array type to pointer to single element.
5507 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5508 // The basic structure here is a while-do loop.
5509 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5510 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
// Guard against a zero-length array: skip the body entirely when empty.
5511 llvm::Value *IsEmpty =
5512 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5513 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5515 // Enter the loop body, making that address the current address.
5516 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5517 CGF.EmitBlock(BodyBB);
5519 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
// PHI nodes track the current source (RHS) and destination (LHS) element
// pointers across loop iterations.
5521 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5522 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5523 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5524 Address RHSElementCurrent =
5525 Address(RHSElementPHI,
5526 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5528 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5529 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5530 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5531 Address LHSElementCurrent =
5532 Address(LHSElementPHI,
5533 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
// Temporarily remap LHSVar/RHSVar to the current elements so that RedOpGen's
// expressions (which reference the whole variables) operate element-wise.
5536 CodeGenFunction::OMPPrivateScope Scope(CGF);
5537 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5538 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5540 RedOpGen(CGF, XExpr, EExpr, UpExpr);
5541 Scope.ForceCleanup();
5543 // Shift the address forward by one element.
5544 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5545 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5546 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5547 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5548 // Check whether we've reached the end.
5550 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5551 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
// Back-edges: feed the advanced pointers into the PHIs for the next trip.
5552 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5553 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5556 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5559 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5560 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5561 /// UDR combiner function.
///
/// \param ReductionOp Combiner expression from the reduction clause.  A
///   CallExpr whose callee is an OpaqueValueExpr referring to an
///   OMPDeclareReductionDecl marks a user-defined reduction (UDR).
5562 static void emitReductionCombiner(CodeGenFunction &CGF,
5563 const Expr *ReductionOp) {
5564 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5565 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5566 if (const auto *DRE =
5567 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5568 if (const auto *DRD =
5569 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
// UDR case: rebind the opaque callee to the emitted combiner function
// (Reduction.first) and emit the call expression with that mapping.
5570 std::pair<llvm::Function *, llvm::Function *> Reduction =
5571 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5572 RValue Func = RValue::get(Reduction.first);
5573 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5574 CGF.EmitIgnoredExpr(ReductionOp);
// Simple (non-UDR) combiner: emit the expression as-is for side effects.
5577 CGF.EmitIgnoredExpr(ReductionOp);
// Emits the interprocedural reduction function
//   void reduction_func(void *LHSArg, void *RHSArg);
// where each argument points to an array of void* slots — one slot per
// reduction variable (VLAs contribute an extra slot holding their dynamic
// size).  For each variable, the corresponding ReductionOps expression is
// emitted with LHSExprs/RHSExprs remapped into the argument arrays.
5580 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5581 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5582 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5583 ArrayRef<const Expr *> ReductionOps) {
5584 ASTContext &C = CGM.getContext();
5586 // void reduction_func(void *LHSArg, void *RHSArg);
5587 FunctionArgList Args;
5588 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5589 ImplicitParamDecl::Other);
5590 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5591 ImplicitParamDecl::Other);
5592 Args.push_back(&LHSArg);
5593 Args.push_back(&RHSArg);
5595 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5596 std::string Name = getName({"omp", "reduction", "reduction_func"});
// Internal linkage: the function is only referenced by the __kmpc_reduce
// call emitted in this module.
5597 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5598 llvm::GlobalValue::InternalLinkage, Name,
5600 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5601 Fn->setDoesNotRecurse();
5602 CodeGenFunction CGF(CGM);
5603 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5605 // Dst = (void*[n])(LHSArg);
5606 // Src = (void*[n])(RHSArg);
5607 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5608 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5609 ArgsType), CGF.getPointerAlign());
5610 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5611 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5612 ArgsType), CGF.getPointerAlign());
5615 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
// Remap every LHS/RHS variable to its slot in the argument arrays; for
// variably-modified types, also recover the dynamic size from the extra
// slot so the VLA type can be re-emitted inside this function.
5617 CodeGenFunction::OMPPrivateScope Scope(CGF);
5618 auto IPriv = Privates.begin();
5620 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5621 const auto *RHSVar =
5622 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5623 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5624 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5626 const auto *LHSVar =
5627 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5628 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5629 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5631 QualType PrivTy = (*IPriv)->getType();
5632 if (PrivTy->isVariablyModifiedType()) {
5633 // Get array size and emit VLA type.
5635 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5636 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5637 const VariableArrayType *VLA =
5638 CGF.getContext().getAsVariableArrayType(PrivTy);
5639 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5640 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5641 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5642 CGF.EmitVariablyModifiedType(PrivTy);
// Emit each combiner under the remapping; array-typed privates get an
// element-wise loop, everything else a single combiner expression.
5646 IPriv = Privates.begin();
5647 auto ILHS = LHSExprs.begin();
5648 auto IRHS = RHSExprs.begin();
5649 for (const Expr *E : ReductionOps) {
5650 if ((*IPriv)->getType()->isArrayType()) {
5651 // Emit reduction for array section.
5652 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5653 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5654 EmitOMPAggregateReduction(
5655 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5656 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5657 emitReductionCombiner(CGF, E);
5660 // Emit reduction for array subscript or single variable.
5661 emitReductionCombiner(CGF, E);
5667 Scope.ForceCleanup();
5668 CGF.FinishFunction();
// Emit one reduction combiner LHS = RedOp(LHS, RHS).  Array-typed privates
// are combined element-by-element via EmitOMPAggregateReduction; scalars and
// array subscripts go straight to emitReductionCombiner.
//
// \param ReductionOp Combiner expression (plain or UDR call).
// \param PrivateRef  Private copy reference; its type selects the array path.
// \param LHS, DRE refs naming the destination and source variables.
5672 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5673 const Expr *ReductionOp,
5674 const Expr *PrivateRef,
5675 const DeclRefExpr *LHS,
5676 const DeclRefExpr *RHS) {
5677 if (PrivateRef->getType()->isArrayType()) {
5678 // Emit reduction for array section.
5679 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5680 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5681 EmitOMPAggregateReduction(
5682 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5683 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5684 emitReductionCombiner(CGF, ReductionOp);
5687 // Emit reduction for array subscript or single variable.
5688 emitReductionCombiner(CGF, ReductionOp);
5692 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5693 ArrayRef<const Expr *> Privates,
5694 ArrayRef<const Expr *> LHSExprs,
5695 ArrayRef<const Expr *> RHSExprs,
5696 ArrayRef<const Expr *> ReductionOps,
5697 ReductionOptionsTy Options) {
5698 if (!CGF.HaveInsertPoint())
5701 bool WithNowait = Options.WithNowait;
5702 bool SimpleReduction = Options.SimpleReduction;
5704 // Next code should be emitted for reduction:
5706 // static kmp_critical_name lock = { 0 };
5708 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5709 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5711 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5712 // *(Type<n>-1*)rhs[<n>-1]);
5716 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5717 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5718 // RedList, reduce_func, &<lock>)) {
5721 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5723 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5727 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5729 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5734 // if SimpleReduction is true, only the next code is generated:
5736 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5739 ASTContext &C = CGM.getContext();
5741 if (SimpleReduction) {
5742 CodeGenFunction::RunCleanupsScope Scope(CGF);
5743 auto IPriv = Privates.begin();
5744 auto ILHS = LHSExprs.begin();
5745 auto IRHS = RHSExprs.begin();
5746 for (const Expr *E : ReductionOps) {
5747 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5748 cast<DeclRefExpr>(*IRHS));
5756 // 1. Build a list of reduction variables.
5757 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5758 auto Size = RHSExprs.size();
5759 for (const Expr *E : Privates) {
5760 if (E->getType()->isVariablyModifiedType())
5761 // Reserve place for array size.
5764 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5765 QualType ReductionArrayTy =
5766 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
5767 /*IndexTypeQuals=*/0);
5768 Address ReductionList =
5769 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5770 auto IPriv = Privates.begin();
5772 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5773 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5774 CGF.Builder.CreateStore(
5775 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5776 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5778 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5779 // Store array size.
5781 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5782 llvm::Value *Size = CGF.Builder.CreateIntCast(
5784 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5786 CGF.SizeTy, /*isSigned=*/false);
5787 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5792 // 2. Emit reduce_func().
5793 llvm::Function *ReductionFn = emitReductionFunction(
5794 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5795 LHSExprs, RHSExprs, ReductionOps);
5797 // 3. Create static kmp_critical_name lock = { 0 };
5798 std::string Name = getName({"reduction"});
5799 llvm::Value *Lock = getCriticalRegionLock(Name);
5801 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5802 // RedList, reduce_func, &<lock>);
5803 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5804 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5805 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5806 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5807 ReductionList.getPointer(), CGF.VoidPtrTy);
5808 llvm::Value *Args[] = {
5809 IdentTLoc, // ident_t *<loc>
5810 ThreadId, // i32 <gtid>
5811 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5812 ReductionArrayTySize, // size_type sizeof(RedList)
5813 RL, // void *RedList
5814 ReductionFn, // void (*) (void *, void *) <reduce_func>
5815 Lock // kmp_critical_name *&<lock>
5817 llvm::Value *Res = CGF.EmitRuntimeCall(
5818 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
5819 : OMPRTL__kmpc_reduce),
5822 // 5. Build switch(res)
5823 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5824 llvm::SwitchInst *SwInst =
5825 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5829 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5831 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5833 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5834 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5835 CGF.EmitBlock(Case1BB);
5837 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5838 llvm::Value *EndArgs[] = {
5839 IdentTLoc, // ident_t *<loc>
5840 ThreadId, // i32 <gtid>
5841 Lock // kmp_critical_name *&<lock>
5843 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5844 CodeGenFunction &CGF, PrePostActionTy &Action) {
5845 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5846 auto IPriv = Privates.begin();
5847 auto ILHS = LHSExprs.begin();
5848 auto IRHS = RHSExprs.begin();
5849 for (const Expr *E : ReductionOps) {
5850 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5851 cast<DeclRefExpr>(*IRHS));
5857 RegionCodeGenTy RCG(CodeGen);
5858 CommonActionTy Action(
5859 nullptr, llvm::None,
5860 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5861 : OMPRTL__kmpc_end_reduce),
5863 RCG.setAction(Action);
5866 CGF.EmitBranch(DefaultBB);
5870 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5873 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5874 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5875 CGF.EmitBlock(Case2BB);
5877 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5878 CodeGenFunction &CGF, PrePostActionTy &Action) {
5879 auto ILHS = LHSExprs.begin();
5880 auto IRHS = RHSExprs.begin();
5881 auto IPriv = Privates.begin();
5882 for (const Expr *E : ReductionOps) {
5883 const Expr *XExpr = nullptr;
5884 const Expr *EExpr = nullptr;
5885 const Expr *UpExpr = nullptr;
5886 BinaryOperatorKind BO = BO_Comma;
5887 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5888 if (BO->getOpcode() == BO_Assign) {
5889 XExpr = BO->getLHS();
5890 UpExpr = BO->getRHS();
5893 // Try to emit update expression as a simple atomic.
5894 const Expr *RHSExpr = UpExpr;
5896 // Analyze RHS part of the whole expression.
5897 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5898 RHSExpr->IgnoreParenImpCasts())) {
5899 // If this is a conditional operator, analyze its condition for
5900 // min/max reduction operator.
5901 RHSExpr = ACO->getCond();
5903 if (const auto *BORHS =
5904 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5905 EExpr = BORHS->getRHS();
5906 BO = BORHS->getOpcode();
5910 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5911 auto &&AtomicRedGen = [BO, VD,
5912 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5913 const Expr *EExpr, const Expr *UpExpr) {
5914 LValue X = CGF.EmitLValue(XExpr);
5917 E = CGF.EmitAnyExpr(EExpr);
5918 CGF.EmitOMPAtomicSimpleUpdateExpr(
5919 X, E, BO, /*IsXLHSInRHSPart=*/true,
5920 llvm::AtomicOrdering::Monotonic, Loc,
5921 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5922 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5923 PrivateScope.addPrivate(
5924 VD, [&CGF, VD, XRValue, Loc]() {
5925 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5926 CGF.emitOMPSimpleStore(
5927 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5928 VD->getType().getNonReferenceType(), Loc);
5931 (void)PrivateScope.Privatize();
5932 return CGF.EmitAnyExpr(UpExpr);
5935 if ((*IPriv)->getType()->isArrayType()) {
5936 // Emit atomic reduction for array section.
5937 const auto *RHSVar =
5938 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5939 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5940 AtomicRedGen, XExpr, EExpr, UpExpr);
5942 // Emit atomic reduction for array subscript or single variable.
5943 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5946 // Emit as a critical region.
5947 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5948 const Expr *, const Expr *) {
5949 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5950 std::string Name = RT.getName({"atomic_reduction"});
5951 RT.emitCriticalRegion(
5953 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5955 emitReductionCombiner(CGF, E);
5959 if ((*IPriv)->getType()->isArrayType()) {
5960 const auto *LHSVar =
5961 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5962 const auto *RHSVar =
5963 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5964 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5967 CritRedGen(CGF, nullptr, nullptr, nullptr);
5975 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5977 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5978 llvm::Value *EndArgs[] = {
5979 IdentTLoc, // ident_t *<loc>
5980 ThreadId, // i32 <gtid>
5981 Lock // kmp_critical_name *&<lock>
5983 CommonActionTy Action(nullptr, llvm::None,
5984 createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5986 AtomicRCG.setAction(Action);
5992 CGF.EmitBranch(DefaultBB);
5993 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5996 /// Generates unique name for artificial threadprivate variables.
5997 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
// The name must be deterministic for a given declaration: the threadprivate
// globals written in emitTaskReductionFixups and read back inside the
// .red_init/.red_comb/.red_fini helper functions locate each other by this
// string.
5998 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
// NOTE(review): the parameter list looks truncated in this copy -- the body
// below uses a `Ref` expression parameter (the reduction item reference);
// confirm against the full file.
6000 SmallString<256> Buffer;
6001 llvm::raw_svector_ostream Out(Buffer);
6002 const clang::DeclRefExpr *DE;
// Peel array-section/subscript wrappers off `Ref` to reach the base variable.
6003 const VarDecl *D = ::getBaseDecl(Ref, DE);
6005 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6006 D = D->getCanonicalDecl();
// Locals/params keep their plain name; globals use the module-level mangled
// name so the generated identifier is unique across the program.
6007 std::string Name = CGM.getOpenMPRuntime().getName(
6008 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
// Appending the raw begin-location encoding disambiguates shadowed decls
// that share a spelling.
6009 Out << Prefix << Name << "_"
6010 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6014 /// Emits reduction initializer function:
6016 /// void @.red_init(void* %arg) {
6017 /// %0 = bitcast void* %arg to <type>*
6018 /// store <type> <init>, <type>* %0
// Returns the internal-linkage helper whose address is stored into the
// kmp_task_red_input_t::reduce_init field by emitTaskReductionInit.
6022 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
// NOTE(review): a `SourceLocation Loc` parameter appears to be elided from
// this truncated signature -- `Loc` is used throughout the body; confirm.
6024 ReductionCodeGen &RCG, unsigned N) {
6025 ASTContext &C = CGM.getContext();
6026 FunctionArgList Args;
// Single implicit void* parameter: points at the private copy to initialize.
6027 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6028 ImplicitParamDecl::Other);
6029 Args.emplace_back(&Param);
6030 const auto &FnInfo =
6031 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6032 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6033 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6034 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6035 Name, &CGM.getModule());
6036 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6037 Fn->setDoesNotRecurse();
6038 CodeGenFunction CGF(CGM);
6039 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6040 Address PrivateAddr = CGF.EmitLoadOfPointer(
6041 CGF.GetAddrOfLocalVar(&Param),
6042 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6043 llvm::Value *Size = nullptr;
6044 // If the size of the reduction item is non-constant, load it from global
6045 // threadprivate variable.
// (The variable was stored by emitTaskReductionFixups under the matching
// "reduction_size" unique name.)
6046 if (RCG.getSizes(N).second) {
6047 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6048 CGF, CGM.getContext().getSizeType(),
6049 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6050 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6051 CGM.getContext().getSizeType(), Loc);
6053 RCG.emitAggregateType(CGF, N, Size);
6055 // If initializer uses initializer from declare reduction construct, emit a
6056 // pointer to the address of the original reduction item (required by reduction
// initializer); otherwise a null shared lvalue is passed below.
6058 if (RCG.usesReductionInitializer(N)) {
6059 Address SharedAddr =
6060 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6061 CGF, CGM.getContext().VoidPtrTy,
6062 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6063 SharedAddr = CGF.EmitLoadOfPointer(
6065 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr())6066 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6068 SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6069 llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6070 CGM.getContext().VoidPtrTy);
6072 // Emit the initializer:
6073 // %0 = bitcast void* %arg to <type>*
6074 // store <type> <init>, <type>* %0
// NOTE(review): the trailing lambda looks like a "default-init?" predicate
// that always answers false here -- confirm against
// ReductionCodeGen::emitInitialization.
6075 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6076 [](CodeGenFunction &) { return false; });
6077 CGF.FinishFunction();
6081 /// Emits reduction combiner function:
6083 /// void @.red_comb(void* %arg0, void* %arg1) {
6084 /// %lhs = bitcast void* %arg0 to <type>*
6085 /// %rhs = bitcast void* %arg1 to <type>*
6086 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6087 /// store <type> %2, <type>* %lhs
// Returns the internal-linkage helper stored into the
// kmp_task_red_input_t::reduce_comb field by emitTaskReductionInit.
6091 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
// NOTE(review): a `SourceLocation Loc` parameter appears to be elided from
// this truncated signature -- `Loc` is used throughout the body; confirm.
6093 ReductionCodeGen &RCG, unsigned N,
6094 const Expr *ReductionOp,
6095 const Expr *LHS, const Expr *RHS,
6096 const Expr *PrivateRef) {
6097 ASTContext &C = CGM.getContext();
// LHS/RHS placeholder variables from the reduction clause; they get remapped
// onto the two function arguments below.
6098 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6099 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6100 FunctionArgList Args;
// arg0 = in/out (accumulator) item, arg1 = in item, both as void*.
6101 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6102 C.VoidPtrTy, ImplicitParamDecl::Other);
6103 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6104 ImplicitParamDecl::Other);
6105 Args.emplace_back(&ParamInOut);
6106 Args.emplace_back(&ParamIn);
6107 const auto &FnInfo =
6108 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6109 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6110 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6111 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6112 Name, &CGM.getModule());
6113 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6114 Fn->setDoesNotRecurse();
6115 CodeGenFunction CGF(CGM);
6116 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6117 llvm::Value *Size = nullptr;
6118 // If the size of the reduction item is non-constant, load it from global
6119 // threadprivate variable.
// (Written earlier by emitTaskReductionFixups under the same unique name.)
6120 if (RCG.getSizes(N).second) {
6121 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6122 CGF, CGM.getContext().getSizeType(),
6123 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6124 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6125 CGM.getContext().getSizeType(), Loc);
6127 RCG.emitAggregateType(CGF, N, Size);
6128 // Remap lhs and rhs variables to the addresses of the function arguments.
6129 // %lhs = bitcast void* %arg0 to <type>*
6130 // %rhs = bitcast void* %arg1 to <type>*
6131 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6132 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6133 // Pull out the pointer to the variable.
6134 Address PtrAddr = CGF.EmitLoadOfPointer(
6135 CGF.GetAddrOfLocalVar(&ParamInOut),
6136 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6137 return CGF.Builder.CreateElementBitCast(
6138 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6140 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6141 // Pull out the pointer to the variable.
6142 Address PtrAddr = CGF.EmitLoadOfPointer(
6143 CGF.GetAddrOfLocalVar(&ParamIn),
6144 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6145 return CGF.Builder.CreateElementBitCast(
6146 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6148 PrivateScope.Privatize();
6149 // Emit the combiner body:
6150 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6151 // store <type> %2, <type>* %lhs
6152 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6153 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6154 cast<DeclRefExpr>(RHS));
6155 CGF.FinishFunction();
6159 /// Emits reduction finalizer function:
6161 /// void @.red_fini(void* %arg) {
6162 /// %0 = bitcast void* %arg to <type>*
6163 /// <destroy>(<type>* %0)
// Returns the internal-linkage helper stored into the
// kmp_task_red_input_t::reduce_fini field, or null (no helper is built) when
// the item needs no cleanups -- the caller stores a null pointer then.
6167 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
// NOTE(review): a `SourceLocation Loc` parameter appears to be elided from
// this truncated signature -- `Loc` is used throughout the body; confirm.
6169 ReductionCodeGen &RCG, unsigned N) {
// Early-out when the reduction item's type has no destructor-like cleanup.
// NOTE(review): the `return nullptr;` body of this `if` appears elided here.
6170 if (!RCG.needCleanups(N))
6172 ASTContext &C = CGM.getContext();
6173 FunctionArgList Args;
// Single implicit void* parameter: points at the private copy to destroy.
6174 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6175 ImplicitParamDecl::Other);
6176 Args.emplace_back(&Param);
6177 const auto &FnInfo =
6178 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6179 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6180 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6181 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6182 Name, &CGM.getModule());
6183 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6184 Fn->setDoesNotRecurse();
6185 CodeGenFunction CGF(CGM);
6186 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6187 Address PrivateAddr = CGF.EmitLoadOfPointer(
6188 CGF.GetAddrOfLocalVar(&Param),
6189 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6190 llvm::Value *Size = nullptr;
6191 // If the size of the reduction item is non-constant, load it from global
6192 // threadprivate variable.
// (Written earlier by emitTaskReductionFixups under the same unique name.)
6193 if (RCG.getSizes(N).second) {
6194 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6195 CGF, CGM.getContext().getSizeType(),
6196 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6197 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6198 CGM.getContext().getSizeType(), Loc);
6200 RCG.emitAggregateType(CGF, N, Size);
6201 // Emit the finalizer body:
6202 // <destroy>(<type>* %0)
6203 RCG.emitCleanups(CGF, N, PrivateAddr);
6204 CGF.FinishFunction();
/// Emits the task-reduction setup for a taskgroup: builds an on-stack array
/// of kmp_task_red_input_t descriptors (one per reduction item, each carrying
/// the shared address, size, and init/fini/comb helper pointers) and calls
/// __kmpc_task_reduction_init, returning the taskgroup descriptor produced by
/// the runtime.
6208 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6209 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6210 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
// Nothing to do without an insert point or without reduction items.
// NOTE(review): the early-return value line appears elided here.
6211 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6214 // Build typedef struct:
6215 // kmp_task_red_input {
6216 // void *reduce_shar; // shared reduction item
6217 // size_t reduce_size; // size of data item
6218 // void *reduce_init; // data initialization routine
6219 // void *reduce_fini; // data finalization routine
6220 // void *reduce_comb; // data combiner routine
6221 // kmp_task_red_flags_t flags; // flags for additional info from compiler
6222 // } kmp_task_red_input_t;
6223 ASTContext &C = CGM.getContext();
6224 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6225 RD->startDefinition();
6226 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6227 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6228 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6229 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6230 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6231 const FieldDecl *FlagsFD = addFieldToRecordDecl(
6232 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6233 RD->completeDefinition();
6234 QualType RDType = C.getRecordType(RD);
6235 unsigned Size = Data.ReductionVars.size();
6236 llvm::APInt ArraySize(/*numBits=*/64, Size);
6237 QualType ArrayRDType = C.getConstantArrayType(
6238 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
6239 // kmp_task_red_input_t .rd_input.[Size];
6240 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6241 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
// Fill one descriptor element per reduction item.
6243 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6244 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6245 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6246 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6247 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6248 TaskRedInput.getPointer(), Idxs,
6249 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6251 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6252 // ElemLVal.reduce_shar = &Shareds[Cnt];
6253 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6254 RCG.emitSharedLValue(CGF, Cnt);
6255 llvm::Value *CastedShared =
6256 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
6257 CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
// ElemLVal.reduce_size = sizeof-in-chars of the item.
6258 RCG.emitAggregateType(CGF, Cnt);
6259 llvm::Value *SizeValInChars;
6260 llvm::Value *SizeVal;
6261 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6262 // We use delayed creation/initialization for VLAs, array sections and
6263 // custom reduction initializations. It is required because runtime does not
6264 // provide the way to pass the sizes of VLAs/array sections to
6265 // initializer/combiner/finalizer functions and does not pass the pointer to
6266 // original reduction item to the initializer. Instead threadprivate global
6267 // variables are used to store these values and use them in the functions.
// A non-null runtime size value marks the item as VLA/array-section.
6268 bool DelayedCreation = !!SizeVal;
6269 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6270 /*isSigned=*/false);
6271 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6272 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6273 // ElemLVal.reduce_init = init;
6274 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6275 llvm::Value *InitAddr =
6276 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6277 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
// Custom (declare reduction) initializers also require delayed creation.
6278 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6279 // ElemLVal.reduce_fini = fini;
6280 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6281 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
// A null finalizer (no cleanups needed) is stored as a null pointer.
6282 llvm::Value *FiniAddr = Fini
6283 ? CGF.EmitCastToVoidPtr(Fini)
6284 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6285 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6286 // ElemLVal.reduce_comb = comb;
6287 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6288 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6289 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6290 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6291 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6292 // ElemLVal.flags = 0;
6293 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
// flags = 1 signals lazy (delayed) creation to the runtime; 0 otherwise.
6294 if (DelayedCreation) {
6295 CGF.EmitStoreOfScalar(
6296 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6299 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6301 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6303 llvm::Value *Args[] = {
6304 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6306 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6307 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
// The runtime's return value is the taskgroup reduction descriptor handle.
6309 return CGF.EmitRuntimeCall(
6310 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
/// Stores per-item side data (size, shared address) into artificial
/// threadprivate globals so the standalone .red_init/.red_comb/.red_fini
/// helpers, which receive only a void* argument, can retrieve it via the
/// matching generateUniqueName key.
6313 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6315 ReductionCodeGen &RCG,
6317 auto Sizes = RCG.getSizes(N);
6318 // Emit threadprivate global variable if the type is non-constant
6319 // (Sizes.second = nullptr).
// NOTE(review): the guarding `if` line for this branch appears elided here;
// the stores below only make sense when Sizes.second is non-null.
6321 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6322 /*isSigned=*/false);
6323 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6324 CGF, CGM.getContext().getSizeType(),
6325 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6326 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6328 // Store address of the original reduction item if custom initializer is used.
6329 if (RCG.usesReductionInitializer(N)) {
6330 Address SharedAddr = getAddrOfArtificialThreadPrivate(
6331 CGF, CGM.getContext().VoidPtrTy,
6332 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6333 CGF.Builder.CreateStore(
6334 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6335 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
6336 SharedAddr, /*IsVolatile=*/false);
/// Returns the address of this thread's private copy of a task-reduction
/// item by calling __kmpc_task_reduction_get_th_data(gtid, tg, item), where
/// `tg` is the taskgroup descriptor returned by emitTaskReductionInit. The
/// result keeps the alignment of the original shared lvalue.
6340 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6342 llvm::Value *ReductionsPtr,
6343 LValue SharedLVal) {
6344 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6346 llvm::Value *Args[] = {
6347 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6350 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
6353 CGF.EmitRuntimeCall(
6354 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
6355 SharedLVal.getAlignment());
/// Emits a '#pragma omp taskwait': calls __kmpc_omp_taskwait(loc, gtid) and,
/// when inside an OpenMP region, emits the untied-task resume switch.
6358 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
6359 SourceLocation Loc) {
// No insert point means unreachable code -- emit nothing.
6360 if (!CGF.HaveInsertPoint())
6362 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6364 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6365 // Ignore return result until untied tasks are supported.
6366 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
6367 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6368 Region->emitUntiedSwitch(CGF);
/// Emits the body of a directive inline (no outlining): installs an inlined
/// OpenMP region via RAII so CapturedStmtInfo reflects InnerKind/HasCancel,
/// then emits the region body through the CodeGen callback.
6371 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6372 OpenMPDirectiveKind InnerKind,
6373 const RegionCodeGenTy &CodeGen,
// No insert point means unreachable code -- emit nothing.
6375 if (!CGF.HaveInsertPoint())
6377 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6378 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6389 } // anonymous namespace
/// Maps the directive named in a 'cancel'/'cancellation point' construct to
/// the kmp_int32 cancellation-kind constant expected by __kmpc_cancel and
/// __kmpc_cancellationpoint.
6391 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6392 RTCancelKind CancelKind = CancelNoreq;
6393 if (CancelRegion == OMPD_parallel)
6394 CancelKind = CancelParallel;
6395 else if (CancelRegion == OMPD_for)
6396 CancelKind = CancelLoop;
6397 else if (CancelRegion == OMPD_sections)
6398 CancelKind = CancelSections;
// Only taskgroup remains: Sema rejects every other construct-name here.
6400 assert(CancelRegion == OMPD_taskgroup);
6401 CancelKind = CancelTaskgroup;
/// Emits '#pragma omp cancellation point': calls __kmpc_cancellationpoint and
/// branches to the enclosing region's cancel destination when the runtime
/// reports a pending cancellation (non-zero result).
6406 void CGOpenMPRuntime::emitCancellationPointCall(
6407 CodeGenFunction &CGF, SourceLocation Loc,
6408 OpenMPDirectiveKind CancelRegion) {
// No insert point means unreachable code -- emit nothing.
6409 if (!CGF.HaveInsertPoint())
6411 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6412 // global_tid, kmp_int32 cncl_kind);
6413 if (auto *OMPRegionInfo =
6414 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6415 // For 'cancellation point taskgroup', the task region info may not have a
6416 // cancel. This may instead happen in another adjacent task.
6417 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6418 llvm::Value *Args[] = {
6419 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6420 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6421 // Ignore return result until untied tasks are supported.
6422 llvm::Value *Result = CGF.EmitRuntimeCall(
6423 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
6424 // if (__kmpc_cancellationpoint()) {
6425 // exit from construct;
6427 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6428 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6429 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6430 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6431 CGF.EmitBlock(ExitBB);
6432 // exit from construct;
// Branch through active cleanups so destructors/finalizers still run.
6433 CodeGenFunction::JumpDest CancelDest =
6434 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6435 CGF.EmitBranchThroughCleanup(CancelDest);
6436 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
/// Emits '#pragma omp cancel': calls __kmpc_cancel(loc, gtid, kind) (guarded
/// by the 'if' clause condition when present) and exits the construct through
/// cleanups when the runtime reports cancellation (non-zero result).
6441 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6443 OpenMPDirectiveKind CancelRegion) {
// No insert point means unreachable code -- emit nothing.
6444 if (!CGF.HaveInsertPoint())
6446 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6447 // kmp_int32 cncl_kind);
6448 if (auto *OMPRegionInfo =
6449 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
// ThenGen emits the actual cancel call; it runs unconditionally when there
// is no 'if' clause, otherwise only on the true branch of the condition.
6450 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6451 PrePostActionTy &) {
6452 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6453 llvm::Value *Args[] = {
6454 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6455 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6456 // Ignore return result until untied tasks are supported.
6457 llvm::Value *Result = CGF.EmitRuntimeCall(
6458 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6459 // if (__kmpc_cancel()) {
6460 // exit from construct;
6462 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6463 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6464 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6465 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6466 CGF.EmitBlock(ExitBB);
6467 // exit from construct;
// Branch through active cleanups so destructors/finalizers still run.
6468 CodeGenFunction::JumpDest CancelDest =
6469 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6470 CGF.EmitBranchThroughCleanup(CancelDest);
6471 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
// With an 'if' clause: guard ThenGen by the condition (empty else branch).
// NOTE(review): the `if (IfCond)` guard line appears elided in this copy.
6474 emitOMPIfClause(CGF, IfCond, ThenGen,
6475 [](CodeGenFunction &, PrePostActionTy &) {});
6477 RegionCodeGenTy ThenRCG(ThenGen);
/// Outlines a 'target' region for the host runtime: records that a target
/// region was emitted (used later for offload registration bookkeeping) and
/// delegates the actual outlining to emitTargetOutlinedFunctionHelper.
6483 void CGOpenMPRuntime::emitTargetOutlinedFunction(
6484 const OMPExecutableDirective &D, StringRef ParentName,
6485 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6486 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6487 assert(!ParentName.empty() && "Invalid target region parent name!");
6488 HasEmittedTargetRegion = true;
6489 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6490 IsOffloadEntry, CodeGen);
/// Shared outlining logic for 'target' regions: generates the entry function
/// with a location-derived unique name, creates the region ID constant
/// (function address on device, anonymous global on host), and registers the
/// entry with the offload-entries manager.
6493 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6494 const OMPExecutableDirective &D, StringRef ParentName,
6495 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6496 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6497 // Create a unique name for the entry function using the source location
6498 // information of the current target region. The name will be something like:
6500 // __omp_offloading_DD_FFFF_PP_lBB
6502 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6503 // mangled name of the function that encloses the target region and BB is the
6504 // line number of the target region.
// NOTE(review): the local declarations of DeviceID/FileID/Line appear elided
// in this copy; they are filled in by getTargetEntryUniqueInfo below.
6509 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6511 SmallString<64> EntryFnName;
6513 llvm::raw_svector_ostream OS(EntryFnName);
6514 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6515 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6518 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6520 CodeGenFunction CGF(CGM, true);
6521 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6522 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6524 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6526 // If this target outline function is not an offload entry, we don't need to
6528 if (!IsOffloadEntry)
6531 // The target region ID is used by the runtime library to identify the current
6532 // target region, so it only has to be unique and not necessarily point to
6533 // anything. It could be the pointer to the outlined function that implements
6534 // the target region, but we aren't using that so that the compiler doesn't
6535 // need to keep that, and could therefore inline the host function if proven
6536 // worthwhile during optimization. On the other hand, if emitting code for the
6537 // device, the ID has to be the function address so that it can retrieved from
6538 // the offloading entry and launched by the runtime library. We also mark the
6539 // outlined function to have external linkage in case we are emitting code for
6540 // the device, because these functions will be entry points to the device.
6542 if (CGM.getLangOpts().OpenMPIsDevice) {
6543 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6544 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6545 OutlinedFn->setDSOLocal(false);
6547 std::string Name = getName({EntryFnName, "region_id"});
// Host side: a private constant i8 global whose *address* serves as the ID.
6548 OutlinedFnID = new llvm::GlobalVariable(
6549 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6550 llvm::GlobalValue::WeakAnyLinkage,
6551 llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6554 // Register the information for the entry associated with this target region.
6555 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6556 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6557 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6560 /// Checks if the expression is constant or does not have non-trivial function
/// calls, and is free of side effects. Used by getSingleCompoundChild to
/// decide which expression statements may be ignored when searching for the
/// single "interesting" child of a compound statement.
6562 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6563 // We can skip constant expressions.
6564 // We can skip expressions with trivial calls or simple expressions.
6565 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6566 !E->hasNonTrivialCall(Ctx)) &&
6567 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
/// Walks nested compound statements and returns the single "meaningful" child
/// of Body, skipping trivial expressions, no-op statements, and trivially
/// initialized declarations. Used to detect patterns like a lone nested
/// directive inside a target region body.
6570 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6572 const Stmt *Child = Body->IgnoreContainers();
6573 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6575 for (const Stmt *S : C->body()) {
// Trivial expression statements are not "the" child.
6576 if (const auto *E = dyn_cast<Expr>(S)) {
6577 if (isTrivial(Ctx, E))
6580 // Some of the statements can be ignored.
6581 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6582 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6584 // Analyze declarations.
6585 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
// A DeclStmt is ignorable only if every decl in it is "harmless":
// type-level decls, OpenMP metadata decls, or variables whose
// initialization cannot have observable effects.
6586 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6587 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6588 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6589 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6590 isa<UsingDirectiveDecl>(D) ||
6591 isa<OMPDeclareReductionDecl>(D) ||
6592 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6594 const auto *VD = dyn_cast<VarDecl>(D);
6597 return VD->isConstexpr() ||
6598 ((VD->getType().isTrivialType(Ctx) ||
6599 VD->getType()->isReferenceType()) &&
6600 (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6604 // Found multiple children - cannot get the one child only.
// Descend into the candidate in case it is itself a compound statement.
6610 Child = Child->IgnoreContainers();
6615 /// Emit the number of teams for a target directive. Inspect the num_teams
6616 /// clause associated with a teams construct combined or closely nested
6617 /// with the target directive.
6619 /// Emit a team of size one for directives such as 'target parallel' that
6620 /// have no associated teams construct.
6622 /// Otherwise, return nullptr.
// Return-value convention visible below: getInt32(0) means "let the runtime
// choose", getInt32(1) means exactly one team, and a computed value comes
// from the num_teams clause expression.
6623 static llvm::Value *
6624 emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
6625 const OMPExecutableDirective &D) {
6626 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6627 "Clauses associated with the teams directive expected to be emitted "
6628 "only for the host!");
6629 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6630 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6631 "Expected target-based executable directive.");
6632 CGBuilderTy &Bld = CGF.Builder;
6633 switch (DirectiveKind) {
// Plain 'target': look inside its captured body for a nested teams/parallel
// directive to decide the team count.
// NOTE(review): the `case OMPD_target:` label appears elided in this copy.
6635 const auto *CS = D.getInnermostCapturedStmt();
6637 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6638 const Stmt *ChildStmt =
6639 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6640 if (const auto *NestedDir =
6641 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6642 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6643 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
// Evaluate the nested num_teams expression in the captured context.
6644 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6645 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6646 const Expr *NumTeams =
6647 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6648 llvm::Value *NumTeamsVal =
6649 CGF.EmitScalarExpr(NumTeams,
6650 /*IgnoreResultAssign*/ true);
6651 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6654 return Bld.getInt32(0);
// Nested parallel/simd implies a single team.
6656 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6657 isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6658 return Bld.getInt32(1);
6659 return Bld.getInt32(0);
// Combined target+teams forms: the num_teams clause lives on D itself.
6663 case OMPD_target_teams:
6664 case OMPD_target_teams_distribute:
6665 case OMPD_target_teams_distribute_simd:
6666 case OMPD_target_teams_distribute_parallel_for:
6667 case OMPD_target_teams_distribute_parallel_for_simd: {
6668 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6669 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6670 const Expr *NumTeams =
6671 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6672 llvm::Value *NumTeamsVal =
6673 CGF.EmitScalarExpr(NumTeams,
6674 /*IgnoreResultAssign*/ true);
6675 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6678 return Bld.getInt32(0);
// target+parallel/simd forms without teams: exactly one team.
6680 case OMPD_target_parallel:
6681 case OMPD_target_parallel_for:
6682 case OMPD_target_parallel_for_simd:
6683 case OMPD_target_simd:
6684 return Bld.getInt32(1);
// Every non-target directive kind is unreachable here (guarded by the
// isOpenMPTargetExecutionDirective assert above).
6687 case OMPD_parallel_for:
6688 case OMPD_parallel_sections:
6690 case OMPD_parallel_for_simd:
6692 case OMPD_cancellation_point:
6694 case OMPD_threadprivate:
6703 case OMPD_taskyield:
6706 case OMPD_taskgroup:
6710 case OMPD_target_data:
6711 case OMPD_target_exit_data:
6712 case OMPD_target_enter_data:
6713 case OMPD_distribute:
6714 case OMPD_distribute_simd:
6715 case OMPD_distribute_parallel_for:
6716 case OMPD_distribute_parallel_for_simd:
6717 case OMPD_teams_distribute:
6718 case OMPD_teams_distribute_simd:
6719 case OMPD_teams_distribute_parallel_for:
6720 case OMPD_teams_distribute_parallel_for_simd:
6721 case OMPD_target_update:
6722 case OMPD_declare_simd:
6723 case OMPD_declare_target:
6724 case OMPD_end_declare_target:
6725 case OMPD_declare_reduction:
6726 case OMPD_declare_mapper:
6728 case OMPD_taskloop_simd:
6733 llvm_unreachable("Unexpected directive kind.");
/// Compute an i32 llvm::Value for the number of threads implied by a
/// 'parallel' region that is the single compound child of the captured
/// statement \p CS, clamped against \p DefaultThreadLimitVal when one is
/// provided. Falls back to \p DefaultThreadLimitVal (which may be null)
/// when no inner parallel directive is found.
/// NOTE(review): several original lines (closing braces and a few
/// statements) are elided in this extract; comments describe only the
/// visible code.
6736 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6737 llvm::Value *DefaultThreadLimitVal) {
// Look through the captured statement for a single executable directive.
6738 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6739 CGF.getContext(), CS->getCapturedStmt());
6740 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6741 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6742 llvm::Value *NumThreads = nullptr;
6743 llvm::Value *CondVal = nullptr;
6744 // Handle if clause. If if clause present, the number of threads is
6745 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6746 if (Dir->hasClausesOfKind<OMPIfClause>()) {
// Clause expressions must be emitted in the context of the captured
// region, hence the inner-expression capture info RAII below.
6747 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6748 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6749 const OMPIfClause *IfClause = nullptr;
// Select the 'if' clause that applies to 'parallel': either no name
// modifier or the explicit 'parallel' modifier.
6750 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6751 if (C->getNameModifier() == OMPD_unknown ||
6752 C->getNameModifier() == OMPD_parallel) {
6758 const Expr *Cond = IfClause->getCondition();
// If the condition folds to a compile-time constant, a false value
// means the parallel region executes with a single thread.
6760 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6762 return CGF.Builder.getInt32(1);
6764 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
// Emit any pre-init declarations the clause expression depends on.
6765 if (const auto *PreInit =
6766 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6767 for (const auto *I : PreInit->decls()) {
6768 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6769 CGF.EmitVarDecl(cast<VarDecl>(*I));
6771 CodeGenFunction::AutoVarEmission Emission =
6772 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6773 CGF.EmitAutoVarCleanups(Emission);
6777 CondVal = CGF.EvaluateExprAsBool(Cond);
6781 // Check the value of num_threads clause iff if clause was not specified
6782 // or is not evaluated to false.
6783 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6784 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6785 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6786 const auto *NumThreadsClause =
6787 Dir->getSingleClause<OMPNumThreadsClause>();
6788 CodeGenFunction::LexicalScope Scope(
6789 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
// Emit pre-init declarations for the num_threads expression, if any.
6790 if (const auto *PreInit =
6791 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6792 for (const auto *I : PreInit->decls()) {
6793 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6794 CGF.EmitVarDecl(cast<VarDecl>(*I));
6796 CodeGenFunction::AutoVarEmission Emission =
6797 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6798 CGF.EmitAutoVarCleanups(Emission);
6802 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6803 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6804 /*isSigned=*/false);
// Clamp to the default thread limit: min(DefaultThreadLimitVal,
// NumThreads) using an unsigned comparison.
6805 if (DefaultThreadLimitVal)
6806 NumThreads = CGF.Builder.CreateSelect(
6807 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6808 DefaultThreadLimitVal, NumThreads);
6810 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6811 : CGF.Builder.getInt32(0);
6813 // Process condition of the if clause.
// A runtime-evaluated 'if' condition selects between the computed
// thread count and 1 (serialized parallel region).
6815 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6816 CGF.Builder.getInt32(1));
// SIMD-only inner directives always run with a single thread.
6820 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6821 return CGF.Builder.getInt32(1);
6822 return DefaultThreadLimitVal;
// No inner directive: 0 means "use the runtime default" when no explicit
// limit was supplied.
6824 return DefaultThreadLimitVal ? DefaultThreadLimitVal
6825 : CGF.Builder.getInt32(0);
6828 /// Emit the number of threads for a target directive. Inspect the
6829 /// thread_limit clause associated with a teams construct combined or closely
6830 /// nested with the target directive.
6832 /// Emit the num_threads clause for directives such as 'target parallel' that
6833 /// have no associated teams construct.
6835 /// Otherwise, return nullptr.
/// NOTE(review): this extract elides some original lines (case labels,
/// closing braces and a few statements); comments describe only the visible
/// code. Must only be called on the host side of the compilation.
6836 static llvm::Value *
6837 emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
6838 const OMPExecutableDirective &D) {
6839 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6840 "Clauses associated with the teams directive expected to be emitted "
6841 "only for the host!");
6842 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6843 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6844 "Expected target-based executable directive.");
6845 CGBuilderTy &Bld = CGF.Builder;
6846 llvm::Value *ThreadLimitVal = nullptr;
6847 llvm::Value *NumThreadsVal = nullptr;
6848 switch (DirectiveKind) {
// Plain 'target': inspect the nested directive (if any) for a parallel
// region or a thread_limit clause on a closely nested teams construct.
6850 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6851 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6853 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6854 CGF.getContext(), CS->getCapturedStmt());
6855 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6856 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
// thread_limit must be emitted in the captured region's context.
6857 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6858 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6859 const auto *ThreadLimitClause =
6860 Dir->getSingleClause<OMPThreadLimitClause>();
6861 CodeGenFunction::LexicalScope Scope(
6862 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
// Emit pre-init declarations the clause expression depends on.
6863 if (const auto *PreInit =
6864 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6865 for (const auto *I : PreInit->decls()) {
6866 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6867 CGF.EmitVarDecl(cast<VarDecl>(*I));
6869 CodeGenFunction::AutoVarEmission Emission =
6870 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6871 CGF.EmitAutoVarCleanups(Emission);
6875 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6876 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6878 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
// For teams-without-distribute, descend one more level looking for a
// nested distribute directive.
6880 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6881 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6882 CS = Dir->getInnermostCapturedStmt();
6883 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6884 CGF.getContext(), CS->getCapturedStmt());
6885 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6887 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6888 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6889 CS = Dir->getInnermostCapturedStmt();
6890 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
// SIMD directives execute with a single thread.
6893 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6894 return Bld.getInt32(1);
6896 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6898 case OMPD_target_teams: {
// Combined 'target teams': thread_limit (if present) is on D itself.
6899 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6900 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6901 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6902 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6903 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6905 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6907 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6908 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6910 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6911 CGF.getContext(), CS->getCapturedStmt());
// A nested plain 'distribute' may itself wrap a parallel region.
6912 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6913 if (Dir->getDirectiveKind() == OMPD_distribute) {
6914 CS = Dir->getInnermostCapturedStmt();
6915 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6919 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6921 case OMPD_target_teams_distribute:
6922 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6923 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6924 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6925 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6926 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6928 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6930 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
// Combined target+parallel forms: combine if / thread_limit / num_threads
// clauses found directly on D.
6931 case OMPD_target_parallel:
6932 case OMPD_target_parallel_for:
6933 case OMPD_target_parallel_for_simd:
6934 case OMPD_target_teams_distribute_parallel_for:
6935 case OMPD_target_teams_distribute_parallel_for_simd: {
6936 llvm::Value *CondVal = nullptr;
6937 // Handle if clause. If if clause present, the number of threads is
6938 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6939 if (D.hasClausesOfKind<OMPIfClause>()) {
6940 const OMPIfClause *IfClause = nullptr;
6941 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6942 if (C->getNameModifier() == OMPD_unknown ||
6943 C->getNameModifier() == OMPD_parallel) {
6949 const Expr *Cond = IfClause->getCondition();
// Constant-false condition means the parallel region is serialized.
6951 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6953 return Bld.getInt32(1);
6955 CodeGenFunction::RunCleanupsScope Scope(CGF);
6956 CondVal = CGF.EvaluateExprAsBool(Cond);
6960 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6961 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6962 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6963 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6964 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6966 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6968 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6969 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6970 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6971 llvm::Value *NumThreads = CGF.EmitScalarExpr(
6972 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6974 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
// Effective limit is min(num_threads, thread_limit) when both present
// (unsigned comparison).
6975 ThreadLimitVal = ThreadLimitVal
6976 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6978 NumThreadsVal, ThreadLimitVal)
6981 if (!ThreadLimitVal)
6982 ThreadLimitVal = Bld.getInt32(0);
// Runtime 'if' condition selects between the computed limit and 1.
6984 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6985 return ThreadLimitVal;
// SIMD-only target forms always run with one thread.
6987 case OMPD_target_teams_distribute_simd:
6988 case OMPD_target_simd:
6989 return Bld.getInt32(1);
// All remaining directive kinds are not target execution directives and
// must never reach this function (see assert above).
6992 case OMPD_parallel_for:
6993 case OMPD_parallel_sections:
6995 case OMPD_parallel_for_simd:
6997 case OMPD_cancellation_point:
6999 case OMPD_threadprivate:
7008 case OMPD_taskyield:
7011 case OMPD_taskgroup:
7015 case OMPD_target_data:
7016 case OMPD_target_exit_data:
7017 case OMPD_target_enter_data:
7018 case OMPD_distribute:
7019 case OMPD_distribute_simd:
7020 case OMPD_distribute_parallel_for:
7021 case OMPD_distribute_parallel_for_simd:
7022 case OMPD_teams_distribute:
7023 case OMPD_teams_distribute_simd:
7024 case OMPD_teams_distribute_parallel_for:
7025 case OMPD_teams_distribute_parallel_for_simd:
7026 case OMPD_target_update:
7027 case OMPD_declare_simd:
7028 case OMPD_declare_target:
7029 case OMPD_end_declare_target:
7030 case OMPD_declare_reduction:
7031 case OMPD_declare_mapper:
7033 case OMPD_taskloop_simd:
7038 llvm_unreachable("Unsupported directive kind.");
7042 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
7044 // Utility to handle information from clauses associated with a given
7045 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7046 // It provides a convenient interface to obtain the information and generate
7047 // code for that information.
7048 class MappableExprsHandler {
7050 /// Values for bit flags used to specify the mapping type for offloading;
7050b /// these must stay in sync with the flag values used by the offloading
7050c /// runtime library (tgt_map_type).
7052 enum OpenMPOffloadMappingFlags : uint64_t {
7055 /// Allocate memory on the device and move data from host to device.
7057 /// Allocate memory on the device and move data from device to host.
7058 OMP_MAP_FROM = 0x02,
7059 /// Always perform the requested mapping action on the element, even
7060 /// if it was already mapped before.
7061 OMP_MAP_ALWAYS = 0x04,
7062 /// Delete the element from the device environment, ignoring the
7063 /// current reference count associated with the element.
7064 OMP_MAP_DELETE = 0x08,
7065 /// The element being mapped is a pointer-pointee pair; both the
7066 /// pointer and the pointee should be mapped.
7067 OMP_MAP_PTR_AND_OBJ = 0x10,
7068 /// This flags signals that the base address of an entry should be
7069 /// passed to the target kernel as an argument.
7070 OMP_MAP_TARGET_PARAM = 0x20,
7071 /// Signal that the runtime library has to return the device pointer
7072 /// in the current position for the data being mapped. Used when we have the
7073 /// use_device_ptr clause.
7074 OMP_MAP_RETURN_PARAM = 0x40,
7075 /// This flag signals that the reference being passed is a pointer to
7076 /// private data.
7077 OMP_MAP_PRIVATE = 0x80,
7078 /// Pass the element to the device by value.
7079 OMP_MAP_LITERAL = 0x100,
/// The mapping was added implicitly by the compiler rather than written
/// by the user in a map clause.
7081 OMP_MAP_IMPLICIT = 0x200,
7082 /// The 16 MSBs of the flags indicate whether the entry is member of some
7084 OMP_MAP_MEMBER_OF = 0xffff000000000000,
7085 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7088 /// Class that associates information with a base pointer to be passed to the
7089 /// runtime library.
7090 class BasePointerInfo {
7091 /// The base pointer.
7092 llvm::Value *Ptr = nullptr;
7093 /// The base declaration that refers to this device pointer, or null if
7094 /// there is none.
7095 const ValueDecl *DevPtrDecl = nullptr;
7098 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7099 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
// Dereferencing yields the wrapped base-pointer value.
7100 llvm::Value *operator*() const { return Ptr; }
7101 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7102 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
// Convenience array types used throughout the mappable-expression handling.
7105 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7106 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7107 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7109 /// Map between a struct and the its lowest & highest elements which have been
7110 /// mapped:
7111 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7112 /// HE(FieldIndex, Pointer)}
7113 struct StructRangeInfoTy {
7114 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7115 0, Address::invalid()};
7116 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7117 0, Address::invalid()};
7118 Address Base = Address::invalid();
7122 /// Kind that defines how a device pointer has to be returned.
// NOTE(review): the 'struct MapInfo {' header line is elided in this
// extract; the members below belong to that struct.
7124 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7125 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7126 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7127 bool ReturnDevicePointer = false;
7128 bool IsImplicit = false;
7130 MapInfo() = default;
7132 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7133 OpenMPMapClauseKind MapType,
7134 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7135 bool ReturnDevicePointer, bool IsImplicit)
7136 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7137 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
7140 /// If use_device_ptr is used on a pointer which is a struct member and there
7141 /// is no map information about it, then emission of that entry is deferred
7142 /// until the whole struct has been processed.
7143 struct DeferredDevicePtrEntryTy {
7144 const Expr *IE = nullptr;
7145 const ValueDecl *VD = nullptr;
7147 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7151 /// Directive from where the map clauses were extracted.
7152 const OMPExecutableDirective &CurDir;
7154 /// Function the directive is being generated for.
7155 CodeGenFunction &CGF;
7157 /// Set of all first private variables in the current directive.
7158 /// bool data is set to true if the variable is implicitly marked as
7159 /// firstprivate, false otherwise.
7160 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7162 /// Map between device pointer declarations and their expression components.
7163 /// The key value for declarations in 'this' is null.
7166 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
/// Return the size in bytes (as an llvm::Value) of the storage designated by
/// expression \p E, handling OpenMP array sections specially: for a section,
/// the size is the section length times the element size rather than the
/// size of the expression's type.
/// NOTE(review): a few original lines (e.g. the single-element-section
/// return) are elided in this extract.
7169 llvm::Value *getExprTypeSize(const Expr *E) const {
7170 QualType ExprTy = E->getType().getCanonicalType();
7172 // Reference types are ignored for mapping purposes.
7173 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7174 ExprTy = RefTy->getPointeeType().getCanonicalType();
7176 // Given that an array section is considered a built-in type, we need to
7177 // do the calculation based on the length of the section instead of relying
7178 // on CGF.getTypeSize(E->getType()).
7179 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7180 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7181 OAE->getBase()->IgnoreParenImpCasts())
7182 .getCanonicalType();
7184 // If there is no length associated with the expression, that means we
7185 // are using the whole length of the base.
7186 if (!OAE->getLength() && OAE->getColonLoc().isValid())
7187 return CGF.getTypeSize(BaseTy);
// Element size comes from the pointee type for pointer bases, or from the
// element type for array bases.
7189 llvm::Value *ElemSize;
7190 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7191 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7193 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7194 assert(ATy && "Expecting array type if not a pointer type.");
7195 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7198 // If we don't have a length at this point, that is because we have an
7199 // array section with a single element.
7200 if (!OAE->getLength())
// length * element-size; NUW because sizes cannot wrap.
7203 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7205 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7206 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
// Not an array section: plain type size.
7208 return CGF.getTypeSize(ExprTy);
7211 /// Return the corresponding bits for a given map clause modifier. Add
7212 /// a flag marking the map as a pointer if requested. Add a flag marking the
7213 /// map as the first one of a series of maps that relate to the same map
7214 /// entry.
/// NOTE(review): the 'switch (MapType)' line and some case labels (e.g.
/// OMPC_MAP_to) are elided in this extract.
7215 OpenMPOffloadMappingFlags getMapTypeBits(
7216 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7217 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7218 OpenMPOffloadMappingFlags Bits =
7219 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7221 case OMPC_MAP_alloc:
7222 case OMPC_MAP_release:
7223 // alloc and release is the default behavior in the runtime library, i.e.
7224 // if we don't pass any bits alloc/release that is what the runtime is
7225 // going to do. Therefore, we don't need to signal anything for these two
7226 // type modifiers.
7232 Bits |= OMP_MAP_FROM;
7234 case OMPC_MAP_tofrom:
7235 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7237 case OMPC_MAP_delete:
7238 Bits |= OMP_MAP_DELETE;
7240 case OMPC_MAP_unknown:
7241 llvm_unreachable("Unexpected map type!");
// Extra flags requested by the caller, independent of the map type.
7244 Bits |= OMP_MAP_PTR_AND_OBJ;
7245 if (AddIsTargetParamFlag)
7246 Bits |= OMP_MAP_TARGET_PARAM;
// The 'always' map-type modifier forces remapping of already-mapped data.
7247 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7248 != MapModifiers.end())
7249 Bits |= OMP_MAP_ALWAYS;
7253 /// Return true if the provided expression is a final array section. A
7254 /// final array section, is one whose length can't be proved to be one.
/// NOTE(review): several 'return' lines are elided in this extract; comments
/// describe only the visible code.
7255 bool isFinalArraySectionExpression(const Expr *E) const {
7256 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7258 // It is not an array section and therefore not a unity-size one.
7262 // An array section with no colon always refer to a single element.
7263 if (OASE->getColonLoc().isInvalid())
7266 const Expr *Length = OASE->getLength();
7268 // If we don't have a length we have to check if the array has size 1
7269 // for this dimension. Also, we should always expect a length if the
7270 // base type is pointer.
7272 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7273 OASE->getBase()->IgnoreParenImpCasts())
7274 .getCanonicalType();
// A constant array dimension of length != 1 makes the section final.
7275 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7276 return ATy->getSize().getSExtValue() != 1;
7277 // If we don't have a constant dimension length, we have to consider
7278 // the current section as having any size, so it is not necessarily
7279 // unitary. If it happen to be unity size, that's user fault.
7283 // Check if the length evaluates to 1.
7284 Expr::EvalResult Result;
7285 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7286 return true; // Can have more that size 1.
7288 llvm::APSInt ConstLength = Result.Val.getInt();
7289 return ConstLength.getSExtValue() != 1;
7292 /// Generate the base pointers, section pointers, sizes and map type
7293 /// bits for the provided map type, map modifier, and expression components.
7294 /// \a IsFirstComponent should be set to true if the provided set of
7295 /// components is the first associated with a capture.
7296 void generateInfoForComponentList(
7297 OpenMPMapClauseKind MapType,
7298 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7299 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7300 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7301 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7302 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7304 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7305 OverlappedElements = llvm::None) const {
7306 // The following summarizes what has to be generated for each map and the
7307 // types below. The generated information is expressed in this order:
7308 // base pointer, section pointer, size, flags
7309 // (to add to the ones that come from the map type and modifier).
7330 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7333 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7336 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7339 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7342 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7345 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7348 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7351 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7354 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7356 // map(to: s.p[:22])
7357 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7358 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7359 // &(s.p), &(s.p[0]), 22*sizeof(double),
7360 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7361 // (*) alloc space for struct members, only this is a target parameter
7362 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7363 // optimizes this entry out, same in the examples below)
7364 // (***) map the pointee (map: to)
7367 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7369 // map(from: s.ps->s.i)
7370 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7371 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7372 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7374 // map(to: s.ps->ps)
7375 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7376 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7377 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7379 // map(s.ps->ps->ps)
7380 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7381 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7382 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7383 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7385 // map(to: s.ps->ps->s.f[:22])
7386 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7387 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7388 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7389 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7392 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7395 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7398 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7401 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7403 // map(to: ps->p[:22])
7404 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7405 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7406 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7409 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7411 // map(from: ps->ps->s.i)
7412 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7413 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7414 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7416 // map(from: ps->ps->ps)
7417 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7418 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7419 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7421 // map(ps->ps->ps->ps)
7422 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7423 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7424 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7425 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7427 // map(to: ps->ps->ps->s.f[:22])
7428 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7429 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7430 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7431 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7433 // map(to: s.f[:22]) map(from: s.p[:33])
7434 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7435 // sizeof(double*) (**), TARGET_PARAM
7436 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7437 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7438 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7439 // (*) allocate contiguous space needed to fit all mapped members even if
7440 // we allocate space for members not mapped (in this example,
7441 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7442 // them as well because they fall between &s.f[0] and &s.p)
7444 // map(from: s.f[:22]) map(to: ps->p[:33])
7445 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7446 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7447 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7448 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7449 // (*) the struct this entry pertains to is the 2nd element in the list of
7450 // arguments, hence MEMBER_OF(2)
7452 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7453 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7454 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7455 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7456 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7457 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7458 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7459 // (*) the struct this entry pertains to is the 4th element in the list
7460 // of arguments, hence MEMBER_OF(4)
7462 // Track if the map information being generated is the first for a capture.
7463 bool IsCaptureFirstInfo = IsFirstComponentList;
7464 // When the variable is on a declare target link or in a to clause with
7465 // unified memory, a reference is needed to hold the host/device address
7467 bool RequiresReference = false;
7469 // Scan the components from the base to the complete expression.
7470 auto CI = Components.rbegin();
7471 auto CE = Components.rend();
7474 // Track if the map information being generated is the first for a list of
7476 bool IsExpressionFirstInfo = true;
7477 Address BP = Address::invalid();
7478 const Expr *AssocExpr = I->getAssociatedExpression();
7479 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7480 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7482 if (isa<MemberExpr>(AssocExpr)) {
7483 // The base is the 'this' pointer. The content of the pointer is going
7484 // to be the base of the field being mapped.
7485 BP = CGF.LoadCXXThisAddress();
7486 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7488 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7489 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7491 // The base is the reference to the variable.
7493 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7494 if (const auto *VD =
7495 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7496 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7497 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7498 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7499 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7500 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7501 RequiresReference = true;
7502 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7507 // If the variable is a pointer and is being dereferenced (i.e. is not
7508 // the last component), the base has to be the pointer itself, not its
7509 // reference. References are ignored for mapping purposes.
7511 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7512 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7513 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7515 // We do not need to generate individual map information for the
7516 // pointer, it can be associated with the combined storage.
7521 // Track whether a component of the list should be marked as MEMBER_OF some
7522 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7523 // in a component list should be marked as MEMBER_OF, all subsequent entries
7524 // do not belong to the base struct. E.g.
7526 // s.ps->ps->ps->f[:]
7528 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7529 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7530 // is the pointee of ps(2) which is not member of struct s, so it should not
7531 // be marked as such (it is still PTR_AND_OBJ).
7532 // The variable is initialized to false so that PTR_AND_OBJ entries which
7533 // are not struct members are not considered (e.g. array of pointers to
7535 bool ShouldBeMemberOf = false;
7537 // Variable keeping track of whether or not we have encountered a component
7538 // in the component list which is a member expression. Useful when we have a
7539 // pointer or a final array section, in which case it is the previous
7540 // component in the list which tells us whether we have a member expression.
7542 // While processing the final array section "[:]" it is "f" which tells us
7543 // whether we are dealing with a member of a declared struct.
7544 const MemberExpr *EncounteredME = nullptr;
7546 for (; I != CE; ++I) {
7547 // If the current component is member of a struct (parent struct) mark it.
7548 if (!EncounteredME) {
7549 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7550 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7551 // as MEMBER_OF the parent struct.
7553 ShouldBeMemberOf = true;
7556 auto Next = std::next(I);
7558 // We need to generate the addresses and sizes if this is the last
7559 // component, if the component is a pointer or if it is an array section
7560 // whose length can't be proved to be one. If this is a pointer, it
7561 // becomes the base address for the following components.
7563 // A final array section, is one whose length can't be proved to be one.
7564 bool IsFinalArraySection =
7565 isFinalArraySectionExpression(I->getAssociatedExpression());
7567 // Get information on whether the element is a pointer. Have to do a
7568 // special treatment for array sections given that they are built-in
7571 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7573 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7575 ->isAnyPointerType()) ||
7576 I->getAssociatedExpression()->getType()->isAnyPointerType();
7578 if (Next == CE || IsPointer || IsFinalArraySection) {
7579 // If this is not the last component, we expect the pointer to be
7580 // associated with an array expression or member expression.
7581 assert((Next == CE ||
7582 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7583 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7584 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7585 "Unexpected expression");
7588 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7590 // If this component is a pointer inside the base struct then we don't
7591 // need to create any entry for it - it will be combined with the object
7592 // it is pointing to into a single PTR_AND_OBJ entry.
7593 bool IsMemberPointer =
7594 IsPointer && EncounteredME &&
7595 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7597 if (!OverlappedElements.empty()) {
7598 // Handle base element with the info for overlapped elements.
7599 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7600 assert(Next == CE &&
7601 "Expected last element for the overlapped elements.");
7602 assert(!IsPointer &&
7603 "Unexpected base element with the pointer type.");
7604 // Mark the whole struct as the struct that requires allocation on the
7606 PartialStruct.LowestElem = {0, LB};
7607 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7608 I->getAssociatedExpression()->getType());
7609 Address HB = CGF.Builder.CreateConstGEP(
7610 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
7612 TypeSize.getQuantity() - 1);
7613 PartialStruct.HighestElem = {
7614 std::numeric_limits<decltype(
7615 PartialStruct.HighestElem.first)>::max(),
7617 PartialStruct.Base = BP;
7618 // Emit data for non-overlapped data.
7619 OpenMPOffloadMappingFlags Flags =
7621 getMapTypeBits(MapType, MapModifiers, IsImplicit,
7622 /*AddPtrFlag=*/false,
7623 /*AddIsTargetParamFlag=*/false);
7625 llvm::Value *Size = nullptr;
7626 // Do bitcopy of all non-overlapped structure elements.
7627 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7628 Component : OverlappedElements) {
7629 Address ComponentLB = Address::invalid();
7630 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7632 if (MC.getAssociatedDeclaration()) {
7634 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7636 Size = CGF.Builder.CreatePtrDiff(
7637 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7638 CGF.EmitCastToVoidPtr(LB.getPointer()));
7642 BasePointers.push_back(BP.getPointer());
7643 Pointers.push_back(LB.getPointer());
7644 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7645 /*isSigned=*/true));
7646 Types.push_back(Flags);
7647 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7649 BasePointers.push_back(BP.getPointer());
7650 Pointers.push_back(LB.getPointer());
7651 Size = CGF.Builder.CreatePtrDiff(
7652 CGF.EmitCastToVoidPtr(
7653 CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7654 CGF.EmitCastToVoidPtr(LB.getPointer()));
7656 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7657 Types.push_back(Flags);
7660 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7661 if (!IsMemberPointer) {
7662 BasePointers.push_back(BP.getPointer());
7663 Pointers.push_back(LB.getPointer());
7665 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7667 // We need to add a pointer flag for each map that comes from the
7668 // same expression except for the first one. We also need to signal
7669 // this map is the first one that relates with the current capture
7670 // (there is a set of entries for each capture).
7671 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7672 MapType, MapModifiers, IsImplicit,
7673 !IsExpressionFirstInfo || RequiresReference,
7674 IsCaptureFirstInfo && !RequiresReference);
7676 if (!IsExpressionFirstInfo) {
7677 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7678 // then we reset the TO/FROM/ALWAYS/DELETE flags.
7680 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7683 if (ShouldBeMemberOf) {
7684 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7685 // should be later updated with the correct value of MEMBER_OF.
7686 Flags |= OMP_MAP_MEMBER_OF;
7687 // From now on, all subsequent PTR_AND_OBJ entries should not be
7688 // marked as MEMBER_OF.
7689 ShouldBeMemberOf = false;
7693 Types.push_back(Flags);
7696 // If we have encountered a member expression so far, keep track of the
7697 // mapped member. If the parent is "*this", then the value declaration
7699 if (EncounteredME) {
7700 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7701 unsigned FieldIndex = FD->getFieldIndex();
7703 // Update info about the lowest and highest elements for this struct
7704 if (!PartialStruct.Base.isValid()) {
7705 PartialStruct.LowestElem = {FieldIndex, LB};
7706 PartialStruct.HighestElem = {FieldIndex, LB};
7707 PartialStruct.Base = BP;
7708 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7709 PartialStruct.LowestElem = {FieldIndex, LB};
7710 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7711 PartialStruct.HighestElem = {FieldIndex, LB};
7715 // If we have a final array section, we are done with this expression.
7716 if (IsFinalArraySection)
7719 // The pointer becomes the base for the next element.
7723 IsExpressionFirstInfo = false;
7724 IsCaptureFirstInfo = false;
7729 /// Return the adjusted map modifiers if the declaration a capture refers to
7730 /// appears in a first-private clause. This is expected to be used only with
7731 /// directives that start with 'target'.
7732 MappableExprsHandler::OpenMPOffloadMappingFlags
7733 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7734 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7736 // A first private variable captured by reference will use only the
7737 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7738 // declaration is known as first-private in this handler.
7739 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7740 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7741 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
7742 return MappableExprsHandler::OMP_MAP_ALWAYS |
7743 MappableExprsHandler::OMP_MAP_TO;
7744 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7745 return MappableExprsHandler::OMP_MAP_TO |
7746 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7747 return MappableExprsHandler::OMP_MAP_PRIVATE |
7748 MappableExprsHandler::OMP_MAP_TO;
7750 return MappableExprsHandler::OMP_MAP_TO |
7751 MappableExprsHandler::OMP_MAP_FROM;
7754 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7755 // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7756 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7760 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7761 OpenMPOffloadMappingFlags MemberOfFlag) {
7762 // If the entry is PTR_AND_OBJ but has not been marked with the special
7763 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7764 // marked as MEMBER_OF.
7765 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7766 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7769 // Reset the placeholder value to prepare the flag for the assignment of the
7770 // proper MEMBER_OF value.
7771 Flags &= ~OMP_MAP_MEMBER_OF;
7772 Flags |= MemberOfFlag;
7775 void getPlainLayout(const CXXRecordDecl *RD,
7776 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7777 bool AsBase) const {
7778 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7780 llvm::StructType *St =
7781 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7783 unsigned NumElements = St->getNumElements();
7785 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7786 RecordLayout(NumElements);
7789 for (const auto &I : RD->bases()) {
7792 const auto *Base = I.getType()->getAsCXXRecordDecl();
7793 // Ignore empty bases.
7794 if (Base->isEmpty() || CGF.getContext()
7795 .getASTRecordLayout(Base)
7796 .getNonVirtualSize()
7800 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7801 RecordLayout[FieldIndex] = Base;
7803 // Fill in virtual bases.
7804 for (const auto &I : RD->vbases()) {
7805 const auto *Base = I.getType()->getAsCXXRecordDecl();
7806 // Ignore empty bases.
7807 if (Base->isEmpty())
7809 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7810 if (RecordLayout[FieldIndex])
7812 RecordLayout[FieldIndex] = Base;
7814 // Fill in all the fields.
7815 assert(!RD->isUnion() && "Unexpected union.");
7816 for (const auto *Field : RD->fields()) {
7817 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7818 // will fill in later.)
7819 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7820 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7821 RecordLayout[FieldIndex] = Field;
7824 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7825 &Data : RecordLayout) {
7828 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7829 getPlainLayout(Base, Layout, /*AsBase=*/true);
7831 Layout.push_back(Data.get<const FieldDecl *>());
7836 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7837 : CurDir(Dir), CGF(CGF) {
7838 // Extract firstprivate clause information.
7839 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7840 for (const auto *D : C->varlists())
7841 FirstPrivateDecls.try_emplace(
7842 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7843 // Extract device pointer clause information.
7844 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7845 for (auto L : C->component_lists())
7846 DevPointersMap[L.first].push_back(L.second);
7849 /// Generate code for the combined entry if we have a partially mapped struct
7850 /// and take care of the mapping flags of the arguments corresponding to
7851 /// individual struct members.
7852 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7853 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7854 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7855 const StructRangeInfoTy &PartialStruct) const {
7856 // Base is the base of the struct
7857 BasePointers.push_back(PartialStruct.Base.getPointer());
7858 // Pointer is the address of the lowest element
7859 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7860 Pointers.push_back(LB);
7861 // Size is (addr of {highest+1} element) - (addr of lowest element)
7862 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7863 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7864 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7865 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7866 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7867 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7868 /*isSigned=*/false);
7869 Sizes.push_back(Size);
7870 // Map type is always TARGET_PARAM
7871 Types.push_back(OMP_MAP_TARGET_PARAM);
7872 // Remove TARGET_PARAM flag from the first element
7873 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7875 // All other current entries will be MEMBER_OF the combined entry
7876 // (except for PTR_AND_OBJ entries which do not have a placeholder value
7877 // 0xFFFF in the MEMBER_OF field).
7878 OpenMPOffloadMappingFlags MemberOfFlag =
7879 getMemberOfFlag(BasePointers.size() - 1);
7880 for (auto &M : CurTypes)
7881 setCorrectMemberOfFlag(M, MemberOfFlag);
// NOTE(review): this listing appears to be a truncated snapshot — the
// embedded line numbering has gaps where closing braces, the lambda's first
// parameter (`const ValueDecl *D`), and several `continue;`/`} else {`
// lines presumably were. Compare against upstream CGOpenMPRuntime.cpp
// before editing this function.
7884 /// Generate all the base pointers, section pointers, sizes and map
7885 /// types for the extracted mappable expressions. Also, for each item that
7886 /// relates with a device pointer, a pair of the relevant declaration and
7887 /// index where it occurs is appended to the device pointers info array.
7888 void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7889 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7890 MapFlagsArrayTy &Types) const {
7891 // We have to process the component lists that relate with the same
7892 // declaration in a single chunk so that we can generate the map flags
7893 // correctly. Therefore, we organize all lists in a map.
7894 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7896 // Helper function to fill the information map for the different supported
// InfoGen groups one component list under its (canonical) base declaration;
// a null key is used for component lists rooted at 'this'.
7898 auto &&InfoGen = [&Info](
7900 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7901 OpenMPMapClauseKind MapType,
7902 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7903 bool ReturnDevicePointer, bool IsImplicit) {
7904 const ValueDecl *VD =
7905 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7906 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7910 // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7911 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7912 for (const auto &L : C->component_lists()) {
7913 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7914 /*ReturnDevicePointer=*/false, C->isImplicit());
7916 for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
7917 for (const auto &L : C->component_lists()) {
7918 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7919 /*ReturnDevicePointer=*/false, C->isImplicit());
7921 for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
7922 for (const auto &L : C->component_lists()) {
7923 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7924 /*ReturnDevicePointer=*/false, C->isImplicit());
7927 // Look at the use_device_ptr clause information and mark the existing map
7928 // entries as such. If there is no map information for an entry in the
7929 // use_device_ptr list, we create one with map type 'alloc' and zero size
7930 // section. It is the user fault if that was not mapped before. If there is
7931 // no map information and the pointer is a struct member, then we defer the
7932 // emission of that entry until the whole struct has been processed.
7933 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7936 // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7937 for (const auto *C :
7938 this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
7939 for (const auto &L : C->component_lists()) {
7940 assert(!L.second.empty() && "Not expecting empty list of components!");
7941 const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7942 VD = cast<ValueDecl>(VD->getCanonicalDecl());
7943 const Expr *IE = L.second.back().getAssociatedExpression();
7944 // If the first component is a member expression, we have to look into
7945 // 'this', which maps to null in the map of map information. Otherwise
7946 // look directly for the information.
7947 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7949 // We potentially have map information for this declaration already.
7950 // Look for the first set of components that refer to it.
7951 if (It != Info.end()) {
7952 auto CI = std::find_if(
7953 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7954 return MI.Components.back().getAssociatedDeclaration() == VD;
7956 // If we found a map entry, signal that the pointer has to be returned
7957 // and move on to the next declaration.
7958 if (CI != It->second.end()) {
7959 CI->ReturnDevicePointer = true;
7964 // We didn't find any match in our map information - generate a zero
7965 // size array section - if the pointer is a struct member we defer this
7966 // action until the whole struct has been processed.
7967 // FIXME: MSVC 2013 seems to require this-> to find member CGF.
7968 if (isa<MemberExpr>(IE)) {
7969 // Insert the pointer into Info to be processed by
7970 // generateInfoForComponentList. Because it is a member pointer
7971 // without a pointee, no entry will be generated for it, therefore
7972 // we need to generate one after the whole struct has been processed.
7973 // Nonetheless, generateInfoForComponentList must be called to take
7974 // the pointer into account for the calculation of the range of the
7976 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7977 /*ReturnDevicePointer=*/false, C->isImplicit());
7978 DeferredInfo[nullptr].emplace_back(IE, VD);
// Non-member case (presumably the matching `} else {` branch): emit a
// RETURN_PARAM entry with a zero size directly.
7980 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7981 this->CGF.EmitLValue(IE), IE->getExprLoc());
7982 BasePointers.emplace_back(Ptr, VD);
7983 Pointers.push_back(Ptr);
7984 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
7985 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
// Main loop: emit the entries for each declaration's component lists into
// temporary arrays, then splice them into the output arrays.
7990 for (const auto &M : Info) {
7991 // We need to know when we generate information for the first component
7992 // associated with a capture, because the mapping flags depend on it.
7993 bool IsFirstComponentList = true;
7995 // Temporary versions of arrays
7996 MapBaseValuesArrayTy CurBasePointers;
7997 MapValuesArrayTy CurPointers;
7998 MapValuesArrayTy CurSizes;
7999 MapFlagsArrayTy CurTypes;
8000 StructRangeInfoTy PartialStruct;
8002 for (const MapInfo &L : M.second) {
8003 assert(!L.Components.empty() &&
8004 "Not expecting declaration with no component lists.");
8006 // Remember the current base pointer index.
8007 unsigned CurrentBasePointersIdx = CurBasePointers.size();
8008 // FIXME: MSVC 2013 seems to require this-> to find the member method.
8009 this->generateInfoForComponentList(
8010 L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8011 CurPointers, CurSizes, CurTypes, PartialStruct,
8012 IsFirstComponentList, L.IsImplicit);
8014 // If this entry relates with a device pointer, set the relevant
8015 // declaration and add the 'return pointer' flag.
8016 if (L.ReturnDevicePointer) {
8017 assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8018 "Unexpected number of mapped base pointers.");
8020 const ValueDecl *RelevantVD =
8021 L.Components.back().getAssociatedDeclaration();
8022 assert(RelevantVD &&
8023 "No relevant declaration related with device pointer??");
8025 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8026 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8028 IsFirstComponentList = false;
8031 // Append any pending zero-length pointers which are struct members and
8032 // used with use_device_ptr.
8033 auto CI = DeferredInfo.find(M.first);
8034 if (CI != DeferredInfo.end()) {
8035 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8036 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8037 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8038 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8039 CurBasePointers.emplace_back(BasePtr, L.VD);
8040 CurPointers.push_back(Ptr);
8041 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8042 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8043 // value MEMBER_OF=FFFF so that the entry is later updated with the
8044 // correct value of MEMBER_OF.
8045 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8050 // If there is an entry in PartialStruct it means we have a struct with
8051 // individual members mapped. Emit an extra combined entry.
8052 if (PartialStruct.Base.isValid())
8053 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8056 // We need to append the results of this capture to what we already have.
8057 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8058 Pointers.append(CurPointers.begin(), CurPointers.end());
8059 Sizes.append(CurSizes.begin(), CurSizes.end());
8060 Types.append(CurTypes.begin(), CurTypes.end());
8064 /// Emit capture info for lambdas for variables captured by reference.
8065 void generateInfoForLambdaCaptures(
8066 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8067 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8068 MapFlagsArrayTy &Types,
8069 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8070 const auto *RD = VD->getType()
8072 .getNonReferenceType()
8073 ->getAsCXXRecordDecl();
8074 if (!RD || !RD->isLambda())
8076 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8077 LValue VDLVal = CGF.MakeAddrLValue(
8078 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8079 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8080 FieldDecl *ThisCapture = nullptr;
8081 RD->getCaptureFields(Captures, ThisCapture);
8084 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8085 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8086 LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8087 BasePointers.push_back(ThisLVal.getPointer());
8088 Pointers.push_back(ThisLValVal.getPointer());
8090 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8091 CGF.Int64Ty, /*isSigned=*/true));
8092 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8093 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8095 for (const LambdaCapture &LC : RD->captures()) {
8096 if (!LC.capturesVariable())
8098 const VarDecl *VD = LC.getCapturedVar();
8099 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8101 auto It = Captures.find(VD);
8102 assert(It != Captures.end() && "Found lambda capture without field.");
8103 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8104 if (LC.getCaptureKind() == LCK_ByRef) {
8105 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8106 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8107 BasePointers.push_back(VarLVal.getPointer());
8108 Pointers.push_back(VarLValVal.getPointer());
8109 Sizes.push_back(CGF.Builder.CreateIntCast(
8111 VD->getType().getCanonicalType().getNonReferenceType()),
8112 CGF.Int64Ty, /*isSigned=*/true));
8114 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8115 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8116 BasePointers.push_back(VarLVal.getPointer());
8117 Pointers.push_back(VarRVal.getScalarVal());
8118 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8120 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8121 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8125 /// Set correct indices for lambdas captures.
8126 void adjustMemberOfForLambdaCaptures(
8127 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8128 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8129 MapFlagsArrayTy &Types) const {
8130 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8131 // Set correct member_of idx for all implicit lambda captures.
8132 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8133 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8135 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8136 assert(BasePtr && "Unable to find base lambda address.");
8138 for (unsigned J = I; J > 0; --J) {
8139 unsigned Idx = J - 1;
8140 if (Pointers[Idx] != BasePtr)
8145 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8146 // All other current entries will be MEMBER_OF the combined entry
8147 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8148 // 0xFFFF in the MEMBER_OF field).
8149 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8150 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
// NOTE(review): this listing appears to be a truncated snapshot — gaps in
// the embedded numbering hint at missing lines (the `Arg` parameter, `using
// MapData =`, several `break;`/`continue;`/`return;` statements, the
// comparator lambda for llvm::stable_sort, and closing braces). Compare
// against upstream CGOpenMPRuntime.cpp before editing.
8154 /// Generate the base pointers, section pointers, sizes and map types
8155 /// associated to a given capture.
8156 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8158 MapBaseValuesArrayTy &BasePointers,
8159 MapValuesArrayTy &Pointers,
8160 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8161 StructRangeInfoTy &PartialStruct) const {
8162 assert(!Cap->capturesVariableArrayType() &&
8163 "Not expecting to generate map info for a variable array type!");
8165 // We need to know when we generating information for the first component
8166 const ValueDecl *VD = Cap->capturesThis()
8168 : Cap->getCapturedVar()->getCanonicalDecl();
8170 // If this declaration appears in a is_device_ptr clause we just have to
8171 // pass the pointer by value. If it is a reference to a declaration, we just
8173 if (DevPointersMap.count(VD)) {
8174 BasePointers.emplace_back(Arg, VD);
8175 Pointers.push_back(Arg);
8177 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8178 CGF.Int64Ty, /*isSigned=*/true));
8179 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
// Collect all map-clause component lists that refer to this declaration.
8184 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8185 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8186 SmallVector<MapData, 4> DeclComponentLists;
8187 // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
8188 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8189 for (const auto &L : C->decl_component_lists(VD)) {
8190 assert(L.first == VD &&
8191 "We got information for the wrong declaration??");
8192 assert(!L.second.empty() &&
8193 "Not expecting declaration with no component lists.");
8194 DeclComponentLists.emplace_back(L.second, C->getMapType(),
8195 C->getMapTypeModifiers(),
8200 // Find overlapping elements (including the offset from the base element).
8201 llvm::SmallDenseMap<
8204 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
// Pairwise comparison of component lists: two lists overlap when one is a
// (strict) suffix-prefix of the other along the access path.
8208 for (const MapData &L : DeclComponentLists) {
8209 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8210 OpenMPMapClauseKind MapType;
8211 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8213 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8215 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8216 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8217 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
8218 auto CI = Components.rbegin();
8219 auto CE = Components.rend();
8220 auto SI = Components1.rbegin();
8221 auto SE = Components1.rend();
8222 for (; CI != CE && SI != SE; ++CI, ++SI) {
8223 if (CI->getAssociatedExpression()->getStmtClass() !=
8224 SI->getAssociatedExpression()->getStmtClass())
8226 // Are we dealing with different variables/fields?
8227 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8230 // Found overlapping if, at least for one component, reached the head of
8231 // the components list.
8232 if (CI == CE || SI == SE) {
8233 assert((CI != CE || SI != SE) &&
8234 "Unexpected full match of the mapping components.");
8235 const MapData &BaseData = CI == CE ? L : L1;
8236 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8237 SI == SE ? Components : Components1;
8238 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8239 OverlappedElements.getSecond().push_back(SubData);
8243 // Sort the overlapped elements for each item.
8244 llvm::SmallVector<const FieldDecl *, 4> Layout;
8245 if (!OverlappedData.empty()) {
8246 if (const auto *CRD =
8247 VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8248 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8250 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8251 Layout.append(RD->field_begin(), RD->field_end());
// Sort each item's overlap list by field order (comparator body below —
// presumably passed to a sort over Pair.getSecond()).
8254 for (auto &Pair : OverlappedData) {
8258 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8259 OMPClauseMappableExprCommon::MappableExprComponentListRef
8261 auto CI = First.rbegin();
8262 auto CE = First.rend();
8263 auto SI = Second.rbegin();
8264 auto SE = Second.rend();
8265 for (; CI != CE && SI != SE; ++CI, ++SI) {
8266 if (CI->getAssociatedExpression()->getStmtClass() !=
8267 SI->getAssociatedExpression()->getStmtClass())
8269 // Are we dealing with different variables/fields?
8270 if (CI->getAssociatedDeclaration() !=
8271 SI->getAssociatedDeclaration())
8275 // Lists contain the same elements.
8276 if (CI == CE && SI == SE)
8279 // List with less elements is less than list with more elements.
8280 if (CI == CE || SI == SE)
8283 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8284 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8285 if (FD1->getParent() == FD2->getParent())
8286 return FD1->getFieldIndex() < FD2->getFieldIndex();
8288 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8289 return FD == FD1 || FD == FD2;
8295 // Associated with a capture, because the mapping flags depend on it.
8296 // Go through all of the elements with the overlapped elements.
8297 for (const auto &Pair : OverlappedData) {
8298 const MapData &L = *Pair.getFirst();
8299 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8300 OpenMPMapClauseKind MapType;
8301 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8303 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8304 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8305 OverlappedComponents = Pair.getSecond();
8306 bool IsFirstComponentList = true;
8307 generateInfoForComponentList(MapType, MapModifiers, Components,
8308 BasePointers, Pointers, Sizes, Types,
8309 PartialStruct, IsFirstComponentList,
8310 IsImplicit, OverlappedComponents);
8312 // Go through other elements without overlapped elements.
8313 bool IsFirstComponentList = OverlappedData.empty();
8314 for (const MapData &L : DeclComponentLists) {
8315 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8316 OpenMPMapClauseKind MapType;
8317 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8319 std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8320 auto It = OverlappedData.find(&L);
8321 if (It == OverlappedData.end())
8322 generateInfoForComponentList(MapType, MapModifiers, Components,
8323 BasePointers, Pointers, Sizes, Types,
8324 PartialStruct, IsFirstComponentList,
8326 IsFirstComponentList = false;
8330 /// Generate the base pointers, section pointers, sizes and map types
8331 /// associated with the declare target link variables.
8332 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8333 MapValuesArrayTy &Pointers,
8334 MapValuesArrayTy &Sizes,
8335 MapFlagsArrayTy &Types) const {
8336 // Map other list items in the map clause which are not captured variables
8337 // but "declare target link" global variables.
8338 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8339 for (const auto &L : C->component_lists()) {
8342 const auto *VD = dyn_cast<VarDecl>(L.first);
8345 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
8346 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8347 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8348 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8350 StructRangeInfoTy PartialStruct;
8351 generateInfoForComponentList(
8352 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8353 Pointers, Sizes, Types, PartialStruct,
8354 /*IsFirstComponentList=*/true, C->isImplicit());
8355 assert(!PartialStruct.Base.isValid() &&
8356 "No partial structs for declare target link expected.");
8361 /// Generate the default map information for a given capture \a CI,
8362 /// record field declaration \a RI and captured value \a CV.
// Invoked when a target-region capture has no explicit map clause: chooses a
// default base pointer, pointer, size and map-type flag set for the capture.
// NOTE(review): this excerpt has gaps — some original lines (else keywords,
// closing braces) are not visible here; the code below is kept verbatim.
8363   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8364                               const FieldDecl &RI, llvm::Value *CV,
8365                               MapBaseValuesArrayTy &CurBasePointers,
8366                               MapValuesArrayTy &CurPointers,
8367                               MapValuesArrayTy &CurSizes,
8368                               MapFlagsArrayTy &CurMapTypes) const {
8369     bool IsImplicit = true;
8370     // Do the default mapping.
// Captured 'this': map the pointee of the 'this' pointer with 'tofrom'.
8371     if (CI.capturesThis()) {
8372       CurBasePointers.push_back(CV);
8373       CurPointers.push_back(CV);
8374       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8376           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8377                                     CGF.Int64Ty, /*isSigned=*/true));
8378       // Default map type.
8379       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8380     } else if (CI.capturesVariableByCopy()) {
8381       CurBasePointers.push_back(CV);
8382       CurPointers.push_back(CV);
8383       if (!RI.getType()->isAnyPointerType()) {
8384         // We have to signal to the runtime captures passed by value that are
8386         CurMapTypes.push_back(OMP_MAP_LITERAL);
8387         CurSizes.push_back(CGF.Builder.CreateIntCast(
8388             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8390         // Pointers are implicitly mapped with a zero size and no flags
8391         // (other than first map that is added for all implicit maps).
8392         CurMapTypes.push_back(OMP_MAP_NONE);
8393         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
// For by-copy captures, a firstprivate clause (if present) determines whether
// the map is considered implicit.
8395       const VarDecl *VD = CI.getCapturedVar();
8396       auto I = FirstPrivateDecls.find(VD);
8397       if (I != FirstPrivateDecls.end())
8398         IsImplicit = I->getSecond();
// Remaining case: variable captured by reference (field is a ReferenceType).
8400       assert(CI.capturesVariable() && "Expected captured reference.");
8401       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8402       QualType ElementType = PtrTy->getPointeeType();
8403       CurSizes.push_back(CGF.Builder.CreateIntCast(
8404           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8405       // The default map type for a scalar/complex type is 'to' because by
8406       // default the value doesn't have to be retrieved. For an aggregate
8407       // type, the default is 'tofrom'.
8408       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8409       const VarDecl *VD = CI.getCapturedVar();
8410       auto I = FirstPrivateDecls.find(VD);
// Constant firstprivates get a per-target global copy registered with the
// offload entry table; the original value is memcpy'd into that copy and the
// global is used as base/pointer instead of the captured address.
8411       if (I != FirstPrivateDecls.end() &&
8412           VD->getType().isConstant(CGF.getContext())) {
8413         llvm::Constant *Addr =
8414             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8415         // Copy the value of the original variable to the new global copy.
8416         CGF.Builder.CreateMemCpy(
8417             CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
8418             Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8419             CurSizes.back(), /*IsVolatile=*/false);
8420         // Use new global variable as the base pointers.
8421         CurBasePointers.push_back(Addr);
8422         CurPointers.push_back(Addr);
8424         CurBasePointers.push_back(CV);
// Firstprivate pointers: load the pointee address through the reference so
// the pointed-to data (not the reference slot) is what gets mapped.
8425         if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8426           Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8427               CV, ElementType, CGF.getContext().getDeclAlign(VD),
8428               AlignmentSource::Decl));
8429           CurPointers.push_back(PtrAddr.getPointer());
8431           CurPointers.push_back(CV);
8434       if (I != FirstPrivateDecls.end())
8435         IsImplicit = I->getSecond();
8437     // Every default map produces a single argument which is a target parameter.
8438     CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8440     // Add flag stating this is an implicit map.
8442       CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8445 } // anonymous namespace
8447 /// Emit the arrays used to pass the captures and map information to the
8448 /// offloading runtime library. If there is no map or capture information,
8449 /// return nullptr by reference.
// Populates Info with the ".offload_baseptrs"/".offload_ptrs" stack arrays
// and the sizes/map-types arrays. Sizes become a private constant global when
// all sizes are compile-time constants, otherwise a runtime-filled temp array;
// map types are always emitted as a constant global.
// NOTE(review): this excerpt has gaps — some original lines (declarations,
// else branches, closing braces) are not visible; code is kept verbatim.
8451 emitOffloadingArrays(CodeGenFunction &CGF,
8452                      MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
8453                      MappableExprsHandler::MapValuesArrayTy &Pointers,
8454                      MappableExprsHandler::MapValuesArrayTy &Sizes,
8455                      MappableExprsHandler::MapFlagsArrayTy &MapTypes,
8456                      CGOpenMPRuntime::TargetDataInfo &Info) {
8457   CodeGenModule &CGM = CGF.CGM;
8458   ASTContext &Ctx = CGF.getContext();
8460   // Reset the array information.
8461   Info.clearArrayInfo();
8462   Info.NumberOfPtrs = BasePointers.size();
8464   if (Info.NumberOfPtrs) {
8465     // Detect if we have any capture size requiring runtime evaluation of the
8466     // size so that a constant array could be eventually used.
8467     bool hasRuntimeEvaluationCaptureSize = false;
8468     for (llvm::Value *S : Sizes)
8469       if (!isa<llvm::Constant>(S)) {
8470         hasRuntimeEvaluationCaptureSize = true;
8474     llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8475     QualType PointerArrayType =
8476         Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
8477                                  /*IndexTypeQuals=*/0);
8479     Info.BasePointersArray =
8480         CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8481     Info.PointersArray =
8482         CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8484     // If we don't have any VLA types or other types that require runtime
8485     // evaluation, we can use a constant array for the map sizes, otherwise we
8486     // need to fill up the arrays as we do for the pointers.
8488         Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8489     if (hasRuntimeEvaluationCaptureSize) {
8490       QualType SizeArrayType =
8491           Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
8492                                    /*IndexTypeQuals=*/0);
8494           CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8496       // We expect all the sizes to be constant, so we collect them to create
8497       // a constant array.
8498       SmallVector<llvm::Constant *, 16> ConstSizes;
8499       for (llvm::Value *S : Sizes)
8500         ConstSizes.push_back(cast<llvm::Constant>(S));
8502       auto *SizesArrayInit = llvm::ConstantArray::get(
8503           llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8504       std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8505       auto *SizesArrayGbl = new llvm::GlobalVariable(
8506           CGM.getModule(), SizesArrayInit->getType(),
8507           /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8508           SizesArrayInit, Name);
8509       SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8510       Info.SizesArray = SizesArrayGbl;
8513     // The map types are always constant so we don't need to generate code to
8514     // fill arrays. Instead, we create an array constant.
8515     SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8516     llvm::copy(MapTypes, Mapping.begin());
8517     llvm::Constant *MapTypesArrayInit =
8518         llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8519     std::string MaptypesName =
8520         CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8521     auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8522         CGM.getModule(), MapTypesArrayInit->getType(),
8523         /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8524         MapTypesArrayInit, MaptypesName);
8525     MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8526     Info.MapTypesArray = MapTypesArrayGbl;
// Store each base pointer / pointer (and, when needed, runtime size) into its
// slot of the corresponding offload array.
8528     for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8529       llvm::Value *BPVal = *BasePointers[I];
8530       llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
8531           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8532           Info.BasePointersArray, 0, I);
8533       BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8534           BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8535       Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8536       CGF.Builder.CreateStore(BPVal, BPAddr);
// Record where a use_device_ptr declaration's pointer slot lives so the
// runtime-translated device address can be read back later.
8538       if (Info.requiresDevicePointerInfo())
8539         if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8540           Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8542       llvm::Value *PVal = Pointers[I];
8543       llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
8544           llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8545           Info.PointersArray, 0, I);
8546       P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8547           P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8548       Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8549       CGF.Builder.CreateStore(PVal, PAddr);
8551       if (hasRuntimeEvaluationCaptureSize) {
8552         llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
8553             llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8557         Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
8558         CGF.Builder.CreateStore(
8559             CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8565 /// Emit the arguments to be passed to the runtime library based on the
8566 /// arrays of pointers, sizes and map types.
// Produces decayed element-0 pointers into the arrays built by
// emitOffloadingArrays; when there are no pointers, all four arguments become
// typed null pointers so the runtime call receives empty arrays.
// NOTE(review): some original lines (the closing of the 'then' branch and two
// GEP argument lines) are not visible in this excerpt; code kept verbatim.
8567 static void emitOffloadingArraysArgument(
8568     CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8569     llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8570     llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8571   CodeGenModule &CGM = CGF.CGM;
8572   if (Info.NumberOfPtrs) {
8573     BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8574         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8575         Info.BasePointersArray,
8576         /*Idx0=*/0, /*Idx1=*/0);
8577     PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8578         llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8582     SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8583         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8584         /*Idx0=*/0, /*Idx1=*/0);
8585     MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8586         llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8591     BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8592     PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8593     SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8595         llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8599 /// Check for inner distribute directive.
// Walks into the captured body of a target/teams directive and returns the
// nested distribute-based directive, if one is the single child; combined
// target...distribute kinds and unexpected kinds are handled by the switch.
// NOTE(review): many switch-case lines, 'return' statements and braces are
// elided in this excerpt; the visible code is kept verbatim.
8600 static const OMPExecutableDirective *
8601 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8602   const auto *CS = D.getInnermostCapturedStmt();
8604       CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8605   const Stmt *ChildStmt =
8606       CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8608   if (const auto *NestedDir =
8609           dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8610     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8611     switch (D.getDirectiveKind()) {
8613       if (isOpenMPDistributeDirective(DKind))
// 'target teams' wrapping: descend one more level to find the distribute
// directive nested inside the teams region.
8615       if (DKind == OMPD_teams) {
8616         Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8617             /*IgnoreCaptured=*/true);
8620         ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8621         if (const auto *NND =
8622                 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8623           DKind = NND->getDirectiveKind();
8624           if (isOpenMPDistributeDirective(DKind))
8629     case OMPD_target_teams:
8630       if (isOpenMPDistributeDirective(DKind))
// Directive kinds below cannot contain a nested distribute that matters here;
// they fall through to the unreachable at the end of the switch.
8633     case OMPD_target_parallel:
8634     case OMPD_target_simd:
8635     case OMPD_target_parallel_for:
8636     case OMPD_target_parallel_for_simd:
8638     case OMPD_target_teams_distribute:
8639     case OMPD_target_teams_distribute_simd:
8640     case OMPD_target_teams_distribute_parallel_for:
8641     case OMPD_target_teams_distribute_parallel_for_simd:
8644     case OMPD_parallel_for:
8645     case OMPD_parallel_sections:
8647     case OMPD_parallel_for_simd:
8649     case OMPD_cancellation_point:
8651     case OMPD_threadprivate:
8660     case OMPD_taskyield:
8663     case OMPD_taskgroup:
8667     case OMPD_target_data:
8668     case OMPD_target_exit_data:
8669     case OMPD_target_enter_data:
8670     case OMPD_distribute:
8671     case OMPD_distribute_simd:
8672     case OMPD_distribute_parallel_for:
8673     case OMPD_distribute_parallel_for_simd:
8674     case OMPD_teams_distribute:
8675     case OMPD_teams_distribute_simd:
8676     case OMPD_teams_distribute_parallel_for:
8677     case OMPD_teams_distribute_parallel_for_simd:
8678     case OMPD_target_update:
8679     case OMPD_declare_simd:
8680     case OMPD_declare_target:
8681     case OMPD_end_declare_target:
8682     case OMPD_declare_reduction:
8683     case OMPD_declare_mapper:
8685     case OMPD_taskloop_simd:
8688       llvm_unreachable("Unexpected directive.");
// Emits a call to __kmpc_push_target_tripcount(device_id, num_iterations) so
// the runtime knows the loop trip count of a (possibly nested) teams
// distribute loop before the target region is launched. SizeEmitter computes
// the iteration count for the located loop directive.
// NOTE(review): guard/return lines are elided in this excerpt; kept verbatim.
8695 void CGOpenMPRuntime::emitTargetNumIterationsCall(
8696     CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8697     const llvm::function_ref<llvm::Value *(
8698         CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
8699   OpenMPDirectiveKind Kind = D.getDirectiveKind();
8700   const OMPExecutableDirective *TD = &D;
8701   // Get nested teams distribute kind directive, if any.
8702   if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
8703     TD = getNestedDistributeDirective(CGM.getContext(), D);
8706   const auto *LD = cast<OMPLoopDirective>(TD);
8707   auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8708                                                      PrePostActionTy &) {
8709     llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8711     // Emit device ID if any.
8712     llvm::Value *DeviceID;
// Device clause present: cast the evaluated device expression to int64;
// otherwise pass the 'undefined device' sentinel.
8714       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8715                                            CGF.Int64Ty, /*isSigned=*/true);
8717       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8719     llvm::Value *Args[] = {DeviceID, NumIterations};
8720     CGF.EmitRuntimeCall(
8721         createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
8723   emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
// Emits the host-side launch sequence for a target region: gathers captured
// variables, builds the offloading base-pointer/pointer/size/map-type arrays,
// calls __tgt_target[_teams][_nowait], and falls back to calling the host
// outlined function when offloading fails or no device/ID is available.
// The 'then' path offloads; the 'else' path executes the host version. If the
// directive carries depend clauses (RequiresOuterTask), the work is wrapped
// in a target task via EmitOMPTargetTaskBasedDirective.
// NOTE(review): this excerpt elides many lines (early returns, else keywords,
// array elements of OffloadingArgs, closing braces); code kept verbatim.
8726 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
8727                                      const OMPExecutableDirective &D,
8728                                      llvm::Function *OutlinedFn,
8729                                      llvm::Value *OutlinedFnID,
8730                                      const Expr *IfCond, const Expr *Device) {
8731   if (!CGF.HaveInsertPoint())
8734   assert(OutlinedFn && "Invalid outlined function!");
8736   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
8737   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
8738   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
8739   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
8740                                             PrePostActionTy &) {
8741     CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8743   emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
8745   CodeGenFunction::OMPTargetDataInfo InputInfo;
8746   llvm::Value *MapTypesArray = nullptr;
8747   // Fill up the pointer arrays and transfer execution to the device.
8748   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
8749                     &MapTypesArray, &CS, RequiresOuterTask,
8750                     &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
8751     // On top of the arrays that were filled up, the target offloading call
8752     // takes as arguments the device id as well as the host pointer. The host
8753     // pointer is used by the runtime library to identify the current target
8754     // region, so it only has to be unique and not necessarily point to
8755     // anything. It could be the pointer to the outlined function that
8756     // implements the target region, but we aren't using that so that the
8757     // compiler doesn't need to keep that, and could therefore inline the host
8758     // function if proven worthwhile during optimization.
8760     // From this point on, we need to have an ID of the target region defined.
8761     assert(OutlinedFnID && "Invalid outlined function ID!");
8763     // Emit device ID if any.
8764     llvm::Value *DeviceID;
8766       DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8767                                            CGF.Int64Ty, /*isSigned=*/true);
8769       DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8772     // Emit the number of elements in the offloading arrays.
8773     llvm::Value *PointerNum =
8774         CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
8776     // Return value of the runtime offloading call.
8777     llvm::Value *Return;
8779     llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
8780     llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
8782     bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
8783     // The target region is an outlined function launched by the runtime
8784     // via calls __tgt_target() or __tgt_target_teams().
8786     // __tgt_target() launches a target region with one team and one thread,
8787     // executing a serial region. This master thread may in turn launch
8788     // more threads within its team upon encountering a parallel region,
8789     // however, no additional teams can be launched on the device.
8791     // __tgt_target_teams() launches a target region with one or more teams,
8792     // each with one or more threads. This call is required for target
8793     // constructs such as:
8795     //   'target' / 'teams'
8796     //   'target teams distribute parallel for'
8797     //   'target parallel'
8800     // Note that on the host and CPU targets, the runtime implementation of
8801     // these calls simply call the outlined function without forking threads.
8802     // The outlined functions themselves have runtime calls to
8803     // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
8804     // the compiler in emitTeamsCall() and emitParallelCall().
8806     // In contrast, on the NVPTX target, the implementation of
8807     // __tgt_target_teams() launches a GPU kernel with the requested number
8808     // of teams and threads so no additional calls to the runtime are required.
8810     // If we have NumTeams defined this means that we have an enclosed teams
8811     // region. Therefore we also expect to have NumThreads defined. These two
8812     // values should be defined in the presence of a teams directive,
8813     // regardless of having any clauses associated. If the user is using teams
8814     // but no clauses, these two values will be the default that should be
8815     // passed to the runtime library - a 32-bit integer with the value zero.
8816       assert(NumThreads && "Thread limit expression should be available along "
8817                            "with number of teams.");
8818       llvm::Value *OffloadingArgs[] = {DeviceID,
8821                                        InputInfo.BasePointersArray.getPointer(),
8822                                        InputInfo.PointersArray.getPointer(),
8823                                        InputInfo.SizesArray.getPointer(),
8827       Return = CGF.EmitRuntimeCall(
8828           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
8829                                           : OMPRTL__tgt_target_teams),
// No enclosed teams region: use the single-team __tgt_target entry points.
8832       llvm::Value *OffloadingArgs[] = {DeviceID,
8835                                        InputInfo.BasePointersArray.getPointer(),
8836                                        InputInfo.PointersArray.getPointer(),
8837                                        InputInfo.SizesArray.getPointer(),
8839       Return = CGF.EmitRuntimeCall(
8840           createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
8841                                           : OMPRTL__tgt_target),
8845     // Check the error code and execute the host version if required.
8846     llvm::BasicBlock *OffloadFailedBlock =
8847         CGF.createBasicBlock("omp_offload.failed");
8848     llvm::BasicBlock *OffloadContBlock =
8849         CGF.createBasicBlock("omp_offload.cont");
8850     llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
8851     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
8853     CGF.EmitBlock(OffloadFailedBlock);
// Inside a target task the captures must be regenerated in the task context
// before calling the host fallback.
8854     if (RequiresOuterTask) {
8855       CapturedVars.clear();
8856       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8858     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
8859     CGF.EmitBranch(OffloadContBlock);
8861     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
8864   // Notify that the host version must be executed.
8865   auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
8866                     RequiresOuterTask](CodeGenFunction &CGF,
8867                                        PrePostActionTy &) {
8868     if (RequiresOuterTask) {
8869       CapturedVars.clear();
8870       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8872     emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
8875   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
8876                           &CapturedVars, RequiresOuterTask,
8877                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
8878     // Fill up the arrays with all the captured variables.
8879     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
8880     MappableExprsHandler::MapValuesArrayTy Pointers;
8881     MappableExprsHandler::MapValuesArrayTy Sizes;
8882     MappableExprsHandler::MapFlagsArrayTy MapTypes;
8884     // Get mappable expression information.
8885     MappableExprsHandler MEHandler(D, CGF);
8886     llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
// Iterate captures in lockstep with the captured record's fields and the
// already-generated captured values.
8888     auto RI = CS.getCapturedRecordDecl()->field_begin();
8889     auto CV = CapturedVars.begin();
8890     for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
8891                                               CE = CS.capture_end();
8892          CI != CE; ++CI, ++RI, ++CV) {
8893       MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
8894       MappableExprsHandler::MapValuesArrayTy CurPointers;
8895       MappableExprsHandler::MapValuesArrayTy CurSizes;
8896       MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
8897       MappableExprsHandler::StructRangeInfoTy PartialStruct;
8899       // VLA sizes are passed to the outlined region by copy and do not have map
8900       // information associated.
8901       if (CI->capturesVariableArrayType()) {
8902         CurBasePointers.push_back(*CV);
8903         CurPointers.push_back(*CV);
8904         CurSizes.push_back(CGF.Builder.CreateIntCast(
8905             CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
8906         // Copy to the device as an argument. No need to retrieve it.
8907         CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
8908                               MappableExprsHandler::OMP_MAP_TARGET_PARAM |
8909                               MappableExprsHandler::OMP_MAP_IMPLICIT);
8911         // If we have any information in the map clause, we use it, otherwise we
8912         // just do a default mapping.
8913         MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
8914                                          CurSizes, CurMapTypes, PartialStruct);
8915         if (CurBasePointers.empty())
8916           MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
8917                                            CurPointers, CurSizes, CurMapTypes);
8918         // Generate correct mapping for variables captured by reference in
8920         if (CI->capturesVariable())
8921           MEHandler.generateInfoForLambdaCaptures(
8922               CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
8923               CurMapTypes, LambdaPointers);
8925       // We expect to have at least an element of information for this capture.
8926       assert(!CurBasePointers.empty() &&
8927              "Non-existing map pointer for capture!");
8928       assert(CurBasePointers.size() == CurPointers.size() &&
8929              CurBasePointers.size() == CurSizes.size() &&
8930              CurBasePointers.size() == CurMapTypes.size() &&
8931              "Inconsistent map information sizes!");
8933       // If there is an entry in PartialStruct it means we have a struct with
8934       // individual members mapped. Emit an extra combined entry.
8935       if (PartialStruct.Base.isValid())
8936         MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
8937                                     CurMapTypes, PartialStruct);
8939       // We need to append the results of this capture to what we already have.
8940       BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8941       Pointers.append(CurPointers.begin(), CurPointers.end());
8942       Sizes.append(CurSizes.begin(), CurSizes.end());
8943       MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
8945     // Adjust MEMBER_OF flags for the lambdas captures.
8946     MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
8947                                               Pointers, MapTypes);
8948     // Map other list items in the map clause which are not captured variables
8949     // but "declare target link" global variables.
8950     MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
8953     TargetDataInfo Info;
8954     // Fill up the arrays and create the arguments.
8955     emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8956     emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
8957                                  Info.PointersArray, Info.SizesArray,
8958                                  Info.MapTypesArray, Info);
8959     InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
8960     InputInfo.BasePointersArray =
8961         Address(Info.BasePointersArray, CGM.getPointerAlign());
8962     InputInfo.PointersArray =
8963         Address(Info.PointersArray, CGM.getPointerAlign());
8964     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
8965     MapTypesArray = Info.MapTypesArray;
8966     if (RequiresOuterTask)
8967       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
8969       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
8972   auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
8973                              CodeGenFunction &CGF, PrePostActionTy &) {
8974     if (RequiresOuterTask) {
8975       CodeGenFunction::OMPTargetDataInfo InputInfo;
8976       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
8978       emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
8982   // If we have a target function ID it means that we need to support
8983   // offloading, otherwise, just execute on the host. We need to execute on host
8984   // regardless of the conditional in the if clause if, e.g., the user do not
8985   // specify target triples.
8988       emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
8990       RegionCodeGenTy ThenRCG(TargetThenGen);
8994     RegionCodeGenTy ElseRCG(TargetElseGen);
// Recursively scans a statement tree for target execution directives and
// emits their device functions (one EmitOMPTarget*DeviceFunction per combined
// target directive kind). Non-target executable directives are descended into
// via their innermost captured statement; lambdas and plain children are
// scanned recursively.
// NOTE(review): case labels, break statements and braces are elided in this
// excerpt; visible code is kept verbatim.
8999 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9000                                                     StringRef ParentName) {
9004   // Codegen OMP target directives that offload compute to the device.
9005   bool RequiresDeviceCodegen =
9006       isa<OMPExecutableDirective>(S) &&
9007       isOpenMPTargetExecutionDirective(
9008           cast<OMPExecutableDirective>(S)->getDirectiveKind());
9010   if (RequiresDeviceCodegen) {
9011     const auto &E = *cast<OMPExecutableDirective>(S);
9015     getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
9018     // Is this a target region that should not be emitted as an entry point? If
9019     // so just signal we are done with this target region.
9020     if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
9024     switch (E.getDirectiveKind()) {
9026       CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9027                                                    cast<OMPTargetDirective>(E));
9029     case OMPD_target_parallel:
9030       CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9031           CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9033     case OMPD_target_teams:
9034       CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9035           CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9037     case OMPD_target_teams_distribute:
9038       CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9039           CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9041     case OMPD_target_teams_distribute_simd:
9042       CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9043           CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9045     case OMPD_target_parallel_for:
9046       CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9047           CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9049     case OMPD_target_parallel_for_simd:
9050       CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9051           CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9053     case OMPD_target_simd:
9054       CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9055           CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9057     case OMPD_target_teams_distribute_parallel_for:
9058       CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9060           cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9062     case OMPD_target_teams_distribute_parallel_for_simd:
9064           EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9066               cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
// Non-target directive kinds are not expected here; the switch ends in an
// unreachable for them.
9070     case OMPD_parallel_for:
9071     case OMPD_parallel_sections:
9073     case OMPD_parallel_for_simd:
9075     case OMPD_cancellation_point:
9077     case OMPD_threadprivate:
9086     case OMPD_taskyield:
9089     case OMPD_taskgroup:
9093     case OMPD_target_data:
9094     case OMPD_target_exit_data:
9095     case OMPD_target_enter_data:
9096     case OMPD_distribute:
9097     case OMPD_distribute_simd:
9098     case OMPD_distribute_parallel_for:
9099     case OMPD_distribute_parallel_for_simd:
9100     case OMPD_teams_distribute:
9101     case OMPD_teams_distribute_simd:
9102     case OMPD_teams_distribute_parallel_for:
9103     case OMPD_teams_distribute_parallel_for_simd:
9104     case OMPD_target_update:
9105     case OMPD_declare_simd:
9106     case OMPD_declare_target:
9107     case OMPD_end_declare_target:
9108     case OMPD_declare_reduction:
9109     case OMPD_declare_mapper:
9111     case OMPD_taskloop_simd:
9114       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9119   if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9120     if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9123     scanForTargetRegionsFunctions(
9124         E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9128   // If this is a lambda function, look into its body.
9129   if (const auto *L = dyn_cast<LambdaExpr>(S))
9132   // Keep looking for target regions recursively.
9133   for (const Stmt *II : S->children())
9134     scanForTargetRegionsFunctions(II, ParentName);
// Device-side filter for function emission: scans the function body for
// target regions, then returns whether normal emission should be skipped
// (true when the function is neither declare-target nor already emitted).
// Host compilation falls through to normal codegen.
// NOTE(review): the early-return line after the host check is elided in this
// excerpt; code kept verbatim.
9137 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9138   // If emitting code for the host, we do not process FD here. Instead we do
9139   // the normal code generation.
9140   if (!CGM.getLangOpts().OpenMPIsDevice)
9143   const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9144   StringRef Name = CGM.getMangledName(GD);
9145   // Try to detect target regions in the function.
9146   if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9147     scanForTargetRegionsFunctions(FD->getBody(), Name);
9149   // Do not to emit function if it is not marked as declare target.
9150   return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9151          AlreadyEmittedTargetFunctions.count(Name) == 0;
// Device-side filter for global-variable emission: scans ctor/dtor bodies for
// target regions, and defers 'declare target link' (or 'to' with unified
// shared memory) variables for later emission via DeferredGlobalVariables.
// NOTE(review): return statements and closing braces are elided in this
// excerpt; code kept verbatim.
9154 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9155   if (!CGM.getLangOpts().OpenMPIsDevice)
9158   // Check if there are Ctors/Dtors in this declaration and look for target
9159   // regions in it. We use the complete variant to produce the kernel name
9161   QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9162   if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9163     for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9164       StringRef ParentName =
9165           CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9166       scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9168     if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9169       StringRef ParentName =
9170           CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9171       scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9175   // Do not to emit variable if it is not marked as declare target.
9176   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9177       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9178           cast<VarDecl>(GD.getDecl()));
9179   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9180       (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9181        HasRequiresUnifiedSharedMemory)) {
9182     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
// Creates (or reuses) an internal global holding a copy of a constant
// firstprivate variable for a target region, names it uniquely from the
// device/file IDs and source line, and registers it as a device global var
// entry so the offloading tables reference it.
// NOTE(review): the return type line and several declarations (DeviceID,
// FileID, Line, VarName) are elided in this excerpt; code kept verbatim.
9189 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
9190                                                 const VarDecl *VD) {
9191   assert(VD->getType().isConstant(CGM.getContext()) &&
9192          "Expected constant variable.");
9194   llvm::Constant *Addr;
9195   llvm::GlobalValue::LinkageTypes Linkage;
9196   QualType Ty = VD->getType();
9197   SmallString<128> Buffer;
9202   getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
9204   llvm::raw_svector_ostream OS(Buffer);
9205   OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9206      << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9209   Linkage = llvm::GlobalValue::InternalLinkage;
9211       getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
9212                                   getDefaultFirstprivateAddressSpace());
9213   cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9214   CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
// Keep the copy alive through optimization and register it as a 'to' entry.
9215   CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9216   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9217       VarName, Addr, VarSize,
9218       OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
// Records a global variable in the offload entries table. Non-declare-target
// variables seen during device compilation are only remembered in
// EmittedNonTargetVariables. 'declare target to' variables are registered
// with their real size/linkage; 'link' (or 'to' + unified shared memory)
// variables are registered through a pointer-sized reference slot.
// NOTE(review): else keywords, early returns and some declarations are
// elided in this excerpt; code kept verbatim.
9222 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9223                                                    llvm::Constant *Addr) {
9224   llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9225       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9227     if (CGM.getLangOpts().OpenMPIsDevice) {
9228       // Register non-target variables being emitted in device code (debug info
9230       StringRef VarName = CGM.getMangledName(VD);
9231       EmittedNonTargetVariables.try_emplace(VarName, Addr);
9235   // Register declare target variables.
9236   OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
9239   llvm::GlobalValue::LinkageTypes Linkage;
9241   if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9242       !HasRequiresUnifiedSharedMemory) {
9243     Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9244     VarName = CGM.getMangledName(VD);
9245     if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
9246       VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9247       assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9249       VarSize = CharUnits::Zero();
9251     Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9252     // Temp solution to prevent optimizations of the internal variables.
9253     if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9254       std::string RefName = getName({VarName, "ref"});
9255       if (!CGM.GetGlobalValue(RefName)) {
9256         llvm::Constant *AddrRef =
9257             getOrCreateInternalVariable(Addr->getType(), RefName);
9258         auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9259         GVAddrRef->setConstant(/*Val=*/true);
9260         GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9261         GVAddrRef->setInitializer(Addr);
9262         CGM.addCompilerUsedGlobal(GVAddrRef);
9266     assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9267             (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9268              HasRequiresUnifiedSharedMemory)) &&
9269            "Declare target attribute must link or to with unified memory.");
9270     if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9271       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
9273       Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
9275     if (CGM.getLangOpts().OpenMPIsDevice) {
9276       VarName = Addr->getName();
9279       VarName = getAddrOfDeclareTargetVar(VD).getName();
9280       Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9282     VarSize = CGM.getPointerSize();
9283     Linkage = llvm::GlobalValue::WeakAnyLinkage;
9286   OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
9287       VarName, Addr, VarSize, Flags, Linkage);
// Dispatch emission of a global for the target device: function-like
// declarations (including OpenMP declare-reduction decls) are handled by
// emitTargetFunctions(); everything else by emitTargetGlobalVariable().
9290 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
9291 if (isa<FunctionDecl>(GD.getDecl()) ||
9292 isa<OMPDeclareReductionDecl>(GD.getDecl()))
9293 return emitTargetFunctions(GD);
9295 return emitTargetGlobalVariable(GD);
// Process variables whose emission was deferred until the whole TU was seen.
// For each deferred variable that is still a declare-target declaration,
// force creation of the device-side address unless it is a plain 'to' clause
// without unified shared memory (that case needs no indirection variable).
9298 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
9299 for (const VarDecl *VD : DeferredGlobalVariables) {
9300 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9301 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9304 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9305 !HasRequiresUnifiedSharedMemory) {
9308 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9309 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9310 HasRequiresUnifiedSharedMemory)) &&
9311 "Expected link clause or to clause with unified memory.");
// The result is discarded: calling getAddrOfDeclareTargetVar() has the side
// effect of creating/registering the device pointer for the variable.
9312 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
// Default (host) implementation: only validates that the directive is a
// target-based execution directive. Device-specific runtimes override this
// to adjust captured lambda data.
9317 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
9318 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9319 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
9320 " Expected target-based directive.");
// Scan the clauses of a '#pragma omp requires' directive and record whether
// unified shared memory was requested; this flag changes how declare-target
// variables and offload entries are emitted elsewhere in this file.
9323 void CGOpenMPRuntime::checkArchForUnifiedAddressing(
9324 const OMPRequiresDecl *D) {
9325 for (const OMPClause *Clause : D->clauselists()) {
9326 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9327 HasRequiresUnifiedSharedMemory = true;
// Check whether a global variable carries an OMPAllocateDeclAttr and, if so,
// report the address space to emit it in. All predefined allocators
// currently map to the default address space; user-defined allocators are
// not expected for globals (enforced by the unreachable below).
9333 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
9335 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9337 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9338 switch(A->getAllocatorType()) {
9339 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9340 // Not supported, fallback to the default mem space.
9341 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9342 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9343 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9344 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9345 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9346 case OMPAllocateDeclAttr::OMPConstMemAlloc:
9347 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9348 AS = LangAS::Default;
9350 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9351 llvm_unreachable("Expected predefined allocator for the variables with the "
// Accessor for the flag set by checkArchForUnifiedAddressing() when a
// 'requires unified_shared_memory' directive was seen.
9357 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
9358 return HasRequiresUnifiedSharedMemory;
// RAII constructor: on the device, save the current ShouldMarkAsGlobal state
// and disable it for the lifetime of this object (restored in the dtor).
9361 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
9364 if (CGM.getLangOpts().OpenMPIsDevice) {
9365 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9366 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
// RAII destructor: restore the ShouldMarkAsGlobal state saved at
// construction (device compilation only — matches the constructor's guard).
9370 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
9371 if (CGM.getLangOpts().OpenMPIsDevice)
9372 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
// Decide whether a function must be globalized for the device. Returns true
// when the function should NOT be (re-)emitted: either it is a declare-target
// function with a body that already has a non-declaration definition in the
// module, or its mangled name was already recorded in
// AlreadyEmittedTargetFunctions (the insert below records it otherwise).
9375 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
9376 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9379 StringRef Name = CGM.getMangledName(GD);
9380 const auto *D = cast<FunctionDecl>(GD.getDecl());
9381 // Do not to emit function if it is marked as declare target as it was already
9383 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9384 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9385 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9386 return !F->isDeclaration();
// insert() returns false in .second if the name was already present; in that
// case the function was emitted before and must not be emitted again.
9392 return !AlreadyEmittedTargetFunctions.insert(Name).second;
// Emit the host-side constructor-like function that registers the flags of
// any '#pragma omp requires' directives with the offload runtime via
// __tgt_register_requires. Returns nullptr-equivalent early when nothing
// needs registering (no target triples, simd-only mode, device compilation,
// or no target/declare-target regions and no offload entries).
9395 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
9396 // If we don't have entries or if we are emitting code for the device, we
9397 // don't need to do anything.
9398 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9399 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9400 (OffloadEntriesInfoManager.empty() &&
9401 !HasEmittedDeclareTargetRegion &&
9402 !HasEmittedTargetRegion))
9405 // Create and register the function that handles the requires directives.
9406 ASTContext &C = CGM.getContext();
9408 llvm::Function *RequiresRegFn;
9410 CodeGenFunction CGF(CGM);
9411 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9412 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9413 std::string ReqName = getName({"omp_offloading", "requires_reg"});
9414 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9415 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9416 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9417 // TODO: check for other requires clauses.
9418 // The requires directive takes effect only when a target region is
9419 // present in the compilation unit. Otherwise it is ignored and not
9420 // passed to the runtime. This avoids the runtime from throwing an error
9421 // for mismatching requires clauses across compilation units that don't
9422 // contain at least 1 target region.
9423 assert((HasEmittedTargetRegion ||
9424 HasEmittedDeclareTargetRegion ||
9425 !OffloadEntriesInfoManager.empty()) &&
9426 "Target or declare target region expected.");
9427 if (HasRequiresUnifiedSharedMemory)
9428 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9429 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
9430 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9431 CGF.FinishFunction();
9433 return RequiresRegFn;
// Emit all offload entries/metadata for this module and return the function
// that registers the offloading binary descriptor with the runtime.
9436 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
9437 // If we have offloading in the current module, we need to emit the entries
9438 // now and register the offloading descriptor.
9439 createOffloadEntriesAndInfoMetadata();
9441 // Create and register the offloading binary descriptors. This is the main
9442 // entity that captures all the information about offloading in the current
9443 // compilation unit.
9444 return createOffloadingBinaryDescriptorRegistration();
// Emit a call to __kmpc_fork_teams for a 'teams' construct: the fixed
// arguments (location, capture count, outlined microtask) are followed by
// the captured variables themselves.
9447 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
9448 const OMPExecutableDirective &D,
9450 llvm::Function *OutlinedFn,
9451 ArrayRef<llvm::Value *> CapturedVars) {
9452 if (!CGF.HaveInsertPoint())
9455 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9456 CodeGenFunction::RunCleanupsScope Scope(CGF);
9458 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9459 llvm::Value *Args[] = {
9461 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9462 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9463 llvm::SmallVector<llvm::Value *, 16> RealArgs;
9464 RealArgs.append(std::begin(Args), std::end(Args));
9465 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9467 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9468 CGF.EmitRuntimeCall(RTLFn, RealArgs);
// Emit a call to __kmpc_push_num_teams for 'num_teams'/'thread_limit'
// clauses. A null clause expression is encoded as 0 (runtime default).
9471 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
9472 const Expr *NumTeams,
9473 const Expr *ThreadLimit,
9474 SourceLocation Loc) {
9475 if (!CGF.HaveInsertPoint())
9478 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9480 llvm::Value *NumTeamsVal =
9482 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9483 CGF.CGM.Int32Ty, /* isSigned = */ true)
9484 : CGF.Builder.getInt32(0);
9486 llvm::Value *ThreadLimitVal =
9488 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9489 CGF.CGM.Int32Ty, /* isSigned = */ true)
9490 : CGF.Builder.getInt32(0);
9492 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9493 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9495 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
// Emit the paired __tgt_target_data_begin/__tgt_target_data_end calls for a
// 'target data' region, optionally guarded by an 'if' clause. When device
// pointer privatization is needed (Info.CaptureDeviceAddrMap non-empty) the
// region body is emitted twice: with privatization inside the 'then' branch
// and without it in the 'else' branch; otherwise the body is emitted once
// between the two runtime calls.
9499 void CGOpenMPRuntime::emitTargetDataCalls(
9500 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9501 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9502 if (!CGF.HaveInsertPoint())
9505 // Action used to replace the default codegen action and turn privatization
9507 PrePostActionTy NoPrivAction;
9509 // Generate the code for the opening of the data environment. Capture all the
9510 // arguments of the runtime call by reference because they are used in the
9511 // closing of the region.
9512 auto &&BeginThenGen = [this, &D, Device, &Info,
9513 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9514 // Fill up the arrays with all the mapped variables.
9515 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9516 MappableExprsHandler::MapValuesArrayTy Pointers;
9517 MappableExprsHandler::MapValuesArrayTy Sizes;
9518 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9520 // Get map clause information.
9521 MappableExprsHandler MCHandler(D, CGF);
9522 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9524 // Fill up the arrays and create the arguments.
9525 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9527 llvm::Value *BasePointersArrayArg = nullptr;
9528 llvm::Value *PointersArrayArg = nullptr;
9529 llvm::Value *SizesArrayArg = nullptr;
9530 llvm::Value *MapTypesArrayArg = nullptr;
9531 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9532 SizesArrayArg, MapTypesArrayArg, Info);
9534 // Emit device ID if any; otherwise use the OMP_DEVICEID_UNDEF sentinel.
9535 llvm::Value *DeviceID = nullptr;
9537 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9538 CGF.Int64Ty, /*isSigned=*/true);
9540 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9543 // Emit the number of elements in the offloading arrays.
9544 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9546 llvm::Value *OffloadingArgs[] = {
9547 DeviceID, PointerNum, BasePointersArrayArg,
9548 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9549 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
9552 // If device pointer privatization is required, emit the body of the region
9553 // here. It will have to be duplicated: with and without privatization.
9554 if (!Info.CaptureDeviceAddrMap.empty())
9558 // Generate code for the closing of the data region.
9559 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
9560 PrePostActionTy &) {
9561 assert(Info.isValid() && "Invalid data environment closing arguments.");
9563 llvm::Value *BasePointersArrayArg = nullptr;
9564 llvm::Value *PointersArrayArg = nullptr;
9565 llvm::Value *SizesArrayArg = nullptr;
9566 llvm::Value *MapTypesArrayArg = nullptr;
9567 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9568 SizesArrayArg, MapTypesArrayArg, Info);
9570 // Emit device ID if any; otherwise use the OMP_DEVICEID_UNDEF sentinel.
9571 llvm::Value *DeviceID = nullptr;
9573 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9574 CGF.Int64Ty, /*isSigned=*/true);
9576 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9579 // Emit the number of elements in the offloading arrays.
9580 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9582 llvm::Value *OffloadingArgs[] = {
9583 DeviceID, PointerNum, BasePointersArrayArg,
9584 PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9585 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
9589 // If we need device pointer privatization, we need to emit the body of the
9590 // region with no privatization in the 'else' branch of the conditional.
9591 // Otherwise, we don't have to do anything.
9592 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
9593 PrePostActionTy &) {
9594 if (!Info.CaptureDeviceAddrMap.empty()) {
9595 CodeGen.setAction(NoPrivAction);
9600 // We don't have to do anything to close the region if the if clause evaluates
9602 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
9605 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
9607 RegionCodeGenTy RCG(BeginThenGen);
9611 // If we don't require privatization of device pointers, we emit the body in
9612 // between the runtime calls. This avoids duplicating the body code.
9613 if (Info.CaptureDeviceAddrMap.empty()) {
9614 CodeGen.setAction(NoPrivAction);
9619 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
9621 RegionCodeGenTy RCG(EndThenGen);
// Emit the runtime call for a standalone target data directive
// ('target enter data', 'target exit data', or 'target update'). The right
// __tgt_target_data_* entry point (nowait variant when a 'nowait' clause is
// present) is selected from the directive kind; if a 'depend' clause exists
// the call is wrapped in a task, otherwise it is emitted inline.
9626 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
9627 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9628 const Expr *Device) {
9629 if (!CGF.HaveInsertPoint())
9632 assert((isa<OMPTargetEnterDataDirective>(D) ||
9633 isa<OMPTargetExitDataDirective>(D) ||
9634 isa<OMPTargetUpdateDirective>(D)) &&
9635 "Expecting either target enter, exit data, or update directives.");
9637 CodeGenFunction::OMPTargetDataInfo InputInfo;
9638 llvm::Value *MapTypesArray = nullptr;
9639 // Generate the code for the opening of the data environment.
9640 auto &&ThenGen = [this, &D, Device, &InputInfo,
9641 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
9642 // Emit device ID if any; otherwise use the OMP_DEVICEID_UNDEF sentinel.
9643 llvm::Value *DeviceID = nullptr;
9645 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9646 CGF.Int64Ty, /*isSigned=*/true);
9648 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9651 // Emit the number of elements in the offloading arrays.
9652 llvm::Constant *PointerNum =
9653 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9655 llvm::Value *OffloadingArgs[] = {DeviceID,
9657 InputInfo.BasePointersArray.getPointer(),
9658 InputInfo.PointersArray.getPointer(),
9659 InputInfo.SizesArray.getPointer(),
9662 // Select the right runtime function call for each expected standalone
9664 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9665 OpenMPRTLFunction RTLFn;
9666 switch (D.getDirectiveKind()) {
9667 case OMPD_target_enter_data:
9668 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
9669 : OMPRTL__tgt_target_data_begin;
9671 case OMPD_target_exit_data:
9672 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
9673 : OMPRTL__tgt_target_data_end;
9675 case OMPD_target_update:
9676 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
9677 : OMPRTL__tgt_target_data_update;
// All remaining directive kinds cannot reach this function (see the assert
// at the top); they fall through to the llvm_unreachable below.
9681 case OMPD_parallel_for:
9682 case OMPD_parallel_sections:
9684 case OMPD_parallel_for_simd:
9686 case OMPD_cancellation_point:
9688 case OMPD_threadprivate:
9697 case OMPD_taskyield:
9700 case OMPD_taskgroup:
9704 case OMPD_target_data:
9705 case OMPD_distribute:
9706 case OMPD_distribute_simd:
9707 case OMPD_distribute_parallel_for:
9708 case OMPD_distribute_parallel_for_simd:
9709 case OMPD_teams_distribute:
9710 case OMPD_teams_distribute_simd:
9711 case OMPD_teams_distribute_parallel_for:
9712 case OMPD_teams_distribute_parallel_for_simd:
9713 case OMPD_declare_simd:
9714 case OMPD_declare_target:
9715 case OMPD_end_declare_target:
9716 case OMPD_declare_reduction:
9717 case OMPD_declare_mapper:
9719 case OMPD_taskloop_simd:
9721 case OMPD_target_simd:
9722 case OMPD_target_teams_distribute:
9723 case OMPD_target_teams_distribute_simd:
9724 case OMPD_target_teams_distribute_parallel_for:
9725 case OMPD_target_teams_distribute_parallel_for_simd:
9726 case OMPD_target_teams:
9727 case OMPD_target_parallel:
9728 case OMPD_target_parallel_for:
9729 case OMPD_target_parallel_for_simd:
9732 llvm_unreachable("Unexpected standalone target data directive.");
9735 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
9738 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
9739 CodeGenFunction &CGF, PrePostActionTy &) {
9740 // Fill up the arrays with all the mapped variables.
9741 MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
9742 MappableExprsHandler::MapValuesArrayTy Pointers;
9743 MappableExprsHandler::MapValuesArrayTy Sizes;
9744 MappableExprsHandler::MapFlagsArrayTy MapTypes;
9746 // Get map clause information.
9747 MappableExprsHandler MEHandler(D, CGF);
9748 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9750 TargetDataInfo Info;
9751 // Fill up the arrays and create the arguments.
9752 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9753 emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
9754 Info.PointersArray, Info.SizesArray,
9755 Info.MapTypesArray, Info);
9756 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9757 InputInfo.BasePointersArray =
9758 Address(Info.BasePointersArray, CGM.getPointerAlign());
9759 InputInfo.PointersArray =
9760 Address(Info.PointersArray, CGM.getPointerAlign());
9761 InputInfo.SizesArray =
9762 Address(Info.SizesArray, CGM.getPointerAlign());
9763 MapTypesArray = Info.MapTypesArray;
9764 if (D.hasClausesOfKind<OMPDependClause>())
9765 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9767 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9771 emitOMPIfClause(CGF, IfCond, TargetThenGen,
9772 [](CodeGenFunction &CGF, PrePostActionTy &) {});
9774 RegionCodeGenTy ThenRCG(TargetThenGen);
9780 /// Kind of parameter in a function with 'declare simd' directive.
9781 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
9782 /// Attribute set of the parameter.
9783 struct ParamAttrTy {
// Classification of the parameter; Vector is the default for parameters
// not mentioned in any uniform/linear clause.
9784 ParamKindTy Kind = Vector;
// For linear parameters: the step, or (for LinearWithVarStride) the
// position of the parameter that supplies the stride.
9785 llvm::APSInt StrideOrArg;
// Alignment from an 'aligned' clause (zero when absent).
9786 llvm::APSInt Alignment;
9790 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9791 ArrayRef<ParamAttrTy> ParamAttrs) {
9792 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9793 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9794 // of that clause. The VLEN value must be power of 2.
9795 // In other case the notion of the function`s "characteristic data type" (CDT)
9796 // is used to compute the vector length.
9797 // CDT is defined in the following order:
9798 // a) For non-void function, the CDT is the return type.
9799 // b) If the function has any non-uniform, non-linear parameters, then the
9800 // CDT is the type of the first such parameter.
9801 // c) If the CDT determined by a) or b) above is struct, union, or class
9802 // type which is pass-by-value (except for the type that maps to the
9803 // built-in complex data type), the characteristic data type is int.
9804 // d) If none of the above three cases is applicable, the CDT is int.
9805 // The VLEN is then determined based on the CDT and the size of vector
9806 // register of that ISA for which current vector version is generated. The
9807 // VLEN is computed using the formula below:
9808 // VLEN = sizeof(vector_register) / sizeof(CDT),
9809 // where vector register size specified in section 3.2.1 Registers and the
9810 // Stack Frame of original AMD64 ABI document.
9811 QualType RetType = FD->getReturnType();
9812 if (RetType.isNull())
9814 ASTContext &C = FD->getASTContext();
9816 if (!RetType.isNull() && !RetType->isVoidType()) {
9819 unsigned Offset = 0;
// For C++ methods the implicit 'this' occupies slot 0 of ParamAttrs, so
// explicit parameters are shifted by one.
9820 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9821 if (ParamAttrs[Offset].Kind == Vector)
9822 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
// Pick the first Vector (non-uniform, non-linear) parameter as the CDT
// if the return type did not already determine it (rule b above).
9826 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9827 if (ParamAttrs[I + Offset].Kind == Vector) {
9828 CDT = FD->getParamDecl(I)->getType();
9836 CDT = CDT->getCanonicalTypeUnqualified();
9837 if (CDT->isRecordType() || CDT->isUnionType())
9839 return C.getTypeSize(CDT);
// Emit x86 vector-variant mangled names ("_ZGV<isa><mask><vlen><params>_"
// prefix, per the Intel vector ABI) as function attributes on Fn, one per
// (mask, ISA) combination implied by the 'declare simd' branch state.
9843 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9844 const llvm::APSInt &VLENVal,
9845 ArrayRef<ParamAttrTy> ParamAttrs,
9846 OMPDeclareSimdDeclAttr::BranchStateTy State) {
9849 unsigned VecRegSize;
9851 ISADataTy ISAData[] = {
// 'N' = unmasked (notinbranch) variant, 'M' = masked (inbranch) variant;
// an unspecified branch state produces both.
9865 llvm::SmallVector<char, 2> Masked;
9867 case OMPDeclareSimdDeclAttr::BS_Undefined:
9868 Masked.push_back('N');
9869 Masked.push_back('M');
9871 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9872 Masked.push_back('N');
9874 case OMPDeclareSimdDeclAttr::BS_Inbranch:
9875 Masked.push_back('M');
9878 for (char Mask : Masked) {
9879 for (const ISADataTy &Data : ISAData) {
9880 SmallString<256> Buffer;
9881 llvm::raw_svector_ostream Out(Buffer);
9882 Out << "_ZGV" << Data.ISA << Mask;
// VLEN derived from the ISA's vector register width divided by the
// characteristic-data-type size (see evaluateCDTSize).
9884 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
9885 assert(NumElts && "Non-zero simdlen/cdtsize expected");
9886 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
9890 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9891 switch (ParamAttr.Kind){
9892 case LinearWithVarStride:
9893 Out << 's' << ParamAttr.StrideOrArg;
9897 if (!!ParamAttr.StrideOrArg)
9898 Out << ParamAttr.StrideOrArg;
9907 if (!!ParamAttr.Alignment)
9908 Out << 'a' << ParamAttr.Alignment;
9910 Out << '_' << Fn->getName();
9911 Fn->addFnAttr(Out.str());
9916 // These are the functions that are needed to mangle the name of the
9917 // vector functions generated by the compiler, according to the rules
9918 // defined in the "Vector Function ABI specifications for AArch64",
9920 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9922 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9924 /// TODO: Need to implement the behavior for reference marked with a
9925 /// var or no linear modifiers (1.b in the section). For this, we
9926 /// need to extend ParamKindTy to support the linear modifiers.
// Returns whether a parameter "Maps To Vector" (MTV) per section 3.1.1 of
// the AAVFABI: void never does; uniform and linear parameters do not;
// linear-with-variable-stride is not yet handled (see TODO above).
9927 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9928 QT = QT.getCanonicalType();
9930 if (QT->isVoidType())
9933 if (Kind == ParamKindTy::Uniform)
9936 if (Kind == ParamKindTy::Linear)
9939 // TODO: Handle linear references with modifiers
9941 if (Kind == ParamKindTy::LinearWithVarStride)
9947 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9948 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9949 QT = QT.getCanonicalType();
9950 unsigned Size = C.getTypeSize(QT);
9952 // Only scalars and complex within 16 bytes wide set PBV to true.
9953 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9956 if (QT->isFloatingType())
9959 if (QT->isIntegerType())
9962 if (QT->isPointerType())
9965 // TODO: Add support for complex types (section 3.1.2, item 2).
9970 /// Computes the lane size (LS) of a return type or of an input parameter,
9971 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9972 /// TODO: Add support for references, section 3.2.1, item 1.
9973 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
// A pointer that maps to vector contributes the size of its pointee when
// the pointee itself is pass-by-value.
9974 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9975 QualType PTy = QT.getCanonicalType()->getPointeeType();
9976 if (getAArch64PBV(PTy, C))
9977 return C.getTypeSize(PTy);
9979 if (getAArch64PBV(QT, C))
9980 return C.getTypeSize(QT);
// Fallback: everything else is treated as pointer-sized.
9982 return C.getTypeSize(C.getUIntPtrType());
9985 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9986 // signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI. Returns (NDS, WDS, OutputBecomesInput), where the last flag
// marks a return value that must also be treated as an input parameter.
9988 static std::tuple<unsigned, unsigned, bool>
9989 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9990 QualType RetType = FD->getReturnType().getCanonicalType();
9992 ASTContext &C = FD->getASTContext();
9994 bool OutputBecomesInput = false;
9996 llvm::SmallVector<unsigned, 8> Sizes;
9997 if (!RetType->isVoidType()) {
9998 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9999 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10000 OutputBecomesInput = true;
10002 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10003 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10004 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10007 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10008 // The LS of a function parameter / return value can only be a power
10009 // of 2, starting from 8 bits, up to 128.
10010 assert(std::all_of(Sizes.begin(), Sizes.end(),
10011 [](unsigned Size) {
10012 return Size == 8 || Size == 16 || Size == 32 ||
10013 Size == 64 || Size == 128;
10017 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10018 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10019 OutputBecomesInput);
10022 /// Mangle the parameter part of the vector function name according to
10023 /// their OpenMP classification. The mangling function is defined in
10024 /// section 3.5 of the AAVFABI.
10025 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10026 SmallString<256> Buffer;
10027 llvm::raw_svector_ostream Out(Buffer);
10028 for (const auto &ParamAttr : ParamAttrs) {
10029 switch (ParamAttr.Kind) {
// 'ls<n>' = linear with a variable stride held in parameter number n.
10030 case LinearWithVarStride:
10031 Out << "ls" << ParamAttr.StrideOrArg;
10035 // Don't print the step value if it is not present or if it is
10037 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10038 Out << ParamAttr.StrideOrArg;
// 'a<n>' = alignment modifier from an 'aligned' clause, if any.
10048 if (!!ParamAttr.Alignment)
10049 Out << 'a' << ParamAttr.Alignment;
10055 // Function used to add the attribute. The parameter `VLEN` is
10056 // templated to allow the use of "x" when targeting scalable functions
// (SVE) as well as a numeric lane count for fixed-width variants.
10058 template <typename T>
10059 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10060 char ISA, StringRef ParSeq,
10061 StringRef MangledName, bool OutputBecomesInput,
10062 llvm::Function *Fn) {
10063 SmallString<256> Buffer;
10064 llvm::raw_svector_ostream Out(Buffer);
// Assemble "<Prefix><ISA><Mask><VLEN>[v]<params>_<scalar-name>" and attach
// it to the function as an attribute encoding the vector variant name.
10065 Out << Prefix << ISA << LMask << VLEN;
10066 if (OutputBecomesInput)
10068 Out << ParSeq << "_" << MangledName;
10069 Fn->addFnAttr(Out.str());
10072 // Helper function to generate the Advanced SIMD names depending on
10073 // the value of the NDS when simdlen is not present.
// Each NDS bucket emits up to two vector lengths (the pairs below);
// the branch conditions selecting the bucket are elided in this view.
10074 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10075 StringRef Prefix, char ISA,
10076 StringRef ParSeq, StringRef MangledName,
10077 bool OutputBecomesInput,
10078 llvm::Function *Fn) {
10081 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10082 OutputBecomesInput, Fn);
10083 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10084 OutputBecomesInput, Fn);
10087 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10088 OutputBecomesInput, Fn);
10089 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10090 OutputBecomesInput, Fn);
10093 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10094 OutputBecomesInput, Fn);
10095 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10096 OutputBecomesInput, Fn);
10100 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10101 OutputBecomesInput, Fn);
10104 llvm_unreachable("Scalar type is too wide.");
10108 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
// Validates a user-supplied simdlen (UserVLEN; 0 means absent), then emits
// one vector-variant name attribute per (mask, VLEN) combination required
// by the branch state, for either SVE (ISA 's') or Advanced SIMD (ISA 'n').
10109 static void emitAArch64DeclareSimdFunction(
10110 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10111 ArrayRef<ParamAttrTy> ParamAttrs,
10112 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10113 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10115 // Get basic data for building the vector signature.
10116 const auto Data = getNDSWDS(FD, ParamAttrs);
10117 const unsigned NDS = std::get<0>(Data);
10118 const unsigned WDS = std::get<1>(Data);
10119 const bool OutputBecomesInput = std::get<2>(Data);
10121 // Check the values provided via `simdlen` by the user.
10122 // 1. A `simdlen(1)` doesn't produce vector signatures,
10123 if (UserVLEN == 1) {
10124 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10125 DiagnosticsEngine::Warning,
10126 "The clause simdlen(1) has no effect when targeting aarch64.");
10127 CGM.getDiags().Report(SLoc, DiagID);
10131 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10132 // Advanced SIMD output.
10133 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10134 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10135 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10136 "power of 2 when targeting Advanced SIMD.");
10137 CGM.getDiags().Report(SLoc, DiagID);
10141 // 3. Section 3.4.1. SVE fixed length must obey the architectural
10143 if (ISA == 's' && UserVLEN != 0) {
10144 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10145 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10146 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10147 "lanes in the architectural constraints "
10148 "for SVE (min is 128-bit, max is "
10149 "2048-bit, by steps of 128-bit)");
10150 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10155 // Sort out parameter sequence.
10156 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10157 StringRef Prefix = "_ZGV";
10158 // Generate simdlen from user input (if any).
10161 // SVE generates only a masked function.
10162 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10163 OutputBecomesInput, Fn);
10165 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10166 // Advanced SIMD generates one or two functions, depending on
10167 // the `[not]inbranch` clause.
10169 case OMPDeclareSimdDeclAttr::BS_Undefined:
10170 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10171 OutputBecomesInput, Fn);
10172 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10173 OutputBecomesInput, Fn);
10175 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10176 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10177 OutputBecomesInput, Fn);
10179 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10180 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10181 OutputBecomesInput, Fn);
10186 // If no user simdlen is provided, follow the AAVFABI rules for
10187 // generating the vector length.
10189 // SVE, section 3.4.1, item 1.
10190 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10191 OutputBecomesInput, Fn);
10193 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10194 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10195 // two vector names depending on the use of the clause
10196 // `[not]inbranch`.
10198 case OMPDeclareSimdDeclAttr::BS_Undefined:
10199 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10200 OutputBecomesInput, Fn);
10201 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10202 OutputBecomesInput, Fn);
10204 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10205 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10206 OutputBecomesInput, Fn);
10208 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10209 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10210 OutputBecomesInput, Fn);
// Entry point for '#pragma omp declare simd': walk every declare-simd
// attribute on the declaration chain, classify each parameter
// (uniform / aligned / linear), evaluate the optional simdlen, and emit the
// target-specific vector-variant name attributes (x86 or AArch64).
10217 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10218 llvm::Function *Fn) {
10219 ASTContext &C = CGM.getContext();
10220 FD = FD->getMostRecentDecl();
10221 // Map params to their positions in function decl.
10222 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
// For methods, the implicit 'this' takes position 0 (keyed by FD itself).
10223 if (isa<CXXMethodDecl>(FD))
10224 ParamPositions.try_emplace(FD, 0);
10225 unsigned ParamPos = ParamPositions.size();
10226 for (const ParmVarDecl *P : FD->parameters()) {
10227 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10231 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10232 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10233 // Mark uniform parameters.
10234 for (const Expr *E : Attr->uniforms()) {
10235 E = E->IgnoreParenImpCasts();
10237 if (isa<CXXThisExpr>(E)) {
10238 Pos = ParamPositions[FD];
10240 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10241 ->getCanonicalDecl();
10242 Pos = ParamPositions[PVD];
10244 ParamAttrs[Pos].Kind = Uniform;
10246 // Get alignment info.
10247 auto NI = Attr->alignments_begin();
10248 for (const Expr *E : Attr->aligneds()) {
10249 E = E->IgnoreParenImpCasts();
10252 if (isa<CXXThisExpr>(E)) {
10253 Pos = ParamPositions[FD];
10254 ParmTy = E->getType();
10256 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10257 ->getCanonicalDecl();
10258 Pos = ParamPositions[PVD];
10259 ParmTy = PVD->getType();
// Use the explicit alignment expression when present; otherwise fall
// back to the OpenMP default simd alignment for the parameter type.
10261 ParamAttrs[Pos].Alignment =
10263 ? (*NI)->EvaluateKnownConstInt(C)
10264 : llvm::APSInt::getUnsigned(
10265 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
10269 // Mark linear parameters.
10270 auto SI = Attr->steps_begin();
10271 auto MI = Attr->modifiers_begin();
10272 for (const Expr *E : Attr->linears()) {
10273 E = E->IgnoreParenImpCasts();
10275 if (isa<CXXThisExpr>(E)) {
10276 Pos = ParamPositions[FD];
10278 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10279 ->getCanonicalDecl();
10280 Pos = ParamPositions[PVD];
10282 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10283 ParamAttr.Kind = Linear;
10285 Expr::EvalResult Result;
// A non-constant step referring to another parameter becomes a
// variable stride: record that parameter's position instead.
10286 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10287 if (const auto *DRE =
10288 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10289 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10290 ParamAttr.Kind = LinearWithVarStride;
10291 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10292 ParamPositions[StridePVD->getCanonicalDecl()]);
10296 ParamAttr.StrideOrArg = Result.Val.getInt();
10302 llvm::APSInt VLENVal;
10303 SourceLocation ExprLoc;
10304 const Expr *VLENExpr = Attr->getSimdlen();
10306 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10307 ExprLoc = VLENExpr->getExprLoc();
10309 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10310 if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10311 CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10312 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10313 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10314 unsigned VLEN = VLENVal.getExtValue();
10315 StringRef MangledName = Fn->getName();
// On AArch64, emit variants for both SVE and Advanced SIMD when the
// corresponding target features are available.
10316 if (CGM.getTarget().hasFeature("sve"))
10317 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10318 MangledName, 's', 128, Fn, ExprLoc);
10319 if (CGM.getTarget().hasFeature("neon"))
10320 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10321 MangledName, 'n', 128, Fn, ExprLoc);
// Walk the redeclaration chain so attributes on any declaration are seen.
10324 FD = FD->getPreviousDecl();
10329 /// Cleanup action for doacross support.
// EH-scope cleanup that stashes a runtime finalization callee plus its two
// arguments (loc ident and thread id, per the caller in emitDoacrossInit)
// and emits the call when the scope unwinds or exits normally.
// NOTE(review): some interior lines (access specifiers, closing braces) are
// missing from this excerpt; only visible code is annotated.
10330 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
// Number of arguments the finalization call takes.
10332 static const int DoacrossFinArgs = 2;
10335 llvm::FunctionCallee RTLFn;
10336 llvm::Value *Args[DoacrossFinArgs];
10339 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10340 ArrayRef<llvm::Value *> CallArgs)
10342 assert(CallArgs.size() == DoacrossFinArgs);
// Copy the args now; the originating expressions may not be re-emittable at
// cleanup time.
10343 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10345 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
// Nothing to do if the IP was cleared (e.g. after a terminator).
10346 if (!CGF.HaveInsertPoint())
10348 CGF.EmitRuntimeCall(RTLFn, Args);
// Emit initialization for an 'ordered(n)' doacross loop nest: builds an
// array of kmp_dim descriptors (lo/up/st per loop dimension), calls
// __kmpc_doacross_init, and pushes a cleanup that calls
// __kmpc_doacross_fini on region exit.
// NOTE(review): several interior lines are missing from this excerpt (the
// embedded line numbers jump); comments describe only visible code.
10353 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10354 const OMPLoopDirective &D,
10355 ArrayRef<Expr *> NumIterations) {
10356 if (!CGF.HaveInsertPoint())
10359 ASTContext &C = CGM.getContext();
10360 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
// Lazily build (and cache in KmpDimTy) the implicit kmp_dim record type.
10362 if (KmpDimTy.isNull()) {
10363 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
10364 // kmp_int64 lo; // lower
10365 // kmp_int64 up; // upper
10366 // kmp_int64 st; // stride
10368 RD = C.buildImplicitRecord("kmp_dim");
10369 RD->startDefinition();
10370 addFieldToRecordDecl(C, RD, Int64Ty);
10371 addFieldToRecordDecl(C, RD, Int64Ty);
10372 addFieldToRecordDecl(C, RD, Int64Ty);
10373 RD->completeDefinition();
10374 KmpDimTy = C.getRecordType(RD);
10376 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
// One kmp_dim entry per collapsed loop dimension.
10378 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10380 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);
// Zero-init the whole array; only 'up' and 'st' are stored below, so 'lo'
// stays 0.
10382 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10383 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10384 enum { LowerFD = 0, UpperFD, StrideFD };
10385 // Fill dims with data.
10386 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10387 LValue DimsLVal = CGF.MakeAddrLValue(
10388 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10389 // dims.upper = num_iterations;
10390 LValue UpperLVal = CGF.EmitLValueForField(
10391 DimsLVal, *std::next(RD->field_begin(), UpperFD));
// Iteration counts are widened/converted to kmp_int64 as required by the
// runtime ABI.
10392 llvm::Value *NumIterVal =
10393 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10394 D.getNumIterations()->getType(), Int64Ty,
10395 D.getNumIterations()->getExprLoc());
10396 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10397 // dims.stride = 1;
10398 LValue StrideLVal = CGF.EmitLValueForField(
10399 DimsLVal, *std::next(RD->field_begin(), StrideFD));
10400 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10404 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10405 // kmp_int32 num_dims, struct kmp_dim * dims);
10406 llvm::Value *Args[] = {
10407 emitUpdateLocation(CGF, D.getBeginLoc()),
10408 getThreadID(CGF, D.getBeginLoc()),
10409 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10410 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10411 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10414 llvm::FunctionCallee RTLFn =
10415 createRuntimeFunction(OMPRTL__kmpc_doacross_init);
10416 CGF.EmitRuntimeCall(RTLFn, Args);
// Register the matching __kmpc_doacross_fini(loc, gtid) as a normal+EH
// cleanup so the runtime is finalized on every exit path.
10417 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10418 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10419 llvm::FunctionCallee FiniRTLFn =
10420 createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
10421 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10422 llvm::makeArrayRef(FiniArgs));
// Emit an 'ordered depend(source)' / 'depend(sink:vec)' construct: stores
// the per-loop dependence vector into a temporary kmp_int64 array and calls
// __kmpc_doacross_post (source) or __kmpc_doacross_wait (sink).
// NOTE(review): a few interior lines are missing from this excerpt.
10425 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10426 const OMPDependClause *C) {
10428 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
// One kmp_int64 slot per loop in the doacross nest.
10429 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10430 QualType ArrayTy = CGM.getContext().getConstantArrayType(
10431 Int64Ty, Size, ArrayType::Normal, 0);
10432 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10433 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10434 const Expr *CounterVal = C->getLoopData(I);
10435 assert(CounterVal);
// Convert each counter to kmp_int64 as the runtime expects.
10436 llvm::Value *CntVal = CGF.EmitScalarConversion(
10437 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10438 CounterVal->getExprLoc());
10439 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10440 /*Volatile=*/false, Int64Ty);
// Common argument list: (ident_t *loc, kmp_int32 gtid, kmp_int64 *vec).
10442 llvm::Value *Args[] = {
10443 emitUpdateLocation(CGF, C->getBeginLoc()),
10444 getThreadID(CGF, C->getBeginLoc()),
10445 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10446 llvm::FunctionCallee RTLFn;
// depend(source) posts completion; depend(sink) waits for the dependence.
10447 if (C->getDependencyKind() == OMPC_DEPEND_source) {
10448 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
10450 assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10451 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
10453 CGF.EmitRuntimeCall(RTLFn, Args);
// Emit a call to Callee with an artificial debug location derived from Loc.
// When the callee is a known nounwind llvm::Function, the cheaper
// non-invoking call form is emitted; otherwise a regular runtime call
// (which may become an invoke inside EH scopes) is used.
// NOTE(review): the early 'return' after the nounwind call and closing
// braces are among lines missing from this excerpt.
10456 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
10457 llvm::FunctionCallee Callee,
10458 ArrayRef<llvm::Value *> Args) const {
10459 assert(Loc.isValid() && "Outlined function call location must be valid.");
// RAII: scopes the artificial debug location to this call emission.
10460 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10462 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10463 if (Fn->doesNotThrow()) {
10464 CGF.EmitNounwindRuntimeCall(Fn, Args);
10468 CGF.EmitRuntimeCall(Callee, Args);
// Thin virtual hook for calling an outlined OpenMP function; the host
// runtime simply forwards to emitCall (device runtimes may override).
10471 void CGOpenMPRuntime::emitOutlinedFunctionCall(
10472 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
10473 ArrayRef<llvm::Value *> Args) const {
10474 emitCall(CGF, Loc, OutlinedFn, Args);
// Function-prolog hook: records (in HasEmittedDeclareTargetRegion) that a
// 'declare target' function body has been emitted in this module.
10477 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
10478 if (const auto *FD = dyn_cast<FunctionDecl>(D))
10479 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10480 HasEmittedDeclareTargetRegion = true;
// Host default: native and target parameters coincide, so just return the
// address of the native parameter's local copy (device runtimes override
// this to translate between parameter representations).
10483 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
10484 const VarDecl *NativeParam,
10485 const VarDecl *TargetParam) const {
10486 return CGF.GetAddrOfLocalVar(NativeParam);
10490 /// Cleanup action for allocate support.
// EH-scope cleanup that stashes a runtime deallocation callee and its three
// arguments (thread id, address, allocator — per the caller in
// getAddressOfLocalVariable) and emits the call when the scope exits.
// NOTE(review): access specifiers and closing braces are among lines
// missing from this excerpt.
10491 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
// Number of arguments the deallocation call takes.
10493 static const int CleanupArgs = 3;
10496 llvm::FunctionCallee RTLFn;
10497 llvm::Value *Args[CleanupArgs];
10500 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10501 ArrayRef<llvm::Value *> CallArgs)
10503 assert(CallArgs.size() == CleanupArgs &&
10504 "Size of arguments does not match.");
// Copy now; the values must remain available at cleanup-emission time.
10505 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10507 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
// Nothing to do if the IP was cleared.
10508 if (!CGF.HaveInsertPoint())
10510 CGF.EmitRuntimeCall(RTLFn, Args);
// If VD carries an OMPAllocateDeclAttr with a non-default allocator, emit a
// __kmpc_alloc call for its storage, push a __kmpc_free cleanup, and return
// the typed address; otherwise return Address::invalid() so the caller
// performs the default (stack) allocation.
// NOTE(review): interior lines are missing from this excerpt (e.g. the
// guard before the first 'return Address::invalid()', 'else' lines and
// closing braces); comments describe only visible code.
10515 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
10516 const VarDecl *VD) {
10518 return Address::invalid();
10519 const VarDecl *CVD = VD->getCanonicalDecl();
10520 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10521 return Address::invalid();
10522 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10523 // Use the default allocation.
10524 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10525 !AA->getAllocator())
10526 return Address::invalid();
10528 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
// Variably-modified types need a runtime size, rounded up to Align.
10529 if (CVD->getType()->isVariablyModifiedType()) {
10530 Size = CGF.getTypeSize(CVD->getType());
10531 // Align the size: ((size + align - 1) / align) * align
10532 Size = CGF.Builder.CreateNUWAdd(
10533 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10534 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10535 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
// Constant-size types: compute the aligned size at compile time.
10537 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10538 Size = CGM.getSize(Sz.alignTo(Align));
10540 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
10541 assert(AA->getAllocator() &&
10542 "Expected allocator expression for non-default allocator.");
10543 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
10544 // According to the standard, the original allocator type is a enum (integer).
10545 // Convert to pointer type, if required.
10546 if (Allocator->getType()->isIntegerTy())
10547 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy)
10548 else if (Allocator->getType()->isPointerTy())
10549 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
// __kmpc_alloc(gtid, size, allocator) returns the raw (void*) storage.
10551 llvm::Value *Args[] = {ThreadID, Size, Allocator};
10553 llvm::Value *Addr =
10554 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
10555 CVD->getName() + ".void.addr");
// Matching __kmpc_free(gtid, addr, allocator) is pushed as a normal+EH
// cleanup so the storage is released on every exit path.
10556 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
10558 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
10560 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10561 llvm::makeArrayRef(FiniArgs));
// Cast the raw pointer to the variable's pointer type before returning.
10562 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
10564 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
10565 CVD->getName() + ".addr");
10566 return Address(Addr, Align);
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime overrides.
//
// In SIMD-only mode (-fopenmp-simd) only 'simd'-related constructs are
// honored, so every runtime-backed entry point below is unreachable by
// construction: Sema rejects or ignores the corresponding directives before
// CodeGen runs. Each override therefore simply asserts via
// llvm_unreachable.
// NOTE(review): closing braces of these one-line stubs are among the lines
// missing from this excerpt.
//===----------------------------------------------------------------------===//
10569 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
10570 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10571 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
10572 llvm_unreachable("Not supported in SIMD-only mode");
10575 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
10576 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10577 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
10578 llvm_unreachable("Not supported in SIMD-only mode");
10581 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
10582 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10583 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
10584 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
10585 bool Tied, unsigned &NumberOfParts) {
10586 llvm_unreachable("Not supported in SIMD-only mode");
10589 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
10590 SourceLocation Loc,
10591 llvm::Function *OutlinedFn,
10592 ArrayRef<llvm::Value *> CapturedVars,
10593 const Expr *IfCond) {
10594 llvm_unreachable("Not supported in SIMD-only mode");
10597 void CGOpenMPSIMDRuntime::emitCriticalRegion(
10598 CodeGenFunction &CGF, StringRef CriticalName,
10599 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
10600 const Expr *Hint) {
10601 llvm_unreachable("Not supported in SIMD-only mode");
10604 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
10605 const RegionCodeGenTy &MasterOpGen,
10606 SourceLocation Loc) {
10607 llvm_unreachable("Not supported in SIMD-only mode");
10610 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
10611 SourceLocation Loc) {
10612 llvm_unreachable("Not supported in SIMD-only mode");
10615 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
10616 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
10617 SourceLocation Loc) {
10618 llvm_unreachable("Not supported in SIMD-only mode");
10621 void CGOpenMPSIMDRuntime::emitSingleRegion(
10622 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
10623 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
10624 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
10625 ArrayRef<const Expr *> AssignmentOps) {
10626 llvm_unreachable("Not supported in SIMD-only mode");
10629 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
10630 const RegionCodeGenTy &OrderedOpGen,
10631 SourceLocation Loc,
10633 llvm_unreachable("Not supported in SIMD-only mode");
10636 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
10637 SourceLocation Loc,
10638 OpenMPDirectiveKind Kind,
10640 bool ForceSimpleCall) {
10641 llvm_unreachable("Not supported in SIMD-only mode");
10644 void CGOpenMPSIMDRuntime::emitForDispatchInit(
10645 CodeGenFunction &CGF, SourceLocation Loc,
10646 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
10647 bool Ordered, const DispatchRTInput &DispatchValues) {
10648 llvm_unreachable("Not supported in SIMD-only mode");
10651 void CGOpenMPSIMDRuntime::emitForStaticInit(
10652 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
10653 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
10654 llvm_unreachable("Not supported in SIMD-only mode");
10657 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
10658 CodeGenFunction &CGF, SourceLocation Loc,
10659 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
10660 llvm_unreachable("Not supported in SIMD-only mode");
10663 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
10664 SourceLocation Loc,
10667 llvm_unreachable("Not supported in SIMD-only mode");
10670 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
10671 SourceLocation Loc,
10672 OpenMPDirectiveKind DKind) {
10673 llvm_unreachable("Not supported in SIMD-only mode");
10676 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
10677 SourceLocation Loc,
10678 unsigned IVSize, bool IVSigned,
10679 Address IL, Address LB,
10680 Address UB, Address ST) {
10681 llvm_unreachable("Not supported in SIMD-only mode");
10684 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
10685 llvm::Value *NumThreads,
10686 SourceLocation Loc) {
10687 llvm_unreachable("Not supported in SIMD-only mode");
10690 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
10691 OpenMPProcBindClauseKind ProcBind,
10692 SourceLocation Loc) {
10693 llvm_unreachable("Not supported in SIMD-only mode");
10696 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
10699 SourceLocation Loc) {
10700 llvm_unreachable("Not supported in SIMD-only mode");
10703 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
10704 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
10705 CodeGenFunction *CGF) {
10706 llvm_unreachable("Not supported in SIMD-only mode");
10709 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
10710 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
10711 llvm_unreachable("Not supported in SIMD-only mode");
10714 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
10715 ArrayRef<const Expr *> Vars,
10716 SourceLocation Loc) {
10717 llvm_unreachable("Not supported in SIMD-only mode");
10720 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
10721 const OMPExecutableDirective &D,
10722 llvm::Function *TaskFunction,
10723 QualType SharedsTy, Address Shareds,
10724 const Expr *IfCond,
10725 const OMPTaskDataTy &Data) {
10726 llvm_unreachable("Not supported in SIMD-only mode");
10729 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
10730 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
10731 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
10732 const Expr *IfCond, const OMPTaskDataTy &Data) {
10733 llvm_unreachable("Not supported in SIMD-only mode");
// The only reduction form that exists in SIMD-only mode is the "simple"
// (no-runtime) reduction, which the base class can emit without any libomp
// calls — so assert that and delegate to CGOpenMPRuntime::emitReduction.
10736 void CGOpenMPSIMDRuntime::emitReduction(
10737 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
10738 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
10739 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
10740 assert(Options.SimpleReduction && "Only simple reduction is expected.");
10741 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
10742 ReductionOps, Options);
// Remaining CGOpenMPSIMDRuntime overrides: as above, these runtime-backed
// entry points (task reductions, cancellation, target/teams offloading,
// doacross, parameter translation) cannot be reached in SIMD-only mode and
// assert via llvm_unreachable.
// NOTE(review): closing braces / return-type lines of several stubs are
// among the lines missing from this excerpt (e.g. emitTargetGlobal and
// emitRegistrationFunction bodies, translateParameter's return type).
10745 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
10746 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
10747 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
10748 llvm_unreachable("Not supported in SIMD-only mode");
10751 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
10752 SourceLocation Loc,
10753 ReductionCodeGen &RCG,
10755 llvm_unreachable("Not supported in SIMD-only mode");
10758 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
10759 SourceLocation Loc,
10760 llvm::Value *ReductionsPtr,
10761 LValue SharedLVal) {
10762 llvm_unreachable("Not supported in SIMD-only mode");
10765 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
10766 SourceLocation Loc) {
10767 llvm_unreachable("Not supported in SIMD-only mode");
10770 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
10771 CodeGenFunction &CGF, SourceLocation Loc,
10772 OpenMPDirectiveKind CancelRegion) {
10773 llvm_unreachable("Not supported in SIMD-only mode");
10776 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
10777 SourceLocation Loc, const Expr *IfCond,
10778 OpenMPDirectiveKind CancelRegion) {
10779 llvm_unreachable("Not supported in SIMD-only mode");
10782 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
10783 const OMPExecutableDirective &D, StringRef ParentName,
10784 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
10785 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
10786 llvm_unreachable("Not supported in SIMD-only mode");
10789 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
10790 const OMPExecutableDirective &D,
10791 llvm::Function *OutlinedFn,
10792 llvm::Value *OutlinedFnID,
10793 const Expr *IfCond,
10794 const Expr *Device) {
10795 llvm_unreachable("Not supported in SIMD-only mode");
10798 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
10799 llvm_unreachable("Not supported in SIMD-only mode");
10802 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10803 llvm_unreachable("Not supported in SIMD-only mode");
10806 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
10810 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
10814 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
10815 const OMPExecutableDirective &D,
10816 SourceLocation Loc,
10817 llvm::Function *OutlinedFn,
10818 ArrayRef<llvm::Value *> CapturedVars) {
10819 llvm_unreachable("Not supported in SIMD-only mode");
10822 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10823 const Expr *NumTeams,
10824 const Expr *ThreadLimit,
10825 SourceLocation Loc) {
10826 llvm_unreachable("Not supported in SIMD-only mode");
10829 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
10830 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10831 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10832 llvm_unreachable("Not supported in SIMD-only mode");
10835 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
10836 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10837 const Expr *Device) {
10838 llvm_unreachable("Not supported in SIMD-only mode");
10841 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
10842 const OMPLoopDirective &D,
10843 ArrayRef<Expr *> NumIterations) {
10844 llvm_unreachable("Not supported in SIMD-only mode");
10847 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
10848 const OMPDependClause *C) {
10849 llvm_unreachable("Not supported in SIMD-only mode");
10853 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
10854 const VarDecl *NativeParam) const {
10855 llvm_unreachable("Not supported in SIMD-only mode");
10859 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
10860 const VarDecl *NativeParam,
10861 const VarDecl *TargetParam) const {
10862 llvm_unreachable("Not supported in SIMD-only mode");