1 //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This contains code to emit OpenMP nodes as LLVM code.
12 //===----------------------------------------------------------------------===//
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CodeGenFunction.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/Stmt.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/AST/DeclOpenMP.h"
22 #include "llvm/IR/CallSite.h"
23 using namespace clang;
24 using namespace CodeGen;
27 /// Lexical scope for OpenMP executable constructs that handles correct codegen
28 /// for captured expressions.
29 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
30 void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
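// Emit the declarations from each clause's pre-init statement so that
// captured expressions are evaluated before the construct region is entered.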
31 for (const auto *C : S.clauses()) {
32 if (auto *CPI = OMPClauseWithPreInit::get(C)) {
33 if (auto *PreInit = cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
34 for (const auto *I : PreInit->decls()) {
35 if (!I->hasAttr<OMPCaptureNoInitAttr>())
36 CGF.EmitVarDecl(cast<VarDecl>(*I));
37 else {
38 CodeGenFunction::AutoVarEmission Emission =
39 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
40 CGF.EmitAutoVarCleanups(Emission);
47 CodeGenFunction::OMPPrivateScope InlinedShareds;
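// Returns true if \p VD must be referenced through an enclosing capture
// (lambda capture field, captured statement, or block).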
49 static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
50 return CGF.LambdaCaptureFields.lookup(VD) ||
51 (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
52 (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
56 OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
57 bool AsInlined = false, bool EmitPreInitStmt = true)
58 : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
61 emitPreInitStmt(CGF, S);
63 if (S.hasAssociatedStmt()) {
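// Pre-compute and register the addresses of variables captured by the
// associated statement so that the inlined region refers to the enclosing
// function's copies.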
64 auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
65 for (auto &C : CS->captures()) {
66 if (C.capturesVariable() || C.capturesVariableByCopy()) {
67 auto *VD = C.getCapturedVar();
68 DeclRefExpr DRE(const_cast<VarDecl *>(VD),
69 isCapturedVar(CGF, VD) ||
70 (CGF.CapturedStmtInfo &&
71 InlinedShareds.isGlobalVarCaptured(VD)),
72 VD->getType().getNonReferenceType(), VK_LValue,
74 InlinedShareds.addPrivate(VD, [&CGF, &DRE]() -> Address {
75 return CGF.EmitLValue(&DRE).getAddress();
79 (void)InlinedShareds.Privatize();
85 /// Lexical scope for OpenMP parallel construct that handles correct codegen
86 /// for captured expressions.
87 class OMPParallelScope final : public OMPLexicalScope {
88 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
89 OpenMPDirectiveKind Kind = S.getDirectiveKind();
90 return !(isOpenMPTargetExecutionDirective(Kind) ||
91 isOpenMPLoopBoundSharingDirective(Kind)) &&
92 isOpenMPParallelDirective(Kind);
96 OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
97 : OMPLexicalScope(CGF, S,
99 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
102 /// Lexical scope for OpenMP teams construct that handles correct codegen
103 /// for captured expressions.
104 class OMPTeamsScope final : public OMPLexicalScope {
105 bool EmitPreInitStmt(const OMPExecutableDirective &S) {
106 OpenMPDirectiveKind Kind = S.getDirectiveKind();
107 return !isOpenMPTargetExecutionDirective(Kind) &&
108 isOpenMPTeamsDirective(Kind);
112 OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
113 : OMPLexicalScope(CGF, S,
115 /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
118 /// Private scope for OpenMP loop-based directives that supports capturing
119 /// of expressions used in the loop statement.
120 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
121 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
122 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
123 if (auto *PreInits = cast_or_null<DeclStmt>(LD->getPreInits())) {
124 for (const auto *I : PreInits->decls())
125 CGF.EmitVarDecl(cast<VarDecl>(*I));
131 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
132 : CodeGenFunction::RunCleanupsScope(CGF) {
133 emitPreInitStmt(CGF, S);
139 llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
140 auto &C = getContext();
141 llvm::Value *Size = nullptr;
142 auto SizeInChars = C.getTypeSizeInChars(Ty);
143 if (SizeInChars.isZero()) {
144 // getTypeSizeInChars() returns 0 for a VLA.
145 while (auto *VAT = C.getAsVariableArrayType(Ty)) {
146 llvm::Value *ArraySize;
147 std::tie(ArraySize, Ty) = getVLASize(VAT);
148 Size = Size ? Builder.CreateNUWMul(Size, ArraySize) : ArraySize;
150 SizeInChars = C.getTypeSizeInChars(Ty);
151 if (SizeInChars.isZero())
152 return llvm::ConstantInt::get(SizeTy, /*V=*/0);
153 Size = Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
155 Size = CGM.getSize(SizeInChars);
159 void CodeGenFunction::GenerateOpenMPCapturedVars(
160 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
161 const RecordDecl *RD = S.getCapturedRecordDecl();
162 auto CurField = RD->field_begin();
163 auto CurCap = S.captures().begin();
164 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
165 E = S.capture_init_end();
166 I != E; ++I, ++CurField, ++CurCap) {
167 if (CurField->hasCapturedVLAType()) {
168 auto VAT = CurField->getCapturedVLAType();
169 auto *Val = VLASizeMap[VAT->getSizeExpr()];
170 CapturedVars.push_back(Val);
171 } else if (CurCap->capturesThis())
172 CapturedVars.push_back(CXXThisValue);
173 else if (CurCap->capturesVariableByCopy()) {
174 llvm::Value *CV =
175 EmitLoadOfLValue(EmitLValue(*I), SourceLocation()).getScalarVal();
177 // If the field is not a pointer, we need to save the actual value
178 // and load it as a void pointer.
179 if (!CurField->getType()->isAnyPointerType()) {
180 auto &Ctx = getContext();
181 auto DstAddr = CreateMemTemp(
182 Ctx.getUIntPtrType(),
183 Twine(CurCap->getCapturedVar()->getName()) + ".casted");
184 LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
186 auto *SrcAddrVal = EmitScalarConversion(
187 DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
188 Ctx.getPointerType(CurField->getType()), SourceLocation());
190 MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
192 // Store the value using the source type pointer.
193 EmitStoreThroughLValue(RValue::get(CV), SrcLV);
195 // Load the value using the destination type pointer.
196 CV = EmitLoadOfLValue(DstLV, SourceLocation()).getScalarVal();
198 CapturedVars.push_back(CV);
200 assert(CurCap->capturesVariable() && "Expected capture by reference.");
201 CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer());
206 static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
207 StringRef Name, LValue AddrLV,
208 bool isReferenceType = false) {
209 ASTContext &Ctx = CGF.getContext();
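// Reinterpret the storage holding the uintptr_t value as storage of the
// destination type and form a naturally aligned address for it.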
211 auto *CastedPtr = CGF.EmitScalarConversion(
212 AddrLV.getAddress().getPointer(), Ctx.getUIntPtrType(),
213 Ctx.getPointerType(DstType), SourceLocation());
215 CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType))
218 // If we are dealing with references we need to return the address of the
219 // reference instead of the reference of the value.
220 if (isReferenceType) {
221 QualType RefType = Ctx.getLValueReferenceType(DstType);
222 auto *RefVal = TmpAddr.getPointer();
223 TmpAddr = CGF.CreateMemTemp(RefType, Twine(Name) + ".ref");
224 auto TmpLVal = CGF.MakeAddrLValue(TmpAddr, RefType);
225 CGF.EmitStoreThroughLValue(RValue::get(RefVal), TmpLVal, /*isInit*/ true);
231 static QualType getCanonicalParamType(ASTContext &C, QualType T) {
232 if (T->isLValueReferenceType()) {
233 return C.getLValueReferenceType(
234 getCanonicalParamType(C, T.getNonReferenceType()),
235 /*SpelledAsLValue=*/false);
237 if (T->isPointerType())
238 return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
239 return C.getCanonicalParamType(T);
243 /// Contains required data for proper outlined function codegen.
244 struct FunctionOptions {
245 /// Captured statement for which the function is generated.
246 const CapturedStmt *S = nullptr;
247 /// true if cast to/from UIntPtr is required for variables captured by value.
249 bool UIntPtrCastRequired = true;
250 /// true if only casted arguments must be registered as local args or VLA sizes.
252 bool RegisterCastedArgsOnly = false;
253 /// Name of the generated function.
254 StringRef FunctionName;
255 explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
256 bool RegisterCastedArgsOnly,
257 StringRef FunctionName)
258 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
259 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
260 FunctionName(FunctionName) {}
264 static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
265 CodeGenFunction &CGF, FunctionArgList &Args,
266 llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>>
268 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
270 llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
271 const CapturedDecl *CD = FO.S->getCapturedDecl();
272 const RecordDecl *RD = FO.S->getCapturedRecordDecl();
273 assert(CD->hasBody() && "missing CapturedDecl body");
275 CXXThisValue = nullptr;
276 // Build the argument list.
277 CodeGenModule &CGM = CGF.CGM;
278 ASTContext &Ctx = CGM.getContext();
279 bool HasUIntPtrArgs = false;
280 Args.append(CD->param_begin(),
281 std::next(CD->param_begin(), CD->getContextParamPosition()));
282 auto I = FO.S->captures().begin();
283 for (auto *FD : RD->fields()) {
284 QualType ArgType = FD->getType();
285 IdentifierInfo *II = nullptr;
286 VarDecl *CapVar = nullptr;
288 // If this is a capture by copy and the type is not a pointer, the outlined
289 // function argument type should be uintptr and the value properly casted to
290 // uintptr. This is necessary given that the runtime library is only able to
291 // deal with pointers. The VLA type sizes are passed to the outlined function
292 // in the same way.
293 if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
294 I->capturesVariableArrayType()) {
295 HasUIntPtrArgs = true;
296 if (FO.UIntPtrCastRequired)
297 ArgType = Ctx.getUIntPtrType();
300 if (I->capturesVariable() || I->capturesVariableByCopy()) {
301 CapVar = I->getCapturedVar();
302 II = CapVar->getIdentifier();
303 } else if (I->capturesThis())
304 II = &Ctx.Idents.get("this");
306 assert(I->capturesVariableArrayType());
307 II = &Ctx.Idents.get("vla");
309 if (ArgType->isVariablyModifiedType())
310 ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
311 Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr,
312 FD->getLocation(), II, ArgType,
313 ImplicitParamDecl::Other));
317 std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
320 // Create the function declaration.
321 FunctionType::ExtInfo ExtInfo;
322 const CGFunctionInfo &FuncInfo =
323 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
324 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
327 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
328 FO.FunctionName, &CGM.getModule());
329 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
331 F->addFnAttr(llvm::Attribute::NoUnwind);
333 // Generate the function.
334 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
335 CD->getBody()->getLocStart());
336 unsigned Cnt = CD->getContextParamPosition();
337 I = FO.S->captures().begin();
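// Walk the captured fields again, binding each function argument to a local
// address, recording VLA size expressions, or loading 'this' as needed.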
338 for (auto *FD : RD->fields()) {
339 // If we are capturing a pointer by copy we don't need to do anything, just
340 // use the value that we get from the arguments.
341 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
342 const VarDecl *CurVD = I->getCapturedVar();
343 Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
344 // If the variable is a reference we need to materialize it here.
345 if (CurVD->getType()->isReferenceType()) {
346 Address RefAddr = CGF.CreateMemTemp(
347 CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
348 CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
349 /*Volatile=*/false, CurVD->getType());
352 if (!FO.RegisterCastedArgsOnly)
353 LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
359 LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
360 LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]),
361 Args[Cnt]->getType(), BaseInfo);
362 if (FD->hasCapturedVLAType()) {
363 if (FO.UIntPtrCastRequired) {
364 ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
365 Args[Cnt]->getName(),
367 FD->getType(), BaseInfo);
370 CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
371 auto VAT = FD->getCapturedVLAType();
372 VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
373 } else if (I->capturesVariable()) {
374 auto *Var = I->getCapturedVar();
375 QualType VarTy = Var->getType();
376 Address ArgAddr = ArgLVal.getAddress();
377 if (!VarTy->isReferenceType()) {
378 if (ArgLVal.getType()->isLValueReferenceType()) {
379 ArgAddr = CGF.EmitLoadOfReference(
380 ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
381 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
382 assert(ArgLVal.getType()->isPointerType());
383 ArgAddr = CGF.EmitLoadOfPointer(
384 ArgAddr, ArgLVal.getType()->castAs<PointerType>());
387 if (!FO.RegisterCastedArgsOnly) {
390 {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
392 } else if (I->capturesVariableByCopy()) {
393 assert(!FD->getType()->isAnyPointerType() &&
394 "Not expecting a captured pointer.");
395 auto *Var = I->getCapturedVar();
396 QualType VarTy = Var->getType();
400 FO.UIntPtrCastRequired
401 ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
402 ArgLVal, VarTy->isReferenceType())
403 : ArgLVal.getAddress()}});
405 // If 'this' is captured, load it into CXXThisValue.
406 assert(I->capturesThis());
407 CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
409 LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
415 return {F, HasUIntPtrArgs};
419 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
422 "CapturedStmtInfo should be set when generating the captured function");
423 const CapturedDecl *CD = S.getCapturedDecl();
424 // Build the argument list.
425 bool NeedWrapperFunction =
427 CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
428 FunctionArgList Args;
429 llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
430 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
431 FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
432 CapturedStmtInfo->getHelperName());
435 std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue(
436 *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO);
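// Make the remapped local addresses and VLA sizes produced by the prologue
// visible before emitting the captured statement body.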
437 for (const auto &LocalAddrPair : LocalAddrs) {
438 if (LocalAddrPair.second.first) {
439 setAddrOfLocalVar(LocalAddrPair.second.first,
440 LocalAddrPair.second.second);
443 for (const auto &VLASizePair : VLASizes)
444 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
445 PGO.assignRegionCounters(GlobalDecl(CD), F);
446 CapturedStmtInfo->EmitBody(*this, CD->getBody());
447 FinishFunction(CD->getBodyRBrace());
448 if (!NeedWrapperFunction || !HasUIntPtrArgs)
451 FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
452 /*RegisterCastedArgsOnly=*/true,
453 ".nondebug_wrapper.");
454 CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
455 WrapperCGF.disableDebugInfo();
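// Emit a debug-info-free wrapper that performs the uintptr casts and then
// forwards all of its arguments to the debuggable outlined function.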
459 llvm::Function *WrapperF =
460 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
461 WrapperCGF.CXXThisValue, WrapperFO).first;
462 LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
463 llvm::SmallVector<llvm::Value *, 4> CallArgs;
464 for (const auto *Arg : Args) {
465 llvm::Value *CallArg;
466 auto I = LocalAddrs.find(Arg);
467 if (I != LocalAddrs.end()) {
469 WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
470 CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
472 auto EI = VLASizes.find(Arg);
473 if (EI != VLASizes.end())
474 CallArg = EI->second.second;
476 LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
477 Arg->getType(), BaseInfo);
478 CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
481 CallArgs.emplace_back(CallArg);
483 WrapperCGF.Builder.CreateCall(F, CallArgs);
484 WrapperCGF.FinishFunction();
488 //===----------------------------------------------------------------------===//
489 // OpenMP Directive Emission
490 //===----------------------------------------------------------------------===//
491 void CodeGenFunction::EmitOMPAggregateAssign(
492 Address DestAddr, Address SrcAddr, QualType OriginalType,
493 const llvm::function_ref<void(Address, Address)> &CopyGen) {
494 // Perform element-by-element initialization.
497 // Drill down to the base element type on both arrays.
498 auto ArrayTy = OriginalType->getAsArrayTypeUnsafe();
499 auto NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
500 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
502 auto SrcBegin = SrcAddr.getPointer();
503 auto DestBegin = DestAddr.getPointer();
504 // Cast from pointer to array type to pointer to single element.
505 auto DestEnd = Builder.CreateGEP(DestBegin, NumElements);
506 // The basic structure here is a while-do loop.
507 auto BodyBB = createBasicBlock("omp.arraycpy.body");
508 auto DoneBB = createBasicBlock("omp.arraycpy.done");
510 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
511 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
513 // Enter the loop body, making that address the current address.
514 auto EntryBB = Builder.GetInsertBlock();
517 CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);
519 llvm::PHINode *SrcElementPHI =
520 Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
521 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
522 Address SrcElementCurrent =
523 Address(SrcElementPHI,
524 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
526 llvm::PHINode *DestElementPHI =
527 Builder.CreatePHI(DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
528 DestElementPHI->addIncoming(DestBegin, EntryBB);
529 Address DestElementCurrent =
530 Address(DestElementPHI,
531 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
534 CopyGen(DestElementCurrent, SrcElementCurrent);
536 // Shift the address forward by one element.
537 auto DestElementNext = Builder.CreateConstGEP1_32(
538 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
539 auto SrcElementNext = Builder.CreateConstGEP1_32(
540 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
541 // Check whether we've reached the end.
543 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
544 Builder.CreateCondBr(Done, DoneBB, BodyBB);
545 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
546 SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());
549 EmitBlock(DoneBB, /*IsFinished=*/true);
552 /// Check if the combiner is a call to a UDR combiner and, if so, return the
553 /// UDR decl used for reduction.
554 static const OMPDeclareReductionDecl *
555 getReductionInit(const Expr *ReductionOp) {
556 if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
557 if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
559 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
560 if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
565 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
566 const OMPDeclareReductionDecl *DRD,
568 Address Private, Address Original,
570 if (DRD->getInitializer()) {
571 std::pair<llvm::Function *, llvm::Function *> Reduction =
572 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
573 auto *CE = cast<CallExpr>(InitOp);
574 auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
575 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
576 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
577 auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
578 auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
579 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
580 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
581 [=]() -> Address { return Private; });
582 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
583 [=]() -> Address { return Original; });
584 (void)PrivateScope.Privatize();
585 RValue Func = RValue::get(Reduction.second);
586 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
587 CGF.EmitIgnoredExpr(InitOp);
589 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
590 auto *GV = new llvm::GlobalVariable(
591 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
592 llvm::GlobalValue::PrivateLinkage, Init, ".init");
593 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
595 switch (CGF.getEvaluationKind(Ty)) {
597 InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
601 RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
604 InitRVal = RValue::getAggregate(LV.getAddress());
607 OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
608 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
609 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
610 /*IsInitializer=*/false);
614 /// \brief Emit initialization of arrays of complex types.
615 /// \param DestAddr Address of the array.
616 /// \param Type Type of array.
617 /// \param Init Initial expression of array.
618 /// \param SrcAddr Address of the original array.
619 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
620 QualType Type, const Expr *Init,
621 Address SrcAddr = Address::invalid()) {
622 auto *DRD = getReductionInit(Init);
623 // Perform element-by-element initialization.
626 // Drill down to the base element type on both arrays.
627 auto ArrayTy = Type->getAsArrayTypeUnsafe();
628 auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
630 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
633 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
635 llvm::Value *SrcBegin = nullptr;
637 SrcBegin = SrcAddr.getPointer();
638 auto DestBegin = DestAddr.getPointer();
639 // Cast from pointer to array type to pointer to single element.
640 auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
641 // The basic structure here is a while-do loop.
642 auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
643 auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
645 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
646 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
648 // Enter the loop body, making that address the current address.
649 auto EntryBB = CGF.Builder.GetInsertBlock();
650 CGF.EmitBlock(BodyBB);
652 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
654 llvm::PHINode *SrcElementPHI = nullptr;
655 Address SrcElementCurrent = Address::invalid();
657 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
658 "omp.arraycpy.srcElementPast");
659 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
661 Address(SrcElementPHI,
662 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
664 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
665 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
666 DestElementPHI->addIncoming(DestBegin, EntryBB);
667 Address DestElementCurrent =
668 Address(DestElementPHI,
669 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
673 CodeGenFunction::RunCleanupsScope InitScope(CGF);
674 if (DRD && (DRD->getInitializer() || !Init)) {
675 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
676 SrcElementCurrent, ElementTy);
678 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
679 /*IsInitializer=*/false);
683 // Shift the address forward by one element.
684 auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
685 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
686 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
689 // Shift the address forward by one element.
690 auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
691 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
692 // Check whether we've reached the end.
694 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
695 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
696 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
699 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
702 void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
703 Address SrcAddr, const VarDecl *DestVD,
704 const VarDecl *SrcVD, const Expr *Copy) {
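// Copy SrcAddr to DestAddr, either with a trivial aggregate copy or by
// evaluating the copy expression with the source and destination variables
// remapped to the given addresses.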
705 if (OriginalType->isArrayType()) {
706 auto *BO = dyn_cast<BinaryOperator>(Copy);
707 if (BO && BO->getOpcode() == BO_Assign) {
708 // Perform simple memcpy for simple copying.
709 EmitAggregateAssign(DestAddr, SrcAddr, OriginalType);
711 // For arrays with complex element types perform element-by-element copying.
713 EmitOMPAggregateAssign(
714 DestAddr, SrcAddr, OriginalType,
715 [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
716 // Working with the single array element, so have to remap
717 // destination and source variables to the corresponding array elements.
719 CodeGenFunction::OMPPrivateScope Remap(*this);
720 Remap.addPrivate(DestVD, [DestElement]() -> Address {
724 SrcVD, [SrcElement]() -> Address { return SrcElement; });
725 (void)Remap.Privatize();
726 EmitIgnoredExpr(Copy);
730 // Remap pseudo source variable to private copy.
731 CodeGenFunction::OMPPrivateScope Remap(*this);
732 Remap.addPrivate(SrcVD, [SrcAddr]() -> Address { return SrcAddr; });
733 Remap.addPrivate(DestVD, [DestAddr]() -> Address { return DestAddr; });
734 (void)Remap.Privatize();
735 // Emit copying of the whole variable.
736 EmitIgnoredExpr(Copy);
740 bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
741 OMPPrivateScope &PrivateScope) {
742 if (!HaveInsertPoint())
744 bool FirstprivateIsLastprivate = false;
745 llvm::DenseSet<const VarDecl *> Lastprivates;
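// Collect the variables that are also lastprivate: a firstprivate variable
// that is lastprivate as well always needs its own private copy.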
746 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
747 for (const auto *D : C->varlists())
749 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
751 llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
752 CGCapturedStmtInfo CapturesInfo(cast<CapturedStmt>(*D.getAssociatedStmt()));
753 for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
754 auto IRef = C->varlist_begin();
755 auto InitsRef = C->inits().begin();
756 for (auto IInit : C->private_copies()) {
757 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
758 bool ThisFirstprivateIsLastprivate =
759 Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
760 auto *CapFD = CapturesInfo.lookup(OrigVD);
761 auto *FD = CapturedStmtInfo->lookup(OrigVD);
762 if (!ThisFirstprivateIsLastprivate && FD && (FD == CapFD) &&
763 !FD->getType()->isReferenceType()) {
764 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
769 FirstprivateIsLastprivate =
770 FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
771 if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
772 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
773 auto *VDInit = cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
775 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
776 /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
777 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
778 Address OriginalAddr = EmitLValue(&DRE).getAddress();
779 QualType Type = VD->getType();
780 if (Type->isArrayType()) {
781 // Emit VarDecl with copy init for arrays.
782 // Get the address of the original variable captured in the current captured region.
784 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
785 auto Emission = EmitAutoVarAlloca(*VD);
786 auto *Init = VD->getInit();
787 if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
788 // Perform simple memcpy.
789 EmitAggregateAssign(Emission.getAllocatedAddress(), OriginalAddr,
792 EmitOMPAggregateAssign(
793 Emission.getAllocatedAddress(), OriginalAddr, Type,
794 [this, VDInit, Init](Address DestElement,
795 Address SrcElement) {
796 // Clean up any temporaries needed by the initialization.
797 RunCleanupsScope InitScope(*this);
798 // Emit initialization for single element.
799 setAddrOfLocalVar(VDInit, SrcElement);
800 EmitAnyExprToMem(Init, DestElement,
801 Init->getType().getQualifiers(),
802 /*IsInitializer*/ false);
803 LocalDeclMap.erase(VDInit);
806 EmitAutoVarCleanups(Emission);
807 return Emission.getAllocatedAddress();
810 IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
811 // Emit private VarDecl with copy init.
812 // Remap temp VDInit variable to the address of the original variable
814 // (for proper handling of captured global variables).
815 setAddrOfLocalVar(VDInit, OriginalAddr);
817 LocalDeclMap.erase(VDInit);
818 return GetAddrOfLocalVar(VD);
821 assert(IsRegistered &&
822 "firstprivate var already registered as private");
823 // Silence the warning about unused variable.
830 return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
833 void CodeGenFunction::EmitOMPPrivateClause(
834 const OMPExecutableDirective &D,
835 CodeGenFunction::OMPPrivateScope &PrivateScope) {
836 if (!HaveInsertPoint())
838 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
839 for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
840 auto IRef = C->varlist_begin();
841 for (auto IInit : C->private_copies()) {
842 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
843 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
844 auto VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
846 PrivateScope.addPrivate(OrigVD, [&]() -> Address {
847 // Emit private VarDecl with copy init.
849 return GetAddrOfLocalVar(VD);
851 assert(IsRegistered && "private var already registered as private");
852 // Silence the warning about unused variable.
860 bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
861 if (!HaveInsertPoint())
863 // threadprivate_var1 = master_threadprivate_var1;
864 // operator=(threadprivate_var2, master_threadprivate_var2);
866 // __kmpc_barrier(&loc, global_tid);
867 llvm::DenseSet<const VarDecl *> CopiedVars;
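// The branch that lets the master thread skip the copying is emitted lazily,
// when the first copied variable is encountered.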
868 llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
869 for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
870 auto IRef = C->varlist_begin();
871 auto ISrcRef = C->source_exprs().begin();
872 auto IDestRef = C->destination_exprs().begin();
873 for (auto *AssignOp : C->assignment_ops()) {
874 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
875 QualType Type = VD->getType();
876 if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
877 // Get the address of the master variable. If we are emitting code with
878 // TLS support, the address is passed from the master thread as a field in
879 // the captured declaration.
880 Address MasterAddr = Address::invalid();
881 if (getLangOpts().OpenMPUseTLS &&
882 getContext().getTargetInfo().isTLSSupported()) {
883 assert(CapturedStmtInfo->lookup(VD) &&
884 "Copyin threadprivates should have been captured!");
885 DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(),
886 VK_LValue, (*IRef)->getExprLoc());
887 MasterAddr = EmitLValue(&DRE).getAddress();
888 LocalDeclMap.erase(VD);
891 Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
892 : CGM.GetAddrOfGlobal(VD),
893 getContext().getDeclAlign(VD));
895 // Get the address of the threadprivate variable.
896 Address PrivateAddr = EmitLValue(*IRef).getAddress();
897 if (CopiedVars.size() == 1) {
898 // First check whether the current thread is the master thread. If it is,
899 // there is no need to copy data.
900 CopyBegin = createBasicBlock("copyin.not.master");
901 CopyEnd = createBasicBlock("copyin.not.master.end");
902 Builder.CreateCondBr(
903 Builder.CreateICmpNE(
904 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
905 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy)),
907 EmitBlock(CopyBegin);
909 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
910 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
911 EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
919 // Exit out of copying procedure for non-master thread.
920 EmitBlock(CopyEnd, /*IsFinished=*/true);
926 bool CodeGenFunction::EmitOMPLastprivateClauseInit(
927 const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
928 if (!HaveInsertPoint())
930 bool HasAtLeastOneLastprivate = false;
931 llvm::DenseSet<const VarDecl *> SIMDLCVs;
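// For simd-based directives the loop control variables are privatized
// separately, so collect them and skip their lastprivate initialization here.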
932 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
933 auto *LoopDirective = cast<OMPLoopDirective>(&D);
934 for (auto *C : LoopDirective->counters()) {
936 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
939 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
940 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
941 HasAtLeastOneLastprivate = true;
942 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()))
944 auto IRef = C->varlist_begin();
945 auto IDestRef = C->destination_exprs().begin();
946 for (auto *IInit : C->private_copies()) {
947 // Keep the address of the original variable for a future update at the end of the loop.
949 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
950 // Taskloops do not require additional initialization; it is done in the
951 // runtime support library.
952 if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
953 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
954 PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address {
956 const_cast<VarDecl *>(OrigVD),
957 /*RefersToEnclosingVariableOrCapture=*/CapturedStmtInfo->lookup(
959 (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
960 return EmitLValue(&DRE).getAddress();
962 // Check if the variable is also a firstprivate: in this case IInit is
963 // not generated. Initialization of this variable will happen in codegen
964 // for 'firstprivate' clause.
965 if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
966 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
967 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
968 // Emit private VarDecl with copy init.
970 return GetAddrOfLocalVar(VD);
972 assert(IsRegistered &&
973 "lastprivate var already registered as private");
981 return HasAtLeastOneLastprivate;
984 void CodeGenFunction::EmitOMPLastprivateClauseFinal(
985 const OMPExecutableDirective &D, bool NoFinals,
986 llvm::Value *IsLastIterCond) {
987 if (!HaveInsertPoint())
989 // Emit following code:
990 // if (<IsLastIterCond>) {
991 // orig_var1 = private_orig_var1;
993 // orig_varn = private_orig_varn;
995 llvm::BasicBlock *ThenBB = nullptr;
996 llvm::BasicBlock *DoneBB = nullptr;
997 if (IsLastIterCond) {
998 ThenBB = createBasicBlock(".omp.lastprivate.then");
999 DoneBB = createBasicBlock(".omp.lastprivate.done");
1000 Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
1003 llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
1004 llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
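// Record the 'final' expression of each loop counter so that a lastprivate
// loop control variable receives its final value before being copied back.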
1005 if (auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
1006 auto IC = LoopDirective->counters().begin();
1007 for (auto F : LoopDirective->finals()) {
1009 cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
1011 AlreadyEmittedVars.insert(D);
1013 LoopCountersAndUpdates[D] = F;
1017 for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
1018 auto IRef = C->varlist_begin();
1019 auto ISrcRef = C->source_exprs().begin();
1020 auto IDestRef = C->destination_exprs().begin();
1021 for (auto *AssignOp : C->assignment_ops()) {
1022 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
1023 QualType Type = PrivateVD->getType();
1024 auto *CanonicalVD = PrivateVD->getCanonicalDecl();
1025 if (AlreadyEmittedVars.insert(CanonicalVD).second) {
1026 // If the lastprivate variable is a loop control variable of a loop-based
1027 // directive, update its value before copying it back to the original variable.
1029 if (auto *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
1030 EmitIgnoredExpr(FinalExpr);
1031 auto *SrcVD = cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
1032 auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
1033 // Get the address of the original variable.
1034 Address OriginalAddr = GetAddrOfLocalVar(DestVD);
1035 // Get the address of the private variable.
1036 Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
1037 if (auto RefTy = PrivateVD->getType()->getAs<ReferenceType>())
1039 Address(Builder.CreateLoad(PrivateAddr),
1040 getNaturalTypeAlignment(RefTy->getPointeeType()));
1041 EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
1047 if (auto *PostUpdate = C->getPostUpdateExpr())
1048 EmitIgnoredExpr(PostUpdate);
1051 EmitBlock(DoneBB, /*IsFinished=*/true);
1054 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1055 LValue BaseLV, llvm::Value *Addr) {
1056 Address Tmp = Address::invalid();
1057 Address TopTmp = Address::invalid();
1058 Address MostTopTmp = Address::invalid();
1059 BaseTy = BaseTy.getNonReferenceType();
1060 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1061 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1062 Tmp = CGF.CreateMemTemp(BaseTy);
1063 if (TopTmp.isValid())
1064 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1068 BaseTy = BaseTy->getPointeeType();
1070 llvm::Type *Ty = BaseLV.getPointer()->getType();
1072 Ty = Tmp.getElementType();
1073 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1074 if (Tmp.isValid()) {
1075 CGF.Builder.CreateStore(Addr, Tmp);
1078 return Address(Addr, BaseLV.getAlignment());
1081 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1083 BaseTy = BaseTy.getNonReferenceType();
1084 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1085 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1086 if (auto *PtrTy = BaseTy->getAs<PointerType>())
1087 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1089 BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
1090 BaseTy->castAs<ReferenceType>());
1092 BaseTy = BaseTy->getPointeeType();
1094 return CGF.MakeAddrLValue(
1096 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1097 BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
1098 BaseLV.getAlignment()),
1099 BaseLV.getType(), BaseLV.getBaseInfo());
1102 void CodeGenFunction::EmitOMPReductionClauseInit(
1103 const OMPExecutableDirective &D,
1104 CodeGenFunction::OMPPrivateScope &PrivateScope) {
1105 if (!HaveInsertPoint())
1107 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1108 auto ILHS = C->lhs_exprs().begin();
1109 auto IRHS = C->rhs_exprs().begin();
1110 auto IPriv = C->privates().begin();
1111 auto IRed = C->reduction_ops().begin();
1112 for (auto IRef : C->varlists()) {
1113 auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
1114 auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
1115 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
1116 auto *DRD = getReductionInit(*IRed);
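// Array sections, array subscripts, and plain variables each require a
// different privatization strategy for the reduction item.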
1117 if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
1118 auto *Base = OASE->getBase()->IgnoreParenImpCasts();
1119 while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1120 Base = TempOASE->getBase()->IgnoreParenImpCasts();
1121 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1122 Base = TempASE->getBase()->IgnoreParenImpCasts();
1123 auto *DE = cast<DeclRefExpr>(Base);
1124 auto *OrigVD = cast<VarDecl>(DE->getDecl());
1125 auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
1127 EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
1128 auto OriginalBaseLValue = EmitLValue(DE);
1130 loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
1131 OriginalBaseLValue);
1132 // Store the address of the original variable associated with the LHS
1133 // implicit variable.
1134 PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
1135 return OASELValueLB.getAddress();
1137 // Emit reduction copy.
1138 bool IsRegistered = PrivateScope.addPrivate(
1139 OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
1140 OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
1141 // Emit VarDecl with copy init for arrays.
1142 // Get the address of the original variable captured in the current captured region.
1144 auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
1145 OASELValueLB.getPointer());
1146 Size = Builder.CreateNUWAdd(
1147 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1148 CodeGenFunction::OpaqueValueMapping OpaqueMap(
1149 *this, cast<OpaqueValueExpr>(
1151 .getAsVariableArrayType(PrivateVD->getType())
1154 EmitVariablyModifiedType(PrivateVD->getType());
1155 auto Emission = EmitAutoVarAlloca(*PrivateVD);
1156 auto Addr = Emission.getAllocatedAddress();
1157 auto *Init = PrivateVD->getInit();
1158 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
1160 OASELValueLB.getAddress());
1161 EmitAutoVarCleanups(Emission);
1162 // Emit private VarDecl with reduction init.
1163 auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
1164 OASELValueLB.getPointer());
1165 auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
1166 return castToBase(*this, OrigVD->getType(),
1167 OASELValueLB.getType(), OriginalBaseLValue,
1170 assert(IsRegistered && "private var already registered as private");
1171 // Silence the warning about unused variable.
1173 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1174 return GetAddrOfLocalVar(PrivateVD);
1176 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
1177 auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1178 while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1179 Base = TempASE->getBase()->IgnoreParenImpCasts();
1180 auto *DE = cast<DeclRefExpr>(Base);
1181 auto *OrigVD = cast<VarDecl>(DE->getDecl());
1182 auto ASELValue = EmitLValue(ASE);
1183 auto OriginalBaseLValue = EmitLValue(DE);
1184 LValue BaseLValue = loadToBegin(
1185 *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
1186 // Store the address of the original variable associated with the LHS
1187 // implicit variable.
1188 PrivateScope.addPrivate(
1189 LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
1190 // Emit reduction copy.
1191 bool IsRegistered = PrivateScope.addPrivate(
1192 OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
1193 OriginalBaseLValue, DRD, IRed]() -> Address {
1194 // Emit private VarDecl with reduction init.
1195 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1196 auto Addr = Emission.getAllocatedAddress();
1197 if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1198 emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
1199 ASELValue.getAddress(),
1200 ASELValue.getType());
1202 EmitAutoVarInit(Emission);
1203 EmitAutoVarCleanups(Emission);
1204 auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
1205 ASELValue.getPointer());
1206 auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
1207 return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
1208 OriginalBaseLValue, Ptr);
1210 assert(IsRegistered && "private var already registered as private");
1211 // Silence the warning about unused variable.
1213 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1214 return Builder.CreateElementBitCast(
1215 GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
1219 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
1220 QualType Type = PrivateVD->getType();
1221 if (getContext().getAsArrayType(Type)) {
1222 // Store the address of the original variable associated with the LHS
1223 // implicit variable.
1224 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1225 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1226 IRef->getType(), VK_LValue, IRef->getExprLoc());
1227 Address OriginalAddr = EmitLValue(&DRE).getAddress();
1228 PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
1229 LHSVD]() -> Address {
1230 OriginalAddr = Builder.CreateElementBitCast(
1231 OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
1232 return OriginalAddr;
1234 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
1235 if (Type->isVariablyModifiedType()) {
1236 CodeGenFunction::OpaqueValueMapping OpaqueMap(
1237 *this, cast<OpaqueValueExpr>(
1239 .getAsVariableArrayType(PrivateVD->getType())
1242 getTypeSize(OrigVD->getType().getNonReferenceType())));
1243 EmitVariablyModifiedType(Type);
1245 auto Emission = EmitAutoVarAlloca(*PrivateVD);
1246 auto Addr = Emission.getAllocatedAddress();
1247 auto *Init = PrivateVD->getInit();
1248 EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
1249 DRD ? *IRed : Init, OriginalAddr);
1250 EmitAutoVarCleanups(Emission);
1251 return Emission.getAllocatedAddress();
1253 assert(IsRegistered && "private var already registered as private");
1254 // Silence the warning about unused variable.
1256 PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
1257 return Builder.CreateElementBitCast(
1258 GetAddrOfLocalVar(PrivateVD),
1259 ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
1262 // Store the address of the original variable associated with the LHS
1263 // implicit variable.
1264 Address OriginalAddr = Address::invalid();
1265 PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
1266 &OriginalAddr]() -> Address {
1267 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1268 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1269 IRef->getType(), VK_LValue, IRef->getExprLoc());
1270 OriginalAddr = EmitLValue(&DRE).getAddress();
1271 return OriginalAddr;
1273 // Emit reduction copy.
1274 bool IsRegistered = PrivateScope.addPrivate(
1275 OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
1276 // Emit private VarDecl with reduction init.
1277 AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
1278 auto Addr = Emission.getAllocatedAddress();
1279 if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1280 emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
1282 PrivateVD->getType());
1284 EmitAutoVarInit(Emission);
1285 EmitAutoVarCleanups(Emission);
1288 assert(IsRegistered && "private var already registered as private");
1289 // Silence the warning about unused variable.
1291 PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
1292 return GetAddrOfLocalVar(PrivateVD);
1304 void CodeGenFunction::EmitOMPReductionClauseFinal(
1305 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1306 if (!HaveInsertPoint())
1308 llvm::SmallVector<const Expr *, 8> Privates;
1309 llvm::SmallVector<const Expr *, 8> LHSExprs;
1310 llvm::SmallVector<const Expr *, 8> RHSExprs;
1311 llvm::SmallVector<const Expr *, 8> ReductionOps;
1312 bool HasAtLeastOneReduction = false;
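// Gather privates, LHS/RHS expressions, and reduction ops from all reduction
// clauses so a single runtime reduction call can be emitted.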
1313 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1314 HasAtLeastOneReduction = true;
1315 Privates.append(C->privates().begin(), C->privates().end());
1316 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1317 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1318 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1320 if (HasAtLeastOneReduction) {
1321 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1322 isOpenMPParallelDirective(D.getDirectiveKind()) ||
1323 D.getDirectiveKind() == OMPD_simd;
1324 bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
1325 // Emit nowait reduction if nowait clause is present or directive is a
1326 // parallel directive (it always has implicit barrier).
1327 CGM.getOpenMPRuntime().emitReduction(
1328 *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
1329 {WithNowait, SimpleReduction, ReductionKind});
1333 static void emitPostUpdateForReductionClause(
1334 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1335 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1336 if (!CGF.HaveInsertPoint())
1338 llvm::BasicBlock *DoneBB = nullptr;
1339 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1340 if (auto *PostUpdate = C->getPostUpdateExpr()) {
1342 if (auto *Cond = CondGen(CGF)) {
1343 // If the first post-update expression is found, emit conditional
1344 // block if it was requested.
1345 auto *ThenBB = CGF.createBasicBlock(".omp.reduction.pu");
1346 DoneBB = CGF.createBasicBlock(".omp.reduction.pu.done");
1347 CGF.Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1348 CGF.EmitBlock(ThenBB);
1351 CGF.EmitIgnoredExpr(PostUpdate);
1355 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
1359 /// Codegen lambda for appending distribute lower and upper bounds to outlined
1360 /// parallel function. This is necessary for combined constructs such as
1361 /// 'distribute parallel for'
1362 typedef llvm::function_ref<void(CodeGenFunction &,
1363 const OMPExecutableDirective &,
1364 llvm::SmallVectorImpl<llvm::Value *> &)>
1365 CodeGenBoundParametersTy;
1366 } // anonymous namespace
1368 static void emitCommonOMPParallelDirective(
1369 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1370 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1371 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1372 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1373 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1374 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
1375 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1376 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1377 auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
1378 /*IgnoreResultAssign*/ true);
1379 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1380 CGF, NumThreads, NumThreadsClause->getLocStart());
1382 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1383 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1384 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1385 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart());
1387 const Expr *IfCond = nullptr;
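// Use the condition of an 'if' clause that has no name modifier or that
// explicitly names the parallel region.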
1388 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1389 if (C->getNameModifier() == OMPD_unknown ||
1390 C->getNameModifier() == OMPD_parallel) {
1391 IfCond = C->getCondition();
1396 OMPParallelScope Scope(CGF, S);
1397 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1398 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk's
1399 // lower and upper bounds with the pragma 'for' chunking mechanism.
1400 // The following lambda takes care of appending the lower and upper bound
1401 // parameters when necessary.
1402 CodeGenBoundParameters(CGF, S, CapturedVars);
1403 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1404 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
1405 CapturedVars, IfCond);
1408 static void emitEmptyBoundParameters(CodeGenFunction &,
1409 const OMPExecutableDirective &,
1410 llvm::SmallVectorImpl<llvm::Value *> &) {}
1412 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1413 // Emit parallel region as a standalone region.
1414 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1415 OMPPrivateScope PrivateScope(CGF);
1416 bool Copyins = CGF.EmitOMPCopyinClause(S);
1417 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1419 // Emit an implicit barrier to synchronize threads and avoid data races when
1420 // propagating the master thread's values of threadprivate variables to the
1421 // local instances of those variables in all other implicit threads.
1422 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1423 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
1424 /*ForceSimpleCall=*/true);
1426 CGF.EmitOMPPrivateClause(S, PrivateScope);
1427 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1428 (void)PrivateScope.Privatize();
1429 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
1430 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1432 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1433 emitEmptyBoundParameters);
1434 emitPostUpdateForReductionClause(
1435 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1438 void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1439 JumpDest LoopExit) {
1440 RunCleanupsScope BodyScope(*this);
1441 // Update the counter values for the current iteration.
1442 for (auto I : D.updates()) {
1445 // Update the linear variables.
1446 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1447 for (auto *U : C->updates())
1451 // On a continue in the body, jump to the end.
1452 auto Continue = getJumpDestInCurrentScope("omp.body.continue");
1453 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1455 EmitStmt(D.getBody());
1456 // The end (updates/cleanups).
1457 EmitBlock(Continue.getBlock());
1458 BreakContinueStack.pop_back();
1461 void CodeGenFunction::EmitOMPInnerLoop(
1462 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
1463 const Expr *IncExpr,
1464 const llvm::function_ref<void(CodeGenFunction &)> &BodyGen,
1465 const llvm::function_ref<void(CodeGenFunction &)> &PostIncGen) {
1466 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1468 // Start the loop with a block that tests the condition.
1469 auto CondBlock = createBasicBlock("omp.inner.for.cond");
1470 EmitBlock(CondBlock);
1471 const SourceRange &R = S.getSourceRange();
1472 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1473 SourceLocToDebugLoc(R.getEnd()));
1475 // If there are any cleanups between here and the loop-exit scope,
1476 // create a block to stage a loop exit along.
1477 auto ExitBlock = LoopExit.getBlock();
1478 if (RequiresCleanup)
1479 ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup");
1481 auto LoopBody = createBasicBlock("omp.inner.for.body");
1484 EmitBranchOnBoolExpr(LoopCond, LoopBody, ExitBlock, getProfileCount(&S));
1485 if (ExitBlock != LoopExit.getBlock()) {
1486 EmitBlock(ExitBlock);
1487 EmitBranchThroughCleanup(LoopExit);
1490 EmitBlock(LoopBody);
1491 incrementProfileCounter(&S);
1493 // Create a block for the increment.
1494 auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc");
1495 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1499 // Emit "IV = IV + 1" and a back-edge to the condition block.
1500 EmitBlock(Continue.getBlock());
1501 EmitIgnoredExpr(IncExpr);
1503 BreakContinueStack.pop_back();
1504 EmitBranch(CondBlock);
1506 // Emit the fall-through block.
1507 EmitBlock(LoopExit.getBlock());
1510 void CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
1511 if (!HaveInsertPoint())
1513 // Emit inits for the linear variables.
1514 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1515 for (auto *Init : C->inits()) {
1516 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
1517 if (auto *Ref = dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
1518 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
1519 auto *OrigVD = cast<VarDecl>(Ref->getDecl());
1520 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1521 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1522 VD->getInit()->getType(), VK_LValue,
1523 VD->getInit()->getExprLoc());
1524 EmitExprAsInit(&DRE, VD, MakeAddrLValue(Emission.getAllocatedAddress(),
1526 /*capturedByInit=*/false);
1527 EmitAutoVarCleanups(Emission);
1531 // Emit the linear steps for the linear clauses.
1532 // If a step is not constant, it is pre-calculated before the loop.
1533 if (auto CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
1534 if (auto SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
1535 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
1536 // Emit calculation of the linear step.
1537 EmitIgnoredExpr(CS);
1542 void CodeGenFunction::EmitOMPLinearClauseFinal(
1543 const OMPLoopDirective &D,
1544 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1545 if (!HaveInsertPoint())
1547 llvm::BasicBlock *DoneBB = nullptr;
1548 // Emit the final values of the linear variables.
1549 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1550 auto IC = C->varlist_begin();
1551 for (auto *F : C->finals()) {
1553 if (auto *Cond = CondGen(*this)) {
1554 // If the first post-update expression is found, emit conditional
1555 // block if it was requested.
1556 auto *ThenBB = createBasicBlock(".omp.linear.pu");
1557 DoneBB = createBasicBlock(".omp.linear.pu.done");
1558 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1562 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
1563 DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
1564 CapturedStmtInfo->lookup(OrigVD) != nullptr,
1565 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
1566 Address OrigAddr = EmitLValue(&DRE).getAddress();
1567 CodeGenFunction::OMPPrivateScope VarScope(*this);
1568 VarScope.addPrivate(OrigVD, [OrigAddr]() -> Address { return OrigAddr; });
1569 (void)VarScope.Privatize();
1570 EmitIgnoredExpr(F);
1571 ++IC;
1573 if (auto *PostUpdate = C->getPostUpdateExpr())
1574 EmitIgnoredExpr(PostUpdate);
1577 EmitBlock(DoneBB, /*IsFinished=*/true);
1580 static void emitAlignedClause(CodeGenFunction &CGF,
1581 const OMPExecutableDirective &D) {
1582 if (!CGF.HaveInsertPoint())
1583 return;
1584 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
1585 unsigned ClauseAlignment = 0;
1586 if (auto AlignmentExpr = Clause->getAlignment()) {
1587 auto *AlignmentCI =
1588 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
1589 ClauseAlignment = static_cast<unsigned>(AlignmentCI->getZExtValue());
1591 for (auto E : Clause->varlists()) {
1592 unsigned Alignment = ClauseAlignment;
1593 if (Alignment == 0) {
1594 // OpenMP [2.8.1, Description]
1595 // If no optional parameter is specified, implementation-defined default
1596 // alignments for SIMD instructions on the target platforms are assumed.
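// For example, '#pragma omp simd aligned(p : 64)' takes the constant 64 from
// the clause above, while plain '#pragma omp simd aligned(p)' reaches this
// point and uses the target's default SIMD alignment for the pointee type of
// 'p'.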
1597 Alignment =
1598 CGF.getContext()
1599 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
1600 E->getType()->getPointeeType()))
1601 .getQuantity();
1603 assert((Alignment == 0 || llvm::isPowerOf2_32(Alignment)) &&
1604 "alignment is not power of 2");
1605 if (Alignment != 0) {
1606 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
1607 CGF.EmitAlignmentAssumption(PtrValue, Alignment);
1613 void CodeGenFunction::EmitOMPPrivateLoopCounters(
1614 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
1615 if (!HaveInsertPoint())
1616 return;
1617 auto I = S.private_counters().begin();
1618 for (auto *E : S.counters()) {
1619 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1620 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
1621 (void)LoopScope.addPrivate(VD, [&]() -> Address {
1622 // Emit var without initialization.
1623 if (!LocalDeclMap.count(PrivateVD)) {
1624 auto VarEmission = EmitAutoVarAlloca(*PrivateVD);
1625 EmitAutoVarCleanups(VarEmission);
1627 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1628 /*RefersToEnclosingVariableOrCapture=*/false,
1629 (*I)->getType(), VK_LValue, (*I)->getExprLoc());
1630 return EmitLValue(&DRE).getAddress();
1632 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
1633 VD->hasGlobalStorage()) {
1634 (void)LoopScope.addPrivate(PrivateVD, [&]() -> Address {
1635 DeclRefExpr DRE(const_cast<VarDecl *>(VD),
1636 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
1637 E->getType(), VK_LValue, E->getExprLoc());
1638 return EmitLValue(&DRE).getAddress();
1645 static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
1646 const Expr *Cond, llvm::BasicBlock *TrueBlock,
1647 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
1648 if (!CGF.HaveInsertPoint())
1649 return;
1651 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
1652 CGF.EmitOMPPrivateLoopCounters(S, PreCondScope);
1653 (void)PreCondScope.Privatize();
1654 // Get initial values of real counters.
1655 for (auto I : S.inits()) {
1656 CGF.EmitIgnoredExpr(I);
1659 // Check that loop is executed at least one time.
1660 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
1663 void CodeGenFunction::EmitOMPLinearClause(
1664 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
1665 if (!HaveInsertPoint())
1666 return;
1667 llvm::DenseSet<const VarDecl *> SIMDLCVs;
1668 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
1669 auto *LoopDirective = cast<OMPLoopDirective>(&D);
1670 for (auto *C : LoopDirective->counters()) {
1671 SIMDLCVs.insert(
1672 cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
1675 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1676 auto CurPrivate = C->privates().begin();
1677 for (auto *E : C->varlists()) {
1678 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
1679 auto *PrivateVD =
1680 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
1681 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
1682 bool IsRegistered = PrivateScope.addPrivate(VD, [&]() -> Address {
1683 // Emit private VarDecl with copy init.
1684 EmitVarDecl(*PrivateVD);
1685 return GetAddrOfLocalVar(PrivateVD);
1687 assert(IsRegistered && "linear var already registered as private");
1688 // Silence the warning about unused variable.
1689 (void)IsRegistered;
1690 } else
1691 EmitVarDecl(*PrivateVD);
1697 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
1698 const OMPExecutableDirective &D,
1699 bool IsMonotonic) {
1700 if (!CGF.HaveInsertPoint())
1701 return;
1702 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
1703 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
1704 /*ignoreResult=*/true);
1705 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1706 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1707 // In presence of finite 'safelen', it may be unsafe to mark all
1708 // the memory instructions parallel, because loop-carried
1709 // dependences of 'safelen' iterations are possible.
1710 if (!IsMonotonic)
1711 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
1712 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
1713 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
1714 /*ignoreResult=*/true);
1715 llvm::ConstantInt *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
1716 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
1717 // In presence of finite 'safelen', it may be unsafe to mark all
1718 // the memory instructions parallel, because loop-carried
1719 // dependences of 'safelen' iterations are possible.
1720 CGF.LoopStack.setParallel(false);
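// For example, '#pragma omp simd simdlen(8)' only expresses a preferred
// vector length, whereas '#pragma omp simd safelen(8)' bounds how far apart
// concurrently executed iterations may be, so loop-carried dependences beyond
// that distance are still possible: the vectorize width is taken from it, but
// the memory accesses are not marked parallel.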
1724 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
1725 bool IsMonotonic) {
1726 // Walk clauses and process safelen/simdlen.
1727 LoopStack.setParallel(!IsMonotonic);
1728 LoopStack.setVectorizeEnable(true);
1729 emitSimdlenSafelenClause(*this, D, IsMonotonic);
1732 void CodeGenFunction::EmitOMPSimdFinal(
1733 const OMPLoopDirective &D,
1734 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen) {
1735 if (!HaveInsertPoint())
1736 return;
1737 llvm::BasicBlock *DoneBB = nullptr;
1738 auto IC = D.counters().begin();
1739 auto IPC = D.private_counters().begin();
1740 for (auto F : D.finals()) {
1741 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>((*IC))->getDecl());
1742 auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>((*IPC))->getDecl());
1743 auto *CED = dyn_cast<OMPCapturedExprDecl>(OrigVD);
1744 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(OrigVD) ||
1745 OrigVD->hasGlobalStorage() || CED) {
1747 if (auto *Cond = CondGen(*this)) {
1748 // If the first post-update expression is found, emit conditional
1749 // block if it was requested.
1750 auto *ThenBB = createBasicBlock(".omp.final.then");
1751 DoneBB = createBasicBlock(".omp.final.done");
1752 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
1756 Address OrigAddr = Address::invalid();
1757 if (CED)
1758 OrigAddr = EmitLValue(CED->getInit()->IgnoreImpCasts()).getAddress();
1759 else {
1760 DeclRefExpr DRE(const_cast<VarDecl *>(PrivateVD),
1761 /*RefersToEnclosingVariableOrCapture=*/false,
1762 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
1763 OrigAddr = EmitLValue(&DRE).getAddress();
1764 }
1765 OMPPrivateScope VarScope(*this);
1766 VarScope.addPrivate(OrigVD,
1767 [OrigAddr]() -> Address { return OrigAddr; });
1768 (void)VarScope.Privatize();
1769 EmitIgnoredExpr(F);
1775 EmitBlock(DoneBB, /*IsFinished=*/true);
1778 static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
1779 const OMPLoopDirective &S,
1780 CodeGenFunction::JumpDest LoopExit) {
1781 CGF.EmitOMPLoopBody(S, LoopExit);
1782 CGF.EmitStopPoint(&S);
1785 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
1786 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
1787 OMPLoopScope PreInitScope(CGF, S);
1788 // if (PreCond) {
1789 //   for (IV in 0..LastIteration) BODY;
1790 //   <Final counter/linear vars updates>;
1791 // }
1794 // Emit: if (PreCond) - begin.
1795 // If the condition constant folds and can be elided, avoid emitting the
1796 // condition and the dead arm of the if/else.
1797 bool CondConstant;
1798 llvm::BasicBlock *ContBlock = nullptr;
1799 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
1800 if (!CondConstant)
1801 return;
1802 } else {
1803 auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
1804 ContBlock = CGF.createBasicBlock("simd.if.end");
1805 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
1806 CGF.getProfileCount(&S));
1807 CGF.EmitBlock(ThenBlock);
1808 CGF.incrementProfileCounter(&S);
1811 // Emit the loop iteration variable.
1812 const Expr *IVExpr = S.getIterationVariable();
1813 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
1814 CGF.EmitVarDecl(*IVDecl);
1815 CGF.EmitIgnoredExpr(S.getInit());
1817 // Emit the iterations count variable.
1818 // If it is not a variable, Sema decided to calculate iterations count on
1819 // each iteration (e.g., it is foldable into a constant).
1820 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
1821 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
1822 // Emit calculation of the iterations count.
1823 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
1826 CGF.EmitOMPSimdInit(S);
1828 emitAlignedClause(CGF, S);
1829 CGF.EmitOMPLinearClauseInit(S);
1831 OMPPrivateScope LoopScope(CGF);
1832 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
1833 CGF.EmitOMPLinearClause(S, LoopScope);
1834 CGF.EmitOMPPrivateClause(S, LoopScope);
1835 CGF.EmitOMPReductionClauseInit(S, LoopScope);
1836 bool HasLastprivateClause =
1837 CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
1838 (void)LoopScope.Privatize();
1839 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
1840 S.getInc(),
1841 [&S](CodeGenFunction &CGF) {
1842 CGF.EmitOMPLoopBody(S, JumpDest());
1843 CGF.EmitStopPoint(&S);
1845 [](CodeGenFunction &) {});
1846 CGF.EmitOMPSimdFinal(
1847 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1848 // Emit final copy of the lastprivate variables at the end of loops.
1849 if (HasLastprivateClause)
1850 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
1851 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
1852 emitPostUpdateForReductionClause(
1853 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1855 CGF.EmitOMPLinearClauseFinal(
1856 S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
1857 // Emit: if (PreCond) - end.
1858 if (ContBlock) {
1859 CGF.EmitBranch(ContBlock);
1860 CGF.EmitBlock(ContBlock, true);
1863 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
1864 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
1867 void CodeGenFunction::EmitOMPOuterLoop(
1868 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
1869 CodeGenFunction::OMPPrivateScope &LoopScope,
1870 const CodeGenFunction::OMPLoopArguments &LoopArgs,
1871 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
1872 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
1873 auto &RT = CGM.getOpenMPRuntime();
1875 const Expr *IVExpr = S.getIterationVariable();
1876 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
1877 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
1879 auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
1881 // Start the loop with a block that tests the condition.
1882 auto CondBlock = createBasicBlock("omp.dispatch.cond");
1883 EmitBlock(CondBlock);
1884 const SourceRange &R = S.getSourceRange();
1885 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
1886 SourceLocToDebugLoc(R.getEnd()));
1888 llvm::Value *BoolCondVal = nullptr;
1889 if (!DynamicOrOrdered) {
1890 // UB = min(UB, GlobalUB) or
1891 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
1892 // 'distribute parallel for')
1893 EmitIgnoredExpr(LoopArgs.EUB);
1895 EmitIgnoredExpr(LoopArgs.Init);
1897 BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
1898 } else {
1899 BoolCondVal =
1900 RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
1901 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
1904 // If there are any cleanups between here and the loop-exit scope,
1905 // create a block to stage a loop exit along.
1906 auto ExitBlock = LoopExit.getBlock();
1907 if (LoopScope.requiresCleanups())
1908 ExitBlock = createBasicBlock("omp.dispatch.cleanup");
1910 auto LoopBody = createBasicBlock("omp.dispatch.body");
1911 Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock);
1912 if (ExitBlock != LoopExit.getBlock()) {
1913 EmitBlock(ExitBlock);
1914 EmitBranchThroughCleanup(LoopExit);
1916 EmitBlock(LoopBody);
1918 // Emit "IV = LB" (in case of static schedule, we have already calculated new
1919 // LB for loop condition and emitted it above).
1920 if (DynamicOrOrdered)
1921 EmitIgnoredExpr(LoopArgs.Init);
1923 // Create a block for the increment.
1924 auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
1925 BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
1927 // Generate !llvm.loop.parallel metadata for loads and stores for loops
1928 // with dynamic/guided scheduling and without ordered clause.
1929 if (!isOpenMPSimdDirective(S.getDirectiveKind()))
1930 LoopStack.setParallel(!IsMonotonic);
1931 else
1932 EmitOMPSimdInit(S, IsMonotonic);
1934 SourceLocation Loc = S.getLocStart();
1936 // when 'distribute' is not combined with a 'for':
1937 // while (idx <= UB) { BODY; ++idx; }
1938 // when 'distribute' is combined with a 'for'
1939 // (e.g. 'distribute parallel for')
1940 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
1941 EmitOMPInnerLoop(
1942 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
1943 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
1944 CodeGenLoop(CGF, S, LoopExit);
1946 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
1947 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
1950 EmitBlock(Continue.getBlock());
1951 BreakContinueStack.pop_back();
1952 if (!DynamicOrOrdered) {
1953 // Emit "LB = LB + Stride", "UB = UB + Stride".
1954 EmitIgnoredExpr(LoopArgs.NextLB);
1955 EmitIgnoredExpr(LoopArgs.NextUB);
1958 EmitBranch(CondBlock);
1960 // Emit the fall-through block.
1961 EmitBlock(LoopExit.getBlock());
1963 // Tell the runtime we are done.
1964 auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
1965 if (!DynamicOrOrdered)
1966 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
1968 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
1971 void CodeGenFunction::EmitOMPForOuterLoop(
1972 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
1973 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
1974 const OMPLoopArguments &LoopArgs,
1975 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
1976 auto &RT = CGM.getOpenMPRuntime();
1978 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
1979 const bool DynamicOrOrdered =
1980 Ordered || RT.isDynamic(ScheduleKind.Schedule);
1982 assert((Ordered ||
1983 !RT.isStaticNonchunked(ScheduleKind.Schedule,
1984 LoopArgs.Chunk != nullptr)) &&
1985 "static non-chunked schedule does not need outer loop");
1989 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
1990 // When schedule(dynamic,chunk_size) is specified, the iterations are
1991 // distributed to threads in the team in chunks as the threads request them.
1992 // Each thread executes a chunk of iterations, then requests another chunk,
1993 // until no chunks remain to be distributed. Each chunk contains chunk_size
1994 // iterations, except for the last chunk to be distributed, which may have
1995 // fewer iterations. When no chunk_size is specified, it defaults to 1.
1997 // When schedule(guided,chunk_size) is specified, the iterations are assigned
1998 // to threads in the team in chunks as the executing threads request them.
1999 // Each thread executes a chunk of iterations, then requests another chunk,
2000 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2001 // each chunk is proportional to the number of unassigned iterations divided
2002 // by the number of threads in the team, decreasing to 1. For a chunk_size
2003 // with value k (greater than 1), the size of each chunk is determined in the
2004 // same way, with the restriction that the chunks do not contain fewer than k
2005 // iterations (except for the last chunk to be assigned, which may have fewer
2006 // than k iterations).
2008 // When schedule(auto) is specified, the decision regarding scheduling is
2009 // delegated to the compiler and/or runtime system. The programmer gives the
2010 // implementation the freedom to choose any possible mapping of iterations to
2011 // threads in the team.
2013 // When schedule(runtime) is specified, the decision regarding scheduling is
2014 // deferred until run time, and the schedule and chunk size are taken from the
2015 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2016 // implementation defined
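// For example, '#pragma omp for schedule(dynamic, 4)' hands out chunks of 4
// iterations as threads request them, 'schedule(guided)' hands out chunks of
// decreasing size, and 'schedule(runtime)' defers the choice to the
// run-sched-var ICV; all of these take the dispatch path emitted below.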
2018 // while(__kmpc_dispatch_next(&LB, &UB)) {
2019 //   idx = LB;
2020 //   while (idx <= UB) { BODY; ++idx;
2021 //     __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2022 //   } // inner loop
2023 // }
2025 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2026 // When schedule(static, chunk_size) is specified, iterations are divided into
2027 // chunks of size chunk_size, and the chunks are assigned to the threads in
2028 // the team in a round-robin fashion in the order of the thread number.
2030 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2031 //   while (idx <= UB) { BODY; ++idx; } // inner loop
2032 //   LB = LB + ST;
2033 //   UB = UB + ST;
2034 // }
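// For example, with '#pragma omp for schedule(static, 2)', 8 iterations and 2
// threads, thread 0 executes {0,1} and {4,5} while thread 1 executes {2,3} and
// {6,7}: each pass of the outer loop handles one chunk and then strides LB and
// UB forward to the thread's next round-robin chunk.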
2037 const Expr *IVExpr = S.getIterationVariable();
2038 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2039 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2041 if (DynamicOrOrdered) {
2042 auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
2043 llvm::Value *LBVal = DispatchBounds.first;
2044 llvm::Value *UBVal = DispatchBounds.second;
2045 CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
2046 LoopArgs.Chunk};
2047 RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
2048 IVSigned, Ordered, DispatchRTInputValues);
2049 } else {
2050 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
2051 Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
2052 LoopArgs.ST, LoopArgs.Chunk);
2055 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
2056 const unsigned IVSize,
2057 const bool IVSigned) {
2058 if (Ordered) {
2059 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
2060 IVSigned);
2061 }
2062 };
2064 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2065 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
2066 OuterLoopArgs.IncExpr = S.getInc();
2067 OuterLoopArgs.Init = S.getInit();
2068 OuterLoopArgs.Cond = S.getCond();
2069 OuterLoopArgs.NextLB = S.getNextLowerBound();
2070 OuterLoopArgs.NextUB = S.getNextUpperBound();
2071 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
2072 emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
2075 static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
2076 const unsigned IVSize, const bool IVSigned) {}
2078 void CodeGenFunction::EmitOMPDistributeOuterLoop(
2079 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
2080 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
2081 const CodeGenLoopTy &CodeGenLoopContent) {
2083 auto &RT = CGM.getOpenMPRuntime();
2086 // Same behavior as an OMPForOuterLoop, except that schedule cannot be
2087 // dynamic.
2090 const Expr *IVExpr = S.getIterationVariable();
2091 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2092 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2094 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
2095 IVSigned, /* Ordered = */ false, LoopArgs.IL,
2096 LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
2097 LoopArgs.Chunk);
2099 // for combined 'distribute' and 'for' the increment expression of distribute
2100 // is stored in DistInc. For 'distribute' alone, it is in Inc.
2101 Expr *IncExpr;
2102 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
2103 IncExpr = S.getDistInc();
2104 else
2105 IncExpr = S.getInc();
2107 // this routine is shared by 'omp distribute parallel for' and
2108 // 'omp distribute': select the right EUB expression depending on the
2109 // directive.
2110 OMPLoopArguments OuterLoopArgs;
2111 OuterLoopArgs.LB = LoopArgs.LB;
2112 OuterLoopArgs.UB = LoopArgs.UB;
2113 OuterLoopArgs.ST = LoopArgs.ST;
2114 OuterLoopArgs.IL = LoopArgs.IL;
2115 OuterLoopArgs.Chunk = LoopArgs.Chunk;
2116 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2117 ? S.getCombinedEnsureUpperBound()
2118 : S.getEnsureUpperBound();
2119 OuterLoopArgs.IncExpr = IncExpr;
2120 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2121 ? S.getCombinedInit()
2122 : S.getInit();
2123 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2124 ? S.getCombinedCond()
2125 : S.getCond();
2126 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2127 ? S.getCombinedNextLowerBound()
2128 : S.getNextLowerBound();
2129 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
2130 ? S.getCombinedNextUpperBound()
2131 : S.getNextUpperBound();
2133 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
2134 LoopScope, OuterLoopArgs, CodeGenLoopContent,
2135 emitEmptyOrdered);
2138 /// Emit a helper variable and return corresponding lvalue.
2139 static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2140 const DeclRefExpr *Helper) {
2141 auto VDecl = cast<VarDecl>(Helper->getDecl());
2142 CGF.EmitVarDecl(*VDecl);
2143 return CGF.EmitLValue(Helper);
2146 static std::pair<LValue, LValue>
2147 emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
2148 const OMPExecutableDirective &S) {
2149 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2150 LValue LB =
2151 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2152 LValue UB =
2153 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2155 // When composing 'distribute' with 'for' (e.g. as in 'distribute
2156 // parallel for') we need to use the 'distribute'
2157 // chunk lower and upper bounds rather than the whole loop iteration
2158 // space. These are parameters to the outlined function for 'parallel'
2159 // and we copy the bounds of the previous schedule into the
2160 // current ones.
2161 LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
2162 LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
2163 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
2164 PrevLBVal = CGF.EmitScalarConversion(
2165 PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
2166 LS.getIterationVariable()->getType(), SourceLocation());
2167 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
2168 PrevUBVal = CGF.EmitScalarConversion(
2169 PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
2170 LS.getIterationVariable()->getType(), SourceLocation());
2172 CGF.EmitStoreOfScalar(PrevLBVal, LB);
2173 CGF.EmitStoreOfScalar(PrevUBVal, UB);
2175 return {LB, UB};
2176 }
2178 /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
2179 /// we need to use the LB and UB expressions generated by the worksharing
2180 /// code generation support, whereas in non combined situations we would
2181 /// just emit 0 and the LastIteration expression
2182 /// This function is necessary due to the difference of the LB and UB
2183 /// types for the RT emission routines for 'for_static_init' and
2184 /// 'for_dispatch_init'
2185 static std::pair<llvm::Value *, llvm::Value *>
2186 emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
2187 const OMPExecutableDirective &S,
2188 Address LB, Address UB) {
2189 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2190 const Expr *IVExpr = LS.getIterationVariable();
2191 // when implementing a dynamic schedule for a 'for' combined with a
2192 // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
2193 // is not normalized as each team only executes its own assigned
2194 // distribute chunk.
2195 QualType IteratorTy = IVExpr->getType();
2196 llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
2197 SourceLocation());
2198 llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
2199 SourceLocation());
2200 return {LBVal, UBVal};
2203 static void emitDistributeParallelForDistributeInnerBoundParams(
2204 CodeGenFunction &CGF, const OMPExecutableDirective &S,
2205 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
2206 const auto &Dir = cast<OMPLoopDirective>(S);
2207 LValue LB =
2208 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
2209 auto LBCast = CGF.Builder.CreateIntCast(
2210 CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2211 CapturedVars.push_back(LBCast);
2212 LValue UB =
2213 CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
2215 auto UBCast = CGF.Builder.CreateIntCast(
2216 CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
2217 CapturedVars.push_back(UBCast);
2220 static void
2221 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
2222 const OMPLoopDirective &S,
2223 CodeGenFunction::JumpDest LoopExit) {
2224 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
2225 PrePostActionTy &) {
2226 CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
2227 emitDistributeParallelForInnerBounds,
2228 emitDistributeParallelForDispatchBounds);
2231 emitCommonOMPParallelDirective(
2232 CGF, S, OMPD_for, CGInlinedWorksharingLoop,
2233 emitDistributeParallelForDistributeInnerBoundParams);
2236 void CodeGenFunction::EmitOMPDistributeParallelForDirective(
2237 const OMPDistributeParallelForDirective &S) {
2238 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2239 CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
2240 S.getDistInc());
2242 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2243 OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
2244 /*HasCancel=*/false);
2245 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
2246 /*HasCancel=*/false);
2249 void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
2250 const OMPDistributeParallelForSimdDirective &S) {
2251 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2252 CGM.getOpenMPRuntime().emitInlinedDirective(
2253 *this, OMPD_distribute_parallel_for_simd,
2254 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2255 OMPLoopScope PreInitScope(CGF, S);
2256 CGF.EmitStmt(
2257 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2261 void CodeGenFunction::EmitOMPDistributeSimdDirective(
2262 const OMPDistributeSimdDirective &S) {
2263 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2264 CGM.getOpenMPRuntime().emitInlinedDirective(
2265 *this, OMPD_distribute_simd,
2266 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2267 OMPLoopScope PreInitScope(CGF, S);
2268 CGF.EmitStmt(
2269 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2273 void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
2274 const OMPTargetParallelForSimdDirective &S) {
2275 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2276 CGM.getOpenMPRuntime().emitInlinedDirective(
2277 *this, OMPD_target_parallel_for_simd,
2278 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2279 OMPLoopScope PreInitScope(CGF, S);
2280 CGF.EmitStmt(
2281 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2285 void CodeGenFunction::EmitOMPTargetSimdDirective(
2286 const OMPTargetSimdDirective &S) {
2287 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2288 CGM.getOpenMPRuntime().emitInlinedDirective(
2289 *this, OMPD_target_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2290 OMPLoopScope PreInitScope(CGF, S);
2291 CGF.EmitStmt(
2292 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2296 void CodeGenFunction::EmitOMPTeamsDistributeDirective(
2297 const OMPTeamsDistributeDirective &S) {
2298 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2299 CGM.getOpenMPRuntime().emitInlinedDirective(
2300 *this, OMPD_teams_distribute,
2301 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2302 OMPLoopScope PreInitScope(CGF, S);
2303 CGF.EmitStmt(
2304 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2308 void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
2309 const OMPTeamsDistributeSimdDirective &S) {
2310 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2311 CGM.getOpenMPRuntime().emitInlinedDirective(
2312 *this, OMPD_teams_distribute_simd,
2313 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2314 OMPLoopScope PreInitScope(CGF, S);
2315 CGF.EmitStmt(
2316 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2320 void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
2321 const OMPTeamsDistributeParallelForSimdDirective &S) {
2322 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2323 CGM.getOpenMPRuntime().emitInlinedDirective(
2324 *this, OMPD_teams_distribute_parallel_for_simd,
2325 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2326 OMPLoopScope PreInitScope(CGF, S);
2327 CGF.EmitStmt(
2328 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2332 void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
2333 const OMPTeamsDistributeParallelForDirective &S) {
2334 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2335 CGM.getOpenMPRuntime().emitInlinedDirective(
2336 *this, OMPD_teams_distribute_parallel_for,
2337 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2338 OMPLoopScope PreInitScope(CGF, S);
2339 CGF.EmitStmt(
2340 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2344 void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
2345 const OMPTargetTeamsDistributeDirective &S) {
2346 CGM.getOpenMPRuntime().emitInlinedDirective(
2347 *this, OMPD_target_teams_distribute,
2348 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2349 CGF.EmitStmt(
2350 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2354 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
2355 const OMPTargetTeamsDistributeParallelForDirective &S) {
2356 CGM.getOpenMPRuntime().emitInlinedDirective(
2357 *this, OMPD_target_teams_distribute_parallel_for,
2358 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2359 CGF.EmitStmt(
2360 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2364 void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
2365 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
2366 CGM.getOpenMPRuntime().emitInlinedDirective(
2367 *this, OMPD_target_teams_distribute_parallel_for_simd,
2368 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2369 CGF.EmitStmt(
2370 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2374 void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
2375 const OMPTargetTeamsDistributeSimdDirective &S) {
2376 CGM.getOpenMPRuntime().emitInlinedDirective(
2377 *this, OMPD_target_teams_distribute_simd,
2378 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2379 CGF.EmitStmt(
2380 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2385 struct ScheduleKindModifiersTy {
2386 OpenMPScheduleClauseKind Kind;
2387 OpenMPScheduleClauseModifier M1;
2388 OpenMPScheduleClauseModifier M2;
2389 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
2390 OpenMPScheduleClauseModifier M1,
2391 OpenMPScheduleClauseModifier M2)
2392 : Kind(Kind), M1(M1), M2(M2) {}
2396 bool CodeGenFunction::EmitOMPWorksharingLoop(
2397 const OMPLoopDirective &S, Expr *EUB,
2398 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
2399 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2400 // Emit the loop iteration variable.
2401 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
2402 auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
2403 EmitVarDecl(*IVDecl);
2405 // Emit the iterations count variable.
2406 // If it is not a variable, Sema decided to calculate iterations count on each
2407 // iteration (e.g., it is foldable into a constant).
2408 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
2409 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
2410 // Emit calculation of the iterations count.
2411 EmitIgnoredExpr(S.getCalcLastIteration());
2414 auto &RT = CGM.getOpenMPRuntime();
2416 bool HasLastprivateClause;
2417 // Check pre-condition.
2419 OMPLoopScope PreInitScope(*this, S);
2420 // Skip the entire loop if we don't meet the precondition.
2421 // If the condition constant folds and can be elided, avoid emitting the
2422 // condition and the dead arm of the if/else.
2423 bool CondConstant;
2424 llvm::BasicBlock *ContBlock = nullptr;
2425 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
2426 if (!CondConstant)
2427 return false;
2428 } else {
2429 auto *ThenBlock = createBasicBlock("omp.precond.then");
2430 ContBlock = createBasicBlock("omp.precond.end");
2431 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
2432 getProfileCount(&S));
2433 EmitBlock(ThenBlock);
2434 incrementProfileCounter(&S);
2437 bool Ordered = false;
2438 if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
2439 if (OrderedClause->getNumForLoops())
2440 RT.emitDoacrossInit(*this, S);
2441 else
2442 Ordered = true;
2443 }
2445 llvm::DenseSet<const Expr *> EmittedFinals;
2446 emitAlignedClause(*this, S);
2447 EmitOMPLinearClauseInit(S);
2448 // Emit helper vars inits.
2450 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
2451 LValue LB = Bounds.first;
2452 LValue UB = Bounds.second;
2453 LValue ST =
2454 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
2455 LValue IL =
2456 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
2458 // Emit 'then' code.
2460 OMPPrivateScope LoopScope(*this);
2461 if (EmitOMPFirstprivateClause(S, LoopScope)) {
2462 // Emit implicit barrier to synchronize threads and avoid data races on
2463 // initialization of firstprivate variables and post-update of
2464 // lastprivate variables.
2465 CGM.getOpenMPRuntime().emitBarrierCall(
2466 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2467 /*ForceSimpleCall=*/true);
2469 EmitOMPPrivateClause(S, LoopScope);
2470 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
2471 EmitOMPReductionClauseInit(S, LoopScope);
2472 EmitOMPPrivateLoopCounters(S, LoopScope);
2473 EmitOMPLinearClause(S, LoopScope);
2474 (void)LoopScope.Privatize();
2476 // Detect the loop schedule kind and chunk.
2477 llvm::Value *Chunk = nullptr;
2478 OpenMPScheduleTy ScheduleKind;
2479 if (auto *C = S.getSingleClause<OMPScheduleClause>()) {
2480 ScheduleKind.Schedule = C->getScheduleKind();
2481 ScheduleKind.M1 = C->getFirstScheduleModifier();
2482 ScheduleKind.M2 = C->getSecondScheduleModifier();
2483 if (const auto *Ch = C->getChunkSize()) {
2484 Chunk = EmitScalarExpr(Ch);
2485 Chunk = EmitScalarConversion(Chunk, Ch->getType(),
2486 S.getIterationVariable()->getType(),
2490 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
2491 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2492 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
2493 // If the static schedule kind is specified or if the ordered clause is
2494 // specified, and if no monotonic modifier is specified, the effect will
2495 // be as if the monotonic modifier was specified.
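// For example, plain 'schedule(static)' or a loop with an 'ordered' clause
// behaves as if the 'monotonic' modifier had been written; the IsMonotonic
// flag computed below follows the same rule.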
2496 if (RT.isStaticNonchunked(ScheduleKind.Schedule,
2497 /* Chunked */ Chunk != nullptr) &&
2499 if (isOpenMPSimdDirective(S.getDirectiveKind()))
2500 EmitOMPSimdInit(S, /*IsMonotonic=*/true);
2501 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2502 // When no chunk_size is specified, the iteration space is divided into
2503 // chunks that are approximately equal in size, and at most one chunk is
2504 // distributed to each thread. Note that the size of the chunks is
2505 // unspecified in this case.
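// For example, '#pragma omp for schedule(static)' over 100 iterations on 4
// threads gives each thread one contiguous block of roughly 25 iterations, so
// a single static init/fini pair is enough and no outer dispatch loop is
// emitted.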
2506 RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
2507 IVSize, IVSigned, Ordered,
2508 IL.getAddress(), LB.getAddress(),
2509 UB.getAddress(), ST.getAddress());
2510 auto LoopExit =
2511 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2512 // UB = min(UB, GlobalUB);
2513 EmitIgnoredExpr(S.getEnsureUpperBound());
2515 EmitIgnoredExpr(S.getInit());
2516 // while (idx <= UB) { BODY; ++idx; }
2517 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
2518 S.getInc(),
2519 [&S, LoopExit](CodeGenFunction &CGF) {
2520 CGF.EmitOMPLoopBody(S, LoopExit);
2521 CGF.EmitStopPoint(&S);
2523 [](CodeGenFunction &) {});
2524 EmitBlock(LoopExit.getBlock());
2525 // Tell the runtime we are done.
2526 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2527 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
2529 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2531 const bool IsMonotonic =
2532 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2533 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2534 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2535 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2536 // Emit the outer loop, which requests its work chunk [LB..UB] from
2537 // runtime and runs the inner loop to process it.
2538 const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
2539 ST.getAddress(), IL.getAddress(),
2540 Chunk, EUB);
2541 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
2542 LoopArguments, CGDispatchBounds);
2544 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2545 EmitOMPSimdFinal(S,
2546 [&](CodeGenFunction &CGF) -> llvm::Value * {
2547 return CGF.Builder.CreateIsNotNull(
2548 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2551 EmitOMPReductionClauseFinal(
2552 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
2553 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
2554 : /*Parallel only*/ OMPD_parallel);
2555 // Emit post-update of the reduction variables if IsLastIter != 0.
2556 emitPostUpdateForReductionClause(
2557 *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2558 return CGF.Builder.CreateIsNotNull(
2559 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2561 // Emit final copy of the lastprivate variables if IsLastIter != 0.
2562 if (HasLastprivateClause)
2563 EmitOMPLastprivateClauseFinal(
2564 S, isOpenMPSimdDirective(S.getDirectiveKind()),
2565 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
2567 EmitOMPLinearClauseFinal(S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2568 return CGF.Builder.CreateIsNotNull(
2569 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2571 // We're now done with the loop, so jump to the continuation block.
2572 if (ContBlock) {
2573 EmitBranch(ContBlock);
2574 EmitBlock(ContBlock, true);
2577 return HasLastprivateClause;
2580 /// The following two functions generate expressions for the loop lower
2581 /// and upper bounds in case of static and dynamic (dispatch) schedule
2582 /// of the associated 'for' or 'distribute' loop.
2583 static std::pair<LValue, LValue>
2584 emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
2585 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2586 LValue LB =
2587 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
2588 LValue UB =
2589 EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
2590 return {LB, UB};
2591 }
2593 /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
2594 /// consider the lower and upper bound expressions generated by the
2595 /// worksharing loop support, but we use 0 and the iteration space size as
2597 static std::pair<llvm::Value *, llvm::Value *>
2598 emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
2599 Address LB, Address UB) {
2600 const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
2601 const Expr *IVExpr = LS.getIterationVariable();
2602 const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
2603 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2604 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2605 return {LBVal, UBVal};
2608 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
2609 bool HasLastprivates = false;
2610 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2611 PrePostActionTy &) {
2612 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
2613 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2614 emitForLoopBounds,
2615 emitDispatchForLoopBounds);
2618 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2619 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2620 S.hasCancel());
2623 // Emit an implicit barrier at the end.
2624 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2625 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2629 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
2630 bool HasLastprivates = false;
2631 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
2632 PrePostActionTy &) {
2633 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2634 emitForLoopBounds,
2635 emitDispatchForLoopBounds);
2638 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2639 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2642 // Emit an implicit barrier at the end.
2643 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) {
2644 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_for);
2648 static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
2649 const Twine &Name,
2650 llvm::Value *Init = nullptr) {
2651 auto LVal = CGF.MakeAddrLValue(CGF.CreateMemTemp(Ty, Name), Ty);
2652 if (Init)
2653 CGF.EmitStoreThroughLValue(RValue::get(Init), LVal, /*isInit*/ true);
2654 return LVal;
2655 }
2657 void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
2658 auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
2659 auto *CS = dyn_cast<CompoundStmt>(Stmt);
2660 bool HasLastprivates = false;
2661 auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF,
2662 PrePostActionTy &) {
2663 auto &C = CGF.CGM.getContext();
2664 auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2665 // Emit helper vars inits.
2666 LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
2667 CGF.Builder.getInt32(0));
2668 auto *GlobalUBVal = CS != nullptr ? CGF.Builder.getInt32(CS->size() - 1)
2669 : CGF.Builder.getInt32(0);
2670 LValue UB =
2671 createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
2672 LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
2673 CGF.Builder.getInt32(1));
2674 LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
2675 CGF.Builder.getInt32(0));
2677 LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
2678 OpaqueValueExpr IVRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2679 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
2680 OpaqueValueExpr UBRefExpr(S.getLocStart(), KmpInt32Ty, VK_LValue);
2681 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
2682 // Generate condition for loop.
2683 BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
2684 OK_Ordinary, S.getLocStart(), FPOptions());
2685 // Increment for loop counter.
2686 UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
2687 S.getLocStart());
2688 auto BodyGen = [Stmt, CS, &S, &IV](CodeGenFunction &CGF) {
2689 // Iterate through all sections and emit a switch construct:
2690 // switch (IV) {
2691 //   case 0:
2692 //     <SectionStmt[0]>;
2693 //     break;
2694 // ...
2695 //   case <NumSection> - 1:
2696 //     <SectionStmt[<NumSection> - 1]>;
2697 //     break;
2698 // }
2699 // .omp.sections.exit:
2700 auto *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
2701 auto *SwitchStmt = CGF.Builder.CreateSwitch(
2702 CGF.EmitLoadOfLValue(IV, S.getLocStart()).getScalarVal(), ExitBB,
2703 CS == nullptr ? 1 : CS->size());
2705 unsigned CaseNumber = 0;
2706 for (auto *SubStmt : CS->children()) {
2707 auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2708 CGF.EmitBlock(CaseBB);
2709 SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
2710 CGF.EmitStmt(SubStmt);
2711 CGF.EmitBranch(ExitBB);
2715 auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
2716 CGF.EmitBlock(CaseBB);
2717 SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
2718 CGF.EmitStmt(Stmt);
2719 CGF.EmitBranch(ExitBB);
2721 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
2724 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2725 if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
2726 // Emit implicit barrier to synchronize threads and avoid data races on
2727 // initialization of firstprivate variables and post-update of lastprivate
2729 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
2730 CGF, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
2731 /*ForceSimpleCall=*/true);
2733 CGF.EmitOMPPrivateClause(S, LoopScope);
2734 HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2735 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2736 (void)LoopScope.Privatize();
2738 // Emit static non-chunked loop.
2739 OpenMPScheduleTy ScheduleKind;
2740 ScheduleKind.Schedule = OMPC_SCHEDULE_static;
2741 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
2742 CGF, S.getLocStart(), ScheduleKind, /*IVSize=*/32,
2743 /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(), LB.getAddress(),
2744 UB.getAddress(), ST.getAddress());
2745 // UB = min(UB, GlobalUB);
2746 auto *UBVal = CGF.EmitLoadOfScalar(UB, S.getLocStart());
2747 auto *MinUBGlobalUB = CGF.Builder.CreateSelect(
2748 CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
2749 CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
2751 CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getLocStart()), IV);
2752 // while (idx <= UB) { BODY; ++idx; }
2753 CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
2754 [](CodeGenFunction &) {});
2755 // Tell the runtime we are done.
2756 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
2757 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
2759 CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
2760 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
2761 // Emit post-update of the reduction variables if IsLastIter != 0.
2762 emitPostUpdateForReductionClause(
2763 CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
2764 return CGF.Builder.CreateIsNotNull(
2765 CGF.EmitLoadOfScalar(IL, S.getLocStart()));
2768 // Emit final copy of the lastprivate variables if IsLastIter != 0.
2769 if (HasLastprivates)
2770 CGF.EmitOMPLastprivateClauseFinal(
2771 S, /*NoFinals=*/false,
2772 CGF.Builder.CreateIsNotNull(
2773 CGF.EmitLoadOfScalar(IL, S.getLocStart())));
2776 bool HasCancel = false;
2777 if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
2778 HasCancel = OSD->hasCancel();
2779 else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
2780 HasCancel = OPSD->hasCancel();
2781 OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
2782 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
2784 // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
2785 // clause. Otherwise the barrier will be generated by the codegen for the
2787 if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
2788 // Emit implicit barrier to synchronize threads and avoid data races on
2789 // initialization of firstprivate variables.
2790 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2795 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
2796 {
2797 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2798 EmitSections(S);
2799 }
2800 // Emit an implicit barrier at the end.
2801 if (!S.getSingleClause<OMPNowaitClause>()) {
2802 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
2807 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
2808 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2809 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2811 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2812 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen,
2813 S.hasCancel());
2816 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
2817 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
2818 llvm::SmallVector<const Expr *, 8> DestExprs;
2819 llvm::SmallVector<const Expr *, 8> SrcExprs;
2820 llvm::SmallVector<const Expr *, 8> AssignmentOps;
2821 // Check if there are any 'copyprivate' clauses associated with this
2822 // 'single' construct.
2823 // Build a list of copyprivate variables along with helper expressions
2824 // (<source>, <destination>, <destination>=<source> expressions)
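// For example, with '#pragma omp single copyprivate(x)', the thread that
// executed the single region provides the <source>; the runtime then uses the
// collected <destination> = <source> helper expressions to broadcast its value
// of 'x' to the other threads.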
2825 for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
2826 CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
2827 DestExprs.append(C->destination_exprs().begin(),
2828 C->destination_exprs().end());
2829 SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
2830 AssignmentOps.append(C->assignment_ops().begin(),
2831 C->assignment_ops().end());
2833 // Emit code for 'single' region along with 'copyprivate' clauses
2834 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2835 Action.Enter(CGF);
2836 OMPPrivateScope SingleScope(CGF);
2837 (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
2838 CGF.EmitOMPPrivateClause(S, SingleScope);
2839 (void)SingleScope.Privatize();
2840 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2843 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2844 CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(),
2845 CopyprivateVars, DestExprs,
2846 SrcExprs, AssignmentOps);
2848 // Emit an implicit barrier at the end (to avoid data race on firstprivate
2849 // init or if no 'nowait' clause was specified and no 'copyprivate' clause).
2850 if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
2851 CGM.getOpenMPRuntime().emitBarrierCall(
2852 *this, S.getLocStart(),
2853 S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
2857 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
2858 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2859 Action.Enter(CGF);
2860 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2862 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2863 CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart());
2866 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
2867 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2868 Action.Enter(CGF);
2869 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
2871 Expr *Hint = nullptr;
2872 if (auto *HintClause = S.getSingleClause<OMPHintClause>())
2873 Hint = HintClause->getHint();
2874 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
2875 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
2876 S.getDirectiveName().getAsString(),
2877 CodeGen, S.getLocStart(), Hint);
2880 void CodeGenFunction::EmitOMPParallelForDirective(
2881 const OMPParallelForDirective &S) {
2882 // Emit directive as a combined directive that consists of two implicit
2883 // directives: 'parallel' with 'for' directive.
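// For example, '#pragma omp parallel for' behaves like a 'parallel' region
// whose body is a 'for' worksharing loop, so the loop below is generated
// inside the outlined function of the enclosing parallel region.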
2884 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2885 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
2886 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2887 emitDispatchForLoopBounds);
2889 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
2890 emitEmptyBoundParameters);
2893 void CodeGenFunction::EmitOMPParallelForSimdDirective(
2894 const OMPParallelForSimdDirective &S) {
2895 // Emit directive as a combined directive that consists of two implicit
2896 // directives: 'parallel' with 'for' directive.
2897 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2898 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
2899 emitDispatchForLoopBounds);
2901 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
2902 emitEmptyBoundParameters);
2905 void CodeGenFunction::EmitOMPParallelSectionsDirective(
2906 const OMPParallelSectionsDirective &S) {
2907 // Emit directive as a combined directive that consists of two implicit
2908 // directives: 'parallel' with 'sections' directive.
2909 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2910 CGF.EmitSections(S);
2912 emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
2913 emitEmptyBoundParameters);
2916 void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
2917 const RegionCodeGenTy &BodyGen,
2918 const TaskGenTy &TaskGen,
2919 OMPTaskDataTy &Data) {
2920 // Emit outlined function for task construct.
2921 auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
2922 auto *I = CS->getCapturedDecl()->param_begin();
2923 auto *PartId = std::next(I);
2924 auto *TaskT = std::next(I, 4);
2925 // Check if the task is final
2926 if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
2927 // If the condition constant folds and can be elided, try to avoid emitting
2928 // the condition and the dead arm of the if/else.
2929 auto *Cond = Clause->getCondition();
2930 bool CondConstant;
2931 if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
2932 Data.Final.setInt(CondConstant);
2933 else
2934 Data.Final.setPointer(EvaluateExprAsBool(Cond));
2935 } else {
2936 // By default the task is not final.
2937 Data.Final.setInt(/*IntVal=*/false);
2938 }
2939 // Check if the task has 'priority' clause.
2940 if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
2941 auto *Prio = Clause->getPriority();
2942 Data.Priority.setInt(/*IntVal=*/true);
2943 Data.Priority.setPointer(EmitScalarConversion(
2944 EmitScalarExpr(Prio), Prio->getType(),
2945 getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
2946 Prio->getExprLoc()));
2948 // The first function argument for tasks is a thread id, the second one is a
2949 // part id (0 for tied tasks, >=0 for untied task).
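// A tied task always resumes at part id 0, whereas an untied task may be
// resumed at a later part id after a task scheduling point, which is why the
// part id is passed to the outlined task function.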
2950 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
2951 // Get list of private variables.
2952 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
2953 auto IRef = C->varlist_begin();
2954 for (auto *IInit : C->private_copies()) {
2955 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2956 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2957 Data.PrivateVars.push_back(*IRef);
2958 Data.PrivateCopies.push_back(IInit);
2963 EmittedAsPrivate.clear();
2964 // Get list of firstprivate variables.
2965 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
2966 auto IRef = C->varlist_begin();
2967 auto IElemInitRef = C->inits().begin();
2968 for (auto *IInit : C->private_copies()) {
2969 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2970 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2971 Data.FirstprivateVars.push_back(*IRef);
2972 Data.FirstprivateCopies.push_back(IInit);
2973 Data.FirstprivateInits.push_back(*IElemInitRef);
2979 // Get list of lastprivate variables (for taskloops).
2980 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
2981 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
2982 auto IRef = C->varlist_begin();
2983 auto ID = C->destination_exprs().begin();
2984 for (auto *IInit : C->private_copies()) {
2985 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
2986 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
2987 Data.LastprivateVars.push_back(*IRef);
2988 Data.LastprivateCopies.push_back(IInit);
2990 LastprivateDstsOrigs.insert(
2991 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
2992 cast<DeclRefExpr>(*IRef)});
2997 // Build list of dependences.
2998 for (const auto *C : S.getClausesOfKind<OMPDependClause>())
2999 for (auto *IRef : C->varlists())
3000 Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
3001 auto &&CodeGen = [&Data, CS, &BodyGen, &LastprivateDstsOrigs](
3002 CodeGenFunction &CGF, PrePostActionTy &Action) {
3003 // Set proper addresses for generated private copies.
3004 OMPPrivateScope Scope(CGF);
3005 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
3006 !Data.LastprivateVars.empty()) {
3007 auto *CopyFn = CGF.Builder.CreateLoad(
3008 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
3009 auto *PrivatesPtr = CGF.Builder.CreateLoad(
3010 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
3012 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3013 llvm::SmallVector<llvm::Value *, 16> CallArgs;
3014 CallArgs.push_back(PrivatesPtr);
3015 for (auto *E : Data.PrivateVars) {
3016 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3017 Address PrivatePtr = CGF.CreateMemTemp(
3018 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
3019 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
3020 CallArgs.push_back(PrivatePtr.getPointer());
3022 for (auto *E : Data.FirstprivateVars) {
3023 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3024 Address PrivatePtr =
3025 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3026 ".firstpriv.ptr.addr");
3027 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
3028 CallArgs.push_back(PrivatePtr.getPointer());
3030 for (auto *E : Data.LastprivateVars) {
3031 auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3032 Address PrivatePtr =
3033 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3034 ".lastpriv.ptr.addr");
3035 PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
3036 CallArgs.push_back(PrivatePtr.getPointer());
3038 CGF.EmitRuntimeCall(CopyFn, CallArgs);
3039 for (auto &&Pair : LastprivateDstsOrigs) {
3040 auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
3041 DeclRefExpr DRE(
3042 const_cast<VarDecl *>(OrigVD),
3043 /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup(
3044 OrigVD) != nullptr,
3045 Pair.second->getType(), VK_LValue, Pair.second->getExprLoc());
3046 Scope.addPrivate(Pair.first, [&CGF, &DRE]() {
3047 return CGF.EmitLValue(&DRE).getAddress();
3050 for (auto &&Pair : PrivatePtrs) {
3051 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3052 CGF.getContext().getDeclAlign(Pair.first));
3053 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
3056 (void)Scope.Privatize();
3061 auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3062 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
3063 Data.NumberOfParts);
3064 OMPLexicalScope Scope(*this, S);
3065 TaskGen(*this, OutlinedFn, Data);
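// Emit '#pragma omp task': outline the associated statement and hand it to
// the runtime, honoring the 'if' and 'untied' clauses gathered below and the
// data-sharing clauses processed by EmitOMPTaskBasedDirective.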
3068 void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
3069 // Emit outlined function for task construct.
3070 auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3071 auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
3072 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
3073 const Expr *IfCond = nullptr;
3074 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3075 if (C->getNameModifier() == OMPD_unknown ||
3076 C->getNameModifier() == OMPD_task) {
3077 IfCond = C->getCondition();
3083 // Check if we should emit tied or untied task.
3084 Data.Tied = !S.getSingleClause<OMPUntiedClause>();
3085 auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3086 CGF.EmitStmt(CS->getCapturedStmt());
3088 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
3089 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
3090 const OMPTaskDataTy &Data) {
3091 CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getLocStart(), S, OutlinedFn,
3092 SharedsTy, CapturedStruct, IfCond,
3095 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
3098 void CodeGenFunction::EmitOMPTaskyieldDirective(
3099 const OMPTaskyieldDirective &S) {
3100 CGM.getOpenMPRuntime().emitTaskyieldCall(*this, S.getLocStart());
3103 void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
3104 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), OMPD_barrier);
3107 void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
3108 CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getLocStart());
3111 void CodeGenFunction::EmitOMPTaskgroupDirective(
3112 const OMPTaskgroupDirective &S) {
3113 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3115 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3117 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3118 CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart());
3121 void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
3122 CGM.getOpenMPRuntime().emitFlush(*this, [&]() -> ArrayRef<const Expr *> {
3123 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) {
3124 return llvm::makeArrayRef(FlushClause->varlist_begin(),
3125 FlushClause->varlist_end());
3128 }(), S.getLocStart());
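// Emit the loop of a 'distribute'-based directive. The iteration space is
// divided among the teams of the league: for a static, non-chunked schedule
// the bounds are computed once and the inner loop is emitted directly;
// otherwise an outer loop keeps requesting chunks from the runtime.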
3131 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3132 const CodeGenLoopTy &CodeGenLoop,
3133 Expr *IncExpr) {
3134 // Emit the loop iteration variable.
3135 auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
3136 auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
3137 EmitVarDecl(*IVDecl);
3139 // Emit the iterations count variable.
3140 // If it is not a variable, Sema decided to calculate iterations count on each
3141 // iteration (e.g., it is foldable into a constant).
3142 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
3143 EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
3144 // Emit calculation of the iterations count.
3145 EmitIgnoredExpr(S.getCalcLastIteration());
3148 auto &RT = CGM.getOpenMPRuntime();
3150 bool HasLastprivateClause = false;
3151 // Check pre-condition.
3153 OMPLoopScope PreInitScope(*this, S);
3154 // Skip the entire loop if we don't meet the precondition.
3155 // If the condition constant folds and can be elided, avoid emitting the
3156 // whole loop.
3157 bool CondConstant;
3158 llvm::BasicBlock *ContBlock = nullptr;
3159 if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
3163 auto *ThenBlock = createBasicBlock("omp.precond.then");
3164 ContBlock = createBasicBlock("omp.precond.end");
3165 emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
3166 getProfileCount(&S));
3167 EmitBlock(ThenBlock);
3168 incrementProfileCounter(&S);
3171 // Emit 'then' code.
3173 // Emit helper vars inits.
3175 LValue LB = EmitOMPHelperVar(
3176 *this, cast<DeclRefExpr>(
3177 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3178 ? S.getCombinedLowerBoundVariable()
3179 : S.getLowerBoundVariable())));
3180 LValue UB = EmitOMPHelperVar(
3181 *this, cast<DeclRefExpr>(
3182 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3183 ? S.getCombinedUpperBoundVariable()
3184 : S.getUpperBoundVariable())));
3185 LValue ST =
3186 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
3187 LValue IL =
3188 EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
3190 OMPPrivateScope LoopScope(*this);
3191 if (EmitOMPFirstprivateClause(S, LoopScope)) {
3192 // Emit implicit barrier to synchronize threads and avoid data races on
3193 // initialization of firstprivate variables and post-update of
3194 // lastprivate variables.
3195 CGM.getOpenMPRuntime().emitBarrierCall(
3196 *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
3197 /*ForceSimpleCall=*/true);
3199 EmitOMPPrivateClause(S, LoopScope);
3200 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3201 EmitOMPPrivateLoopCounters(S, LoopScope);
3202 (void)LoopScope.Privatize();
3204 // Detect the distribute schedule kind and chunk.
3205 llvm::Value *Chunk = nullptr;
3206 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
3207 if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
3208 ScheduleKind = C->getDistScheduleKind();
3209 if (const auto *Ch = C->getChunkSize()) {
3210 Chunk = EmitScalarExpr(Ch);
3211 Chunk = EmitScalarConversion(Chunk, Ch->getType(),
3212 S.getIterationVariable()->getType(),
3216 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3217 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3219 // OpenMP [2.10.8, distribute Construct, Description]
3220 // If dist_schedule is specified, kind must be static. If specified,
3221 // iterations are divided into chunks of size chunk_size, chunks are
3222 // assigned to the teams of the league in a round-robin fashion in the
3223 // order of the team number. When no chunk_size is specified, the
3224 // iteration space is divided into chunks that are approximately equal
3225 // in size, and at most one chunk is distributed to each team of the
3226 // league. The size of the chunks is unspecified in this case.
3227 if (RT.isStaticNonchunked(ScheduleKind,
3228 /* Chunked */ Chunk != nullptr)) {
3229 RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
3230 IVSize, IVSigned, /* Ordered = */ false,
3231 IL.getAddress(), LB.getAddress(),
3232 UB.getAddress(), ST.getAddress());
3233 auto LoopExit =
3234 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3235 // UB = min(UB, GlobalUB);
3236 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3237 ? S.getCombinedEnsureUpperBound()
3238 : S.getEnsureUpperBound());
3240 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3241 ? S.getCombinedInit()
3242 : S.getInit());
3244 Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3245 ? S.getCombinedCond()
3246 : S.getCond();
3248 // for distribute alone, codegen
3249 // while (idx <= UB) { BODY; ++idx; }
3250 // when combined with 'for' (e.g. as in 'distribute parallel for')
3251 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
3252 EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
3253 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
3254 CodeGenLoop(CGF, S, LoopExit);
3256 [](CodeGenFunction &) {});
3257 EmitBlock(LoopExit.getBlock());
3258 // Tell the runtime we are done.
3259 RT.emitForStaticFinish(*this, S.getLocStart());
3261 // Emit the outer loop, which requests its work chunk [LB..UB] from
3262 // runtime and runs the inner loop to process it.
3263 const OMPLoopArguments LoopArguments = {
3264 LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
3266 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
3270 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3271 if (HasLastprivateClause)
3272 EmitOMPLastprivateClauseFinal(
3273 S, /*NoFinals=*/false,
3274 Builder.CreateIsNotNull(
3275 EmitLoadOfScalar(IL, S.getLocStart())));
3278 // We're now done with the loop, so jump to the continuation block.
3280 EmitBranch(ContBlock);
3281 EmitBlock(ContBlock, true);
3286 void CodeGenFunction::EmitOMPDistributeDirective(
3287 const OMPDistributeDirective &S) {
3288 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3290 CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
3292 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3293 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
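// Outline the body of an 'ordered' region that has the 'simd' clause into a
// separate noinline function, keeping it out of the vectorized body of the
// enclosing simd loop.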
3297 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
3298 const CapturedStmt *S) {
3299 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
3300 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
3301 CGF.CapturedStmtInfo = &CapStmtInfo;
3302 auto *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
3303 Fn->addFnAttr(llvm::Attribute::NoInline);
3304 return Fn;
3307 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
3308 if (!S.getAssociatedStmt()) {
3309 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
3310 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
3311 return;
3313 auto *C = S.getSingleClause<OMPSIMDClause>();
3314 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
3315 PrePostActionTy &Action) {
3317 auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
3318 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3319 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3320 auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
3321 CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars);
3325 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3328 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3329 CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C);
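// Helpers for atomic codegen: convert an rvalue between the source and
// destination types of an atomic expression, for scalar and complex
// destinations respectively.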
3332 static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
3333 QualType SrcType, QualType DestType,
3334 SourceLocation Loc) {
3335 assert(CGF.hasScalarEvaluationKind(DestType) &&
3336 "DestType must have scalar evaluation kind.");
3337 assert(!Val.isAggregate() && "Must be a scalar or complex.");
3338 return Val.isScalar()
3339 ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType,
3341 : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
3345 static CodeGenFunction::ComplexPairTy
3346 convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
3347 QualType DestType, SourceLocation Loc) {
3348 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
3349 "DestType must have complex evaluation kind.");
3350 CodeGenFunction::ComplexPairTy ComplexVal;
3351 if (Val.isScalar()) {
3352 // Convert the input element to the element type of the complex.
3353 auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3354 auto ScalarVal = CGF.EmitScalarConversion(Val.getScalarVal(), SrcType,
3355 DestElementType, Loc);
3356 ComplexVal = CodeGenFunction::ComplexPairTy(
3357 ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
3359 assert(Val.isComplex() && "Must be a scalar or complex.");
3360 auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
3361 auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
3362 ComplexVal.first = CGF.EmitScalarConversion(
3363 Val.getComplexVal().first, SrcElementType, DestElementType, Loc);
3364 ComplexVal.second = CGF.EmitScalarConversion(
3365 Val.getComplexVal().second, SrcElementType, DestElementType, Loc);
3370 static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
3371 LValue LVal, RValue RVal) {
3372 if (LVal.isGlobalReg()) {
3373 CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
3375 CGF.EmitAtomicStore(RVal, LVal,
3376 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3377 : llvm::AtomicOrdering::Monotonic,
3378 LVal.isVolatile(), /*IsInit=*/false);
3382 void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
3383 QualType RValTy, SourceLocation Loc) {
3384 switch (getEvaluationKind(LVal.getType())) {
3386 EmitStoreThroughLValue(RValue::get(convertToScalarValue(
3387 *this, RVal, RValTy, LVal.getType(), Loc)),
3392 convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal,
3396 llvm_unreachable("Must be a scalar or complex.");
3400 static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
3401 const Expr *X, const Expr *V,
3402 SourceLocation Loc) {
3404 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
3405 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
3406 LValue XLValue = CGF.EmitLValue(X);
3407 LValue VLValue = CGF.EmitLValue(V);
3408 RValue Res = XLValue.isGlobalReg()
3409 ? CGF.EmitLoadOfLValue(XLValue, Loc)
3410 : CGF.EmitAtomicLoad(
3412 IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3413 : llvm::AtomicOrdering::Monotonic,
3414 XLValue.isVolatile());
3415 // OpenMP, 2.12.6, atomic Construct
3416 // Any atomic construct with a seq_cst clause forces the atomically
3417 // performed operation to include an implicit flush operation without a
3418 // list.
3419 if (IsSeqCst)
3420 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3421 CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
3424 static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
3425 const Expr *X, const Expr *E,
3426 SourceLocation Loc) {
3428 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
3429 emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
3430 // OpenMP, 2.12.6, atomic Construct
3431 // Any atomic construct with a seq_cst clause forces the atomically
3432 // performed operation to include an implicit flush operation without a
3433 // list.
3434 if (IsSeqCst)
3435 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3438 static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
3440 BinaryOperatorKind BO,
3441 llvm::AtomicOrdering AO,
3442 bool IsXLHSInRHSPart) {
3443 auto &Context = CGF.CGM.getContext();
3444 // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
3445 // expression is simple and atomic is allowed for the given type for the
3447 if (BO == BO_Comma || !Update.isScalar() ||
3448 !Update.getScalarVal()->getType()->isIntegerTy() ||
3449 !X.isSimple() || (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
3450 (Update.getScalarVal()->getType() !=
3451 X.getAddress().getElementType())) ||
3452 !X.getAddress().getElementType()->isIntegerTy() ||
3453 !Context.getTargetInfo().hasBuiltinAtomic(
3454 Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
3455 return std::make_pair(false, RValue::get(nullptr));
3457 llvm::AtomicRMWInst::BinOp RMWOp;
3460 RMWOp = llvm::AtomicRMWInst::Add;
3463 if (!IsXLHSInRHSPart)
3464 return std::make_pair(false, RValue::get(nullptr));
3465 RMWOp = llvm::AtomicRMWInst::Sub;
3468 RMWOp = llvm::AtomicRMWInst::And;
3471 RMWOp = llvm::AtomicRMWInst::Or;
3474 RMWOp = llvm::AtomicRMWInst::Xor;
3477 RMWOp = X.getType()->hasSignedIntegerRepresentation()
3478 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
3479 : llvm::AtomicRMWInst::Max)
3480 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
3481 : llvm::AtomicRMWInst::UMax);
3484 RMWOp = X.getType()->hasSignedIntegerRepresentation()
3485 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
3486 : llvm::AtomicRMWInst::Min)
3487 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
3488 : llvm::AtomicRMWInst::UMin);
3491 RMWOp = llvm::AtomicRMWInst::Xchg;
3500 return std::make_pair(false, RValue::get(nullptr));
3518 llvm_unreachable("Unsupported atomic update operation");
3520 auto *UpdateVal = Update.getScalarVal();
3521 if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
3522 UpdateVal = CGF.Builder.CreateIntCast(
3523 IC, X.getAddress().getElementType(),
3524 X.getType()->hasSignedIntegerRepresentation());
3526 auto *Res = CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(), UpdateVal, AO);
3527 return std::make_pair(true, RValue::get(Res));
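// Emit an atomic update of 'X' with 'E': try a lock-free 'atomicrmw' first;
// if that is not possible, fall back to a plain load/op/store for global
// register lvalues or to an atomic compare-and-exchange update otherwise.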
3530 std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
3531 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
3532 llvm::AtomicOrdering AO, SourceLocation Loc,
3533 const llvm::function_ref<RValue(RValue)> &CommonGen) {
3534 // Update expressions are allowed to have the following forms:
3535 // x binop= expr; -> xrval binop expr;
3536 // x++, ++x -> xrval + 1;
3537 // x--, --x -> xrval - 1;
3538 // x = x binop expr; -> xrval binop expr;
3539 // x = expr Op x; -> expr binop xrval;
3540 auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart);
3542 if (X.isGlobalReg()) {
3543 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
3544 // 'xrval'.
3545 EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
3547 // Perform compare-and-swap procedure.
3548 EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
3554 static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
3555 const Expr *X, const Expr *E,
3556 const Expr *UE, bool IsXLHSInRHSPart,
3557 SourceLocation Loc) {
3558 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3559 "Update expr in 'atomic update' must be a binary operator.");
3560 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3561 // Update expressions are allowed to have the following forms:
3562 // x binop= expr; -> xrval binop expr;
3563 // x++, ++x -> xrval + 1;
3564 // x--, --x -> xrval - 1;
3565 // x = x binop expr; -> xrval binop expr;
3566 // x = expr Op x; -> expr binop xrval;
3567 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
3568 LValue XLValue = CGF.EmitLValue(X);
3569 RValue ExprRValue = CGF.EmitAnyExpr(E);
3570 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3571 : llvm::AtomicOrdering::Monotonic;
3572 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3573 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3574 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3575 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3576 auto &&Gen =
3577 [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
3578 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3579 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3580 return CGF.EmitAnyExpr(UE);
3582 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
3583 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3584 // OpenMP, 2.12.6, atomic Construct
3585 // Any atomic construct with a seq_cst clause forces the atomically
3586 // performed operation to include an implicit flush operation without a
3587 // list.
3588 if (IsSeqCst)
3589 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3592 static RValue convertToType(CodeGenFunction &CGF, RValue Value,
3593 QualType SourceType, QualType ResType,
3594 SourceLocation Loc) {
3595 switch (CGF.getEvaluationKind(ResType)) {
3598 convertToScalarValue(CGF, Value, SourceType, ResType, Loc));
3600 auto Res = convertToComplexValue(CGF, Value, SourceType, ResType, Loc);
3601 return RValue::getComplex(Res.first, Res.second);
3606 llvm_unreachable("Must be a scalar or complex.");
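// Emit '#pragma omp atomic capture': update or overwrite 'x' and store into
// 'v' either its old value (postfix forms, e.g. 'v = x++;') or its new value
// (prefix forms, e.g. 'v = ++x;').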
3609 static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
3610 bool IsPostfixUpdate, const Expr *V,
3611 const Expr *X, const Expr *E,
3612 const Expr *UE, bool IsXLHSInRHSPart,
3613 SourceLocation Loc) {
3614 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
3615 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
3617 LValue VLValue = CGF.EmitLValue(V);
3618 LValue XLValue = CGF.EmitLValue(X);
3619 RValue ExprRValue = CGF.EmitAnyExpr(E);
3620 auto AO = IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
3621 : llvm::AtomicOrdering::Monotonic;
3622 QualType NewVValType;
3624 // 'x' is updated with some additional value.
3625 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
3626 "Update expr in 'atomic capture' must be a binary operator.");
3627 auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
3628 // Update expressions are allowed to have the following forms:
3629 // x binop= expr; -> xrval binop expr;
3630 // x++, ++x -> xrval + 1;
3631 // x--, --x -> xrval - 1;
3632 // x = x binop expr; -> xrval binop expr;
3633 // x = expr Op x; -> expr binop xrval;
3634 auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
3635 auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
3636 auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
3637 NewVValType = XRValExpr->getType();
3638 auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
3639 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
3640 IsPostfixUpdate](RValue XRValue) -> RValue {
3641 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3642 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
3643 RValue Res = CGF.EmitAnyExpr(UE);
3644 NewVVal = IsPostfixUpdate ? XRValue : Res;
3647 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3648 XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
3650 // 'atomicrmw' instruction was generated.
3651 if (IsPostfixUpdate) {
3652 // Use old value from 'atomicrmw'.
3653 NewVVal = Res.second;
3655 // 'atomicrmw' does not provide new value, so evaluate it using old
3656 // 'x' value.
3657 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
3658 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
3659 NewVVal = CGF.EmitAnyExpr(UE);
3663 // 'x' is simply rewritten with some 'expr'.
3664 NewVValType = X->getType().getNonReferenceType();
3665 ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
3666 X->getType().getNonReferenceType(), Loc);
3667 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
3671 // Try to perform atomicrmw xchg, otherwise simple exchange.
3672 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
3673 XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
3676 // 'atomicrmw' instruction was generated.
3677 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
3680 // Emit post-update store to 'v' of old/new 'x' value.
3681 CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
3682 // OpenMP, 2.12.6, atomic Construct
3683 // Any atomic construct with a seq_cst clause forces the atomically
3684 // performed operation to include an implicit flush operation without a
3685 // list.
3686 if (IsSeqCst)
3687 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
3690 static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
3691 bool IsSeqCst, bool IsPostfixUpdate,
3692 const Expr *X, const Expr *V, const Expr *E,
3693 const Expr *UE, bool IsXLHSInRHSPart,
3694 SourceLocation Loc) {
3697 EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
3700 EmitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
3704 EmitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
3707 EmitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
3708 IsXLHSInRHSPart, Loc);
3712 case OMPC_num_threads:
3714 case OMPC_firstprivate:
3715 case OMPC_lastprivate:
3716 case OMPC_reduction:
3726 case OMPC_copyprivate:
3728 case OMPC_proc_bind:
3733 case OMPC_threadprivate:
3735 case OMPC_mergeable:
3740 case OMPC_num_teams:
3741 case OMPC_thread_limit:
3743 case OMPC_grainsize:
3745 case OMPC_num_tasks:
3747 case OMPC_dist_schedule:
3748 case OMPC_defaultmap:
3752 case OMPC_use_device_ptr:
3753 case OMPC_is_device_ptr:
3754 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
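// Emit '#pragma omp atomic': the first clause other than 'seq_cst' (read,
// write, update or capture) selects the kind of atomic operation; if no such
// clause is present the directive is treated as an update.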
3758 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
3759 bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
3760 OpenMPClauseKind Kind = OMPC_unknown;
3761 for (auto *C : S.clauses()) {
3762 // Find first clause (skip seq_cst clause, if it is first).
3763 if (C->getClauseKind() != OMPC_seq_cst) {
3764 Kind = C->getClauseKind();
3769 const auto *CS =
3770 S.getAssociatedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
3771 if (const auto *EWC = dyn_cast<ExprWithCleanups>(CS)) {
3772 enterFullExpression(EWC);
3774 // Processing for statements under 'atomic capture'.
3775 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
3776 for (const auto *C : Compound->body()) {
3777 if (const auto *EWC = dyn_cast<ExprWithCleanups>(C)) {
3778 enterFullExpression(EWC);
3783 auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
3784 PrePostActionTy &) {
3785 CGF.EmitStopPoint(CS);
3786 EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
3787 S.getV(), S.getExpr(), S.getUpdateExpr(),
3788 S.isXLHSInRHSPart(), S.getLocStart());
3790 OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
3791 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
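// Common codegen for 'target' directives: outline the target region, decide
// whether it is an offload entry point (based on the 'if' clause and the
// configured offload targets), and emit the target call with the optional
// 'if' condition and 'device' expression.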
3794 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
3795 const OMPExecutableDirective &S,
3796 const RegionCodeGenTy &CodeGen) {
3797 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
3798 CodeGenModule &CGM = CGF.CGM;
3799 const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
3801 llvm::Function *Fn = nullptr;
3802 llvm::Constant *FnID = nullptr;
3804 const Expr *IfCond = nullptr;
3805 // Check for an 'if' clause associated with the target region (at most one is allowed).
3806 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3807 if (C->getNameModifier() == OMPD_unknown ||
3808 C->getNameModifier() == OMPD_target) {
3809 IfCond = C->getCondition();
3814 // Check if we have any device clause associated with the directive.
3815 const Expr *Device = nullptr;
3816 if (auto *C = S.getSingleClause<OMPDeviceClause>()) {
3817 Device = C->getDevice();
3820 // Check if we have an if clause whose conditional always evaluates to false
3821 // or if we do not have any targets specified. If so the target region is not
3822 // an offload entry point.
3823 bool IsOffloadEntry = true;
3826 if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
3827 IsOffloadEntry = false;
3829 if (CGM.getLangOpts().OMPTargetTriples.empty())
3830 IsOffloadEntry = false;
3832 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
3833 StringRef ParentName;
3834 // In case we have Ctors/Dtors we use the complete type variant to produce
3835 // the mangling of the device outlined kernel.
3836 if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
3837 ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
3838 else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
3839 ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
3840 else
3841 ParentName =
3842 CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
3844 // Emit target region as a standalone region.
3845 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
3846 IsOffloadEntry, CodeGen);
3847 OMPLexicalScope Scope(CGF, S);
3848 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3849 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
3850 CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
3854 static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
3855 PrePostActionTy &Action) {
3856 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
3857 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3858 CGF.EmitOMPPrivateClause(S, PrivateScope);
3859 (void)PrivateScope.Privatize();
3862 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3865 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
3866 StringRef ParentName,
3867 const OMPTargetDirective &S) {
3868 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3869 emitTargetRegion(CGF, S, Action);
3871 llvm::Function *Fn;
3872 llvm::Constant *Addr;
3873 // Emit target region as a standalone region.
3874 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3875 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3876 assert(Fn && Addr && "Target device function emission failed.");
3879 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
3880 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3881 emitTargetRegion(CGF, S, Action);
3883 emitCommonOMPTargetDirective(*this, S, CodeGen);
3886 static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
3887 const OMPExecutableDirective &S,
3888 OpenMPDirectiveKind InnermostKind,
3889 const RegionCodeGenTy &CodeGen) {
3890 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
3891 auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
3892 S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
3894 const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
3895 const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
3897 Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
3898 Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
3900 CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
3904 OMPTeamsScope Scope(CGF, S);
3905 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
3906 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
3907 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
3911 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
3912 // Emit teams region as a standalone region.
3913 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3914 OMPPrivateScope PrivateScope(CGF);
3915 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
3916 CGF.EmitOMPPrivateClause(S, PrivateScope);
3917 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
3918 (void)PrivateScope.Privatize();
3919 CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
3920 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
3922 emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
3923 emitPostUpdateForReductionClause(
3924 *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
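// Emit the teams region nested inside a combined 'target teams' directive.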
3927 static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
3928 const OMPTargetTeamsDirective &S) {
3929 auto *CS = S.getCapturedStmt(OMPD_teams);
3931 auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
3932 // TODO: Add support for clauses.
3933 CGF.EmitStmt(CS->getCapturedStmt());
3935 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
3938 void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
3939 CodeGenModule &CGM, StringRef ParentName,
3940 const OMPTargetTeamsDirective &S) {
3941 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3942 emitTargetTeamsRegion(CGF, Action, S);
3944 llvm::Function *Fn;
3945 llvm::Constant *Addr;
3946 // Emit target region as a standalone region.
3947 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3948 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3949 assert(Fn && Addr && "Target device function emission failed.");
3952 void CodeGenFunction::EmitOMPTargetTeamsDirective(
3953 const OMPTargetTeamsDirective &S) {
3954 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3955 emitTargetTeamsRegion(CGF, Action, S);
3957 emitCommonOMPTargetDirective(*this, S, CodeGen);
3960 void CodeGenFunction::EmitOMPCancellationPointDirective(
3961 const OMPCancellationPointDirective &S) {
3962 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
3963 S.getCancelRegion());
3966 void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
3967 const Expr *IfCond = nullptr;
3968 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
3969 if (C->getNameModifier() == OMPD_unknown ||
3970 C->getNameModifier() == OMPD_cancel) {
3971 IfCond = C->getCondition();
3975 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getLocStart(), IfCond,
3976 S.getCancelRegion());
3979 CodeGenFunction::JumpDest
3980 CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
3981 if (Kind == OMPD_parallel || Kind == OMPD_task ||
3982 Kind == OMPD_target_parallel)
3983 return ReturnBlock;
3984 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
3985 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
3986 Kind == OMPD_distribute_parallel_for ||
3987 Kind == OMPD_target_parallel_for);
3988 return OMPCancelStack.getExitBlock();
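// Privatize the pointers listed in a 'use_device_ptr' clause: each original
// pointer is replaced by a private copy initialized with the device address
// provided by the runtime through the capture device-address map.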
3991 void CodeGenFunction::EmitOMPUseDevicePtrClause(
3992 const OMPClause &NC, OMPPrivateScope &PrivateScope,
3993 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
3994 const auto &C = cast<OMPUseDevicePtrClause>(NC);
3995 auto OrigVarIt = C.varlist_begin();
3996 auto InitIt = C.inits().begin();
3997 for (auto PvtVarIt : C.private_copies()) {
3998 auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
3999 auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
4000 auto *PvtVD = cast<VarDecl>(cast<DeclRefExpr>(PvtVarIt)->getDecl());
4002 // In order to identify the right initializer we need to match the
4003 // declaration used by the mapping logic. In some cases we may get
4004 // OMPCapturedExprDecl that refers to the original declaration.
4005 const ValueDecl *MatchingVD = OrigVD;
4006 if (auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
4007 // OMPCapturedExprDecls are used to privatize fields of the current
4008 // structure.
4009 auto *ME = cast<MemberExpr>(OED->getInit());
4010 assert(isa<CXXThisExpr>(ME->getBase()) &&
4011 "Base should be the current struct!");
4012 MatchingVD = ME->getMemberDecl();
4015 // If we don't have information about the current list item, move on to
4016 // the next one.
4017 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
4018 if (InitAddrIt == CaptureDeviceAddrMap.end())
4021 bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
4022 // Initialize the temporary initialization variable with the address we
4023 // get from the runtime library. We have to cast the source address
4024 // because it is always a void *. References are materialized in the
4025 // privatization scope, so the initialization here disregards the fact that
4026 // the original variable is a reference.
4028 getContext().getPointerType(OrigVD->getType().getNonReferenceType());
4029 llvm::Type *AddrTy = ConvertTypeForMem(AddrQTy);
4030 Address InitAddr = Builder.CreateBitCast(InitAddrIt->second, AddrTy);
4031 setAddrOfLocalVar(InitVD, InitAddr);
4033 // Emit the private declaration; it will be initialized by the
4034 // initialization variable we just added to the local declarations map.
4035 EmitDecl(*PvtVD);
4037 // The initialization variable has served its purpose in the emission
4038 // of the previous declaration, so we don't need it anymore.
4039 LocalDeclMap.erase(InitVD);
4041 // Return the address of the private variable.
4042 return GetAddrOfLocalVar(PvtVD);
4044 assert(IsRegistered && "firstprivate var already registered as private");
4045 // Silence the warning about unused variable.
4046 (void)IsRegistered;
4053 // Generate the instructions for '#pragma omp target data' directive.
4054 void CodeGenFunction::EmitOMPTargetDataDirective(
4055 const OMPTargetDataDirective &S) {
4056 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true);
4058 // Create a pre/post action to signal the privatization of the device pointer.
4059 // This action can be replaced by the OpenMP runtime code generation to
4060 // deactivate privatization.
4061 bool PrivatizeDevicePointers = false;
4062 class DevicePointerPrivActionTy : public PrePostActionTy {
4063 bool &PrivatizeDevicePointers;
4066 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
4067 : PrePostActionTy(), PrivatizeDevicePointers(PrivatizeDevicePointers) {}
4068 void Enter(CodeGenFunction &CGF) override {
4069 PrivatizeDevicePointers = true;
4072 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
4074 auto &&CodeGen = [&S, &Info, &PrivatizeDevicePointers](
4075 CodeGenFunction &CGF, PrePostActionTy &Action) {
4076 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
4078 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
4081 // Codegen that selects whether to generate the privatization code or not.
4082 auto &&PrivCodeGen = [&S, &Info, &PrivatizeDevicePointers,
4083 &InnermostCodeGen](CodeGenFunction &CGF,
4084 PrePostActionTy &Action) {
4085 RegionCodeGenTy RCG(InnermostCodeGen);
4086 PrivatizeDevicePointers = false;
4088 // Call the pre-action to change the status of PrivatizeDevicePointers if
4089 // needed.
4090 Action.Enter(CGF);
4092 if (PrivatizeDevicePointers) {
4093 OMPPrivateScope PrivateScope(CGF);
4094 // Emit all instances of the use_device_ptr clause.
4095 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
4096 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
4097 Info.CaptureDeviceAddrMap);
4098 (void)PrivateScope.Privatize();
4104 // Forward the provided action to the privatization codegen.
4105 RegionCodeGenTy PrivRCG(PrivCodeGen);
4106 PrivRCG.setAction(Action);
4108 // Although the body of the region is emitted as an inlined directive, we
4109 // don't use an inline scope, because changes to the references inside the
4110 // region are expected to be visible outside, so we do not privatize them.
4111 OMPLexicalScope Scope(CGF, S);
4112 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
4116 RegionCodeGenTy RCG(CodeGen);
4118 // If we don't have target devices, don't bother emitting the data mapping
4119 // code.
4120 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
4125 // Check if we have any if clause associated with the directive.
4126 const Expr *IfCond = nullptr;
4127 if (auto *C = S.getSingleClause<OMPIfClause>())
4128 IfCond = C->getCondition();
4130 // Check if we have any device clause associated with the directive.
4131 const Expr *Device = nullptr;
4132 if (auto *C = S.getSingleClause<OMPDeviceClause>())
4133 Device = C->getDevice();
4135 // Set the action to signal privatization of device pointers.
4136 RCG.setAction(PrivAction);
4138 // Emit region code.
4139 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
4143 void CodeGenFunction::EmitOMPTargetEnterDataDirective(
4144 const OMPTargetEnterDataDirective &S) {
4145 // If we don't have target devices, don't bother emitting the data mapping
4146 // code.
4147 if (CGM.getLangOpts().OMPTargetTriples.empty())
4150 // Check if we have any if clause associated with the directive.
4151 const Expr *IfCond = nullptr;
4152 if (auto *C = S.getSingleClause<OMPIfClause>())
4153 IfCond = C->getCondition();
4155 // Check if we have any device clause associated with the directive.
4156 const Expr *Device = nullptr;
4157 if (auto *C = S.getSingleClause<OMPDeviceClause>())
4158 Device = C->getDevice();
4160 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
4163 void CodeGenFunction::EmitOMPTargetExitDataDirective(
4164 const OMPTargetExitDataDirective &S) {
4165 // If we don't have target devices, don't bother emitting the data mapping
4166 // code.
4167 if (CGM.getLangOpts().OMPTargetTriples.empty())
4170 // Check if we have any if clause associated with the directive.
4171 const Expr *IfCond = nullptr;
4172 if (auto *C = S.getSingleClause<OMPIfClause>())
4173 IfCond = C->getCondition();
4175 // Check if we have any device clause associated with the directive.
4176 const Expr *Device = nullptr;
4177 if (auto *C = S.getSingleClause<OMPDeviceClause>())
4178 Device = C->getDevice();
4180 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
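// Emit the parallel region nested inside a combined 'target parallel'
// directive.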
4183 static void emitTargetParallelRegion(CodeGenFunction &CGF,
4184 const OMPTargetParallelDirective &S,
4185 PrePostActionTy &Action) {
4186 // Get the captured statement associated with the 'parallel' region.
4187 auto *CS = S.getCapturedStmt(OMPD_parallel);
4189 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
4190 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
4191 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
4192 CGF.EmitOMPPrivateClause(S, PrivateScope);
4193 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
4194 (void)PrivateScope.Privatize();
4195 // TODO: Add support for clauses.
4196 CGF.EmitStmt(CS->getCapturedStmt());
4197 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
4199 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
4200 emitEmptyBoundParameters);
4201 emitPostUpdateForReductionClause(
4202 CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
4205 void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
4206 CodeGenModule &CGM, StringRef ParentName,
4207 const OMPTargetParallelDirective &S) {
4208 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4209 emitTargetParallelRegion(CGF, S, Action);
4211 llvm::Function *Fn;
4212 llvm::Constant *Addr;
4213 // Emit target region as a standalone region.
4214 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
4215 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
4216 assert(Fn && Addr && "Target device function emission failed.");
4219 void CodeGenFunction::EmitOMPTargetParallelDirective(
4220 const OMPTargetParallelDirective &S) {
4221 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
4222 emitTargetParallelRegion(CGF, S, Action);
4224 emitCommonOMPTargetDirective(*this, S, CodeGen);
4227 void CodeGenFunction::EmitOMPTargetParallelForDirective(
4228 const OMPTargetParallelForDirective &S) {
4229 // TODO: codegen for target parallel for.
4232 /// Map a loop helper variable to the corresponding captured parameter in the given private scope.
4233 static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
4234 const ImplicitParamDecl *PVD,
4235 CodeGenFunction::OMPPrivateScope &Privates) {
4236 auto *VDecl = cast<VarDecl>(Helper->getDecl());
4237 Privates.addPrivate(
4238 VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); });
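// Emit the loop of a 'taskloop'-based directive as a task. The runtime
// splits the iteration space into tasks; each task receives its lower bound,
// upper bound, stride and last-iteration flag through the captured
// parameters mapped below.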
4241 void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
4242 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
4243 // Emit outlined function for task construct.
4244 auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
4245 auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
4246 auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
4247 const Expr *IfCond = nullptr;
4248 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
4249 if (C->getNameModifier() == OMPD_unknown ||
4250 C->getNameModifier() == OMPD_taskloop) {
4251 IfCond = C->getCondition();
4257 // Check if taskloop must be emitted without taskgroup.
4258 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
4259 // TODO: Check if we should emit tied or untied task.
4261 // Set scheduling for taskloop
4262 if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
4264 Data.Schedule.setInt(/*IntVal=*/false);
4265 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
4266 } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
4268 Data.Schedule.setInt(/*IntVal=*/true);
4269 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
4272 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
4274 // for (IV in 0..LastIteration) BODY;
4275 // <Final counter/linear vars updates>;
4279 // Emit: if (PreCond) - begin.
4280 // If the condition constant folds and can be elided, avoid emitting the
4281 // whole loop.
4282 bool CondConstant;
4283 llvm::BasicBlock *ContBlock = nullptr;
4284 OMPLoopScope PreInitScope(CGF, S);
4285 if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
4289 auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
4290 ContBlock = CGF.createBasicBlock("taskloop.if.end");
4291 emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
4292 CGF.getProfileCount(&S));
4293 CGF.EmitBlock(ThenBlock);
4294 CGF.incrementProfileCounter(&S);
4297 if (isOpenMPSimdDirective(S.getDirectiveKind()))
4298 CGF.EmitOMPSimdInit(S);
4300 OMPPrivateScope LoopScope(CGF);
4301 // Emit helper vars inits.
4302 enum { LowerBound = 5, UpperBound, Stride, LastIter };
4303 auto *I = CS->getCapturedDecl()->param_begin();
4304 auto *LBP = std::next(I, LowerBound);
4305 auto *UBP = std::next(I, UpperBound);
4306 auto *STP = std::next(I, Stride);
4307 auto *LIP = std::next(I, LastIter);
4308 mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
4310 mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
4312 mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
4313 mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
4315 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
4316 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
4317 (void)LoopScope.Privatize();
4318 // Emit the loop iteration variable.
4319 const Expr *IVExpr = S.getIterationVariable();
4320 const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
4321 CGF.EmitVarDecl(*IVDecl);
4322 CGF.EmitIgnoredExpr(S.getInit());
4324 // Emit the iterations count variable.
4325 // If it is not a variable, Sema decided to calculate iterations count on
4326 // each iteration (e.g., it is foldable into a constant).
4327 if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
4328 CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
4329 // Emit calculation of the iterations count.
4330 CGF.EmitIgnoredExpr(S.getCalcLastIteration());
4333 CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
4335 [&S](CodeGenFunction &CGF) {
4336 CGF.EmitOMPLoopBody(S, JumpDest());
4337 CGF.EmitStopPoint(&S);
4339 [](CodeGenFunction &) {});
4340 // Emit: if (PreCond) - end.
4342 CGF.EmitBranch(ContBlock);
4343 CGF.EmitBlock(ContBlock, true);
4345 // Emit final copy of the lastprivate variables if IsLastIter != 0.
4346 if (HasLastprivateClause) {
4347 CGF.EmitOMPLastprivateClauseFinal(
4348 S, isOpenMPSimdDirective(S.getDirectiveKind()),
4349 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
4350 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
4351 (*LIP)->getType(), S.getLocStart())));
4354 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
4355 IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
4356 const OMPTaskDataTy &Data) {
4357 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
4358 OMPLoopScope PreInitScope(CGF, S);
4359 CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getLocStart(), S,
4360 OutlinedFn, SharedsTy,
4361 CapturedStruct, IfCond, Data);
4363 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
4366 EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
4369 void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
4370 EmitOMPTaskLoopBasedDirective(S);
4373 void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
4374 const OMPTaskLoopSimdDirective &S) {
4375 EmitOMPTaskLoopBasedDirective(S);
4378 // Generate the instructions for '#pragma omp target update' directive.
4379 void CodeGenFunction::EmitOMPTargetUpdateDirective(
4380 const OMPTargetUpdateDirective &S) {
4381 // If we don't have target devices, don't bother emitting the data mapping
4382 // code.
4383 if (CGM.getLangOpts().OMPTargetTriples.empty())
4386 // Check if we have any if clause associated with the directive.
4387 const Expr *IfCond = nullptr;
4388 if (auto *C = S.getSingleClause<OMPIfClause>())
4389 IfCond = C->getCondition();
4391 // Check if we have any device clause associated with the directive.
4392 const Expr *Device = nullptr;
4393 if (auto *C = S.getSingleClause<OMPDeviceClause>())
4394 Device = C->getDevice();
4396 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);