summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp1469
1 files changed, 856 insertions, 613 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 39e1cdf..6135cf3 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -26,7 +26,7 @@ using namespace CodeGen;
namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
-class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
+class OMPLexicalScope : public CodeGenFunction::LexicalScope {
void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
for (const auto *C : S.clauses()) {
if (auto *CPI = OMPClauseWithPreInit::get(C)) {
@@ -54,10 +54,11 @@ class OMPLexicalScope final : public CodeGenFunction::LexicalScope {
public:
OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S,
- bool AsInlined = false)
+ bool AsInlined = false, bool EmitPreInitStmt = true)
: CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
InlinedShareds(CGF) {
- emitPreInitStmt(CGF, S);
+ if (EmitPreInitStmt)
+ emitPreInitStmt(CGF, S);
if (AsInlined) {
if (S.hasAssociatedStmt()) {
auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
@@ -81,6 +82,39 @@ public:
}
};
+/// Lexical scope for OpenMP parallel construct, that handles correct codegen
+/// for captured expressions.
+class OMPParallelScope final : public OMPLexicalScope {
+ bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+ OpenMPDirectiveKind Kind = S.getDirectiveKind();
+ return !(isOpenMPTargetExecutionDirective(Kind) ||
+ isOpenMPLoopBoundSharingDirective(Kind)) &&
+ isOpenMPParallelDirective(Kind);
+ }
+
+public:
+ OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : OMPLexicalScope(CGF, S,
+ /*AsInlined=*/false,
+ /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
+/// Lexical scope for OpenMP teams construct, that handles correct codegen
+/// for captured expressions.
+class OMPTeamsScope final : public OMPLexicalScope {
+ bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+ OpenMPDirectiveKind Kind = S.getDirectiveKind();
+ return !isOpenMPTargetExecutionDirective(Kind) &&
+ isOpenMPTeamsDirective(Kind);
+ }
+
+public:
+ OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : OMPLexicalScope(CGF, S,
+ /*AsInlined=*/false,
+ /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
@@ -194,21 +228,58 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType,
return TmpAddr;
}
-llvm::Function *
-CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
- assert(
- CapturedStmtInfo &&
- "CapturedStmtInfo should be set when generating the captured function");
- const CapturedDecl *CD = S.getCapturedDecl();
- const RecordDecl *RD = S.getCapturedRecordDecl();
+static QualType getCanonicalParamType(ASTContext &C, QualType T) {
+ if (T->isLValueReferenceType()) {
+ return C.getLValueReferenceType(
+ getCanonicalParamType(C, T.getNonReferenceType()),
+ /*SpelledAsLValue=*/false);
+ }
+ if (T->isPointerType())
+ return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
+ return C.getCanonicalParamType(T);
+}
+
+namespace {
+ /// Contains required data for proper outlined function codegen.
+ struct FunctionOptions {
+ /// Captured statement for which the function is generated.
+ const CapturedStmt *S = nullptr;
+ /// true if cast to/from UIntPtr is required for variables captured by
+ /// value.
+ bool UIntPtrCastRequired = true;
+ /// true if only casted argumefnts must be registered as local args or VLA
+ /// sizes.
+ bool RegisterCastedArgsOnly = false;
+ /// Name of the generated function.
+ StringRef FunctionName;
+ explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
+ bool RegisterCastedArgsOnly,
+ StringRef FunctionName)
+ : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
+ RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
+ FunctionName(FunctionName) {}
+ };
+}
+
+static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue(
+ CodeGenFunction &CGF, FunctionArgList &Args,
+ llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>>
+ &LocalAddrs,
+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
+ &VLASizes,
+ llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
+ const CapturedDecl *CD = FO.S->getCapturedDecl();
+ const RecordDecl *RD = FO.S->getCapturedRecordDecl();
assert(CD->hasBody() && "missing CapturedDecl body");
+ CXXThisValue = nullptr;
// Build the argument list.
+ CodeGenModule &CGM = CGF.CGM;
ASTContext &Ctx = CGM.getContext();
- FunctionArgList Args;
+ bool HasUIntPtrArgs = false;
Args.append(CD->param_begin(),
std::next(CD->param_begin(), CD->getContextParamPosition()));
- auto I = S.captures().begin();
+ auto I = FO.S->captures().begin();
for (auto *FD : RD->fields()) {
QualType ArgType = FD->getType();
IdentifierInfo *II = nullptr;
@@ -220,29 +291,26 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
// deal with pointers. We can pass in the same way the VLA type sizes to the
// outlined function.
if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
- I->capturesVariableArrayType())
- ArgType = Ctx.getUIntPtrType();
+ I->capturesVariableArrayType()) {
+ HasUIntPtrArgs = true;
+ if (FO.UIntPtrCastRequired)
+ ArgType = Ctx.getUIntPtrType();
+ }
if (I->capturesVariable() || I->capturesVariableByCopy()) {
CapVar = I->getCapturedVar();
II = CapVar->getIdentifier();
} else if (I->capturesThis())
- II = &getContext().Idents.get("this");
+ II = &Ctx.Idents.get("this");
else {
assert(I->capturesVariableArrayType());
- II = &getContext().Idents.get("vla");
- }
- if (ArgType->isVariablyModifiedType()) {
- bool IsReference = ArgType->isLValueReferenceType();
- ArgType =
- getContext().getCanonicalParamType(ArgType.getNonReferenceType());
- if (IsReference && !ArgType->isPointerType()) {
- ArgType = getContext().getLValueReferenceType(
- ArgType, /*SpelledAsLValue=*/false);
- }
+ II = &Ctx.Idents.get("vla");
}
- Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr,
- FD->getLocation(), II, ArgType));
+ if (ArgType->isVariablyModifiedType())
+ ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType());
+ Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr,
+ FD->getLocation(), II, ArgType,
+ ImplicitParamDecl::Other));
++I;
}
Args.append(
@@ -255,89 +323,166 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
- llvm::Function *F = llvm::Function::Create(
- FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
- CapturedStmtInfo->getHelperName(), &CGM.getModule());
+ llvm::Function *F =
+ llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
+ FO.FunctionName, &CGM.getModule());
CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
if (CD->isNothrow())
F->addFnAttr(llvm::Attribute::NoUnwind);
// Generate the function.
- StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
- CD->getBody()->getLocStart());
+ CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
+ CD->getBody()->getLocStart());
unsigned Cnt = CD->getContextParamPosition();
- I = S.captures().begin();
+ I = FO.S->captures().begin();
for (auto *FD : RD->fields()) {
// If we are capturing a pointer by copy we don't need to do anything, just
// use the value that we get from the arguments.
if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
const VarDecl *CurVD = I->getCapturedVar();
- Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]);
+ Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
// If the variable is a reference we need to materialize it here.
if (CurVD->getType()->isReferenceType()) {
- Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(),
- ".materialized_ref");
- EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false,
- CurVD->getType());
+ Address RefAddr = CGF.CreateMemTemp(
+ CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref");
+ CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr,
+ /*Volatile=*/false, CurVD->getType());
LocalAddr = RefAddr;
}
- setAddrOfLocalVar(CurVD, LocalAddr);
+ if (!FO.RegisterCastedArgsOnly)
+ LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
++Cnt;
++I;
continue;
}
- LValue ArgLVal =
- MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]),
+ Args[Cnt]->getType(), BaseInfo);
if (FD->hasCapturedVLAType()) {
- LValue CastedArgLVal =
- MakeAddrLValue(castValueFromUintptr(*this, FD->getType(),
- Args[Cnt]->getName(), ArgLVal),
- FD->getType(), AlignmentSource::Decl);
+ if (FO.UIntPtrCastRequired) {
+ ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(),
+ Args[Cnt]->getName(),
+ ArgLVal),
+ FD->getType(), BaseInfo);
+ }
auto *ExprArg =
- EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal();
+ CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal();
auto VAT = FD->getCapturedVLAType();
- VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+ VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}});
} else if (I->capturesVariable()) {
auto *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
Address ArgAddr = ArgLVal.getAddress();
if (!VarTy->isReferenceType()) {
if (ArgLVal.getType()->isLValueReferenceType()) {
- ArgAddr = EmitLoadOfReference(
+ ArgAddr = CGF.EmitLoadOfReference(
ArgAddr, ArgLVal.getType()->castAs<ReferenceType>());
} else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
assert(ArgLVal.getType()->isPointerType());
- ArgAddr = EmitLoadOfPointer(
+ ArgAddr = CGF.EmitLoadOfPointer(
ArgAddr, ArgLVal.getType()->castAs<PointerType>());
}
}
- setAddrOfLocalVar(
- Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var)));
+ if (!FO.RegisterCastedArgsOnly) {
+ LocalAddrs.insert(
+ {Args[Cnt],
+ {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
+ }
} else if (I->capturesVariableByCopy()) {
assert(!FD->getType()->isAnyPointerType() &&
"Not expecting a captured pointer.");
auto *Var = I->getCapturedVar();
QualType VarTy = Var->getType();
- setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(),
- Args[Cnt]->getName(), ArgLVal,
- VarTy->isReferenceType()));
+ LocalAddrs.insert(
+ {Args[Cnt],
+ {Var,
+ FO.UIntPtrCastRequired
+ ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(),
+ ArgLVal, VarTy->isReferenceType())
+ : ArgLVal.getAddress()}});
} else {
// If 'this' is captured, load it into CXXThisValue.
assert(I->capturesThis());
- CXXThisValue =
- EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal();
+ CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation())
+ .getScalarVal();
+ LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}});
}
++Cnt;
++I;
}
+ return {F, HasUIntPtrArgs};
+}
+
+llvm::Function *
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
+ assert(
+ CapturedStmtInfo &&
+ "CapturedStmtInfo should be set when generating the captured function");
+ const CapturedDecl *CD = S.getCapturedDecl();
+ // Build the argument list.
+ bool NeedWrapperFunction =
+ getDebugInfo() &&
+ CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo;
+ FunctionArgList Args;
+ llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
+ FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
+ CapturedStmtInfo->getHelperName());
+ llvm::Function *F;
+ bool HasUIntPtrArgs;
+ std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue(
+ *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO);
+ for (const auto &LocalAddrPair : LocalAddrs) {
+ if (LocalAddrPair.second.first) {
+ setAddrOfLocalVar(LocalAddrPair.second.first,
+ LocalAddrPair.second.second);
+ }
+ }
+ for (const auto &VLASizePair : VLASizes)
+ VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
PGO.assignRegionCounters(GlobalDecl(CD), F);
CapturedStmtInfo->EmitBody(*this, CD->getBody());
FinishFunction(CD->getBodyRBrace());
-
- return F;
+ if (!NeedWrapperFunction || !HasUIntPtrArgs)
+ return F;
+
+ FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
+ /*RegisterCastedArgsOnly=*/true,
+ ".nondebug_wrapper.");
+ CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
+ WrapperCGF.disableDebugInfo();
+ Args.clear();
+ LocalAddrs.clear();
+ VLASizes.clear();
+ llvm::Function *WrapperF =
+ emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
+ WrapperCGF.CXXThisValue, WrapperFO).first;
+ LValueBaseInfo BaseInfo(AlignmentSource::Decl, false);
+ llvm::SmallVector<llvm::Value *, 4> CallArgs;
+ for (const auto *Arg : Args) {
+ llvm::Value *CallArg;
+ auto I = LocalAddrs.find(Arg);
+ if (I != LocalAddrs.end()) {
+ LValue LV =
+ WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo);
+ CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+ } else {
+ auto EI = VLASizes.find(Arg);
+ if (EI != VLASizes.end())
+ CallArg = EI->second.second;
+ else {
+ LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
+ Arg->getType(), BaseInfo);
+ CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation());
+ }
+ }
+ CallArgs.emplace_back(CallArg);
+ }
+ WrapperCGF.Builder.CreateCall(F, CallArgs);
+ WrapperCGF.FinishFunction();
+ return WrapperF;
}
//===----------------------------------------------------------------------===//
@@ -404,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
-/// Check if the combiner is a call to UDR combiner and if it is so return the
-/// UDR decl used for reduction.
-static const OMPDeclareReductionDecl *
-getReductionInit(const Expr *ReductionOp) {
- if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
- if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
- if (auto *DRE =
- dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
- if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
- return DRD;
- return nullptr;
-}
-
-static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
- const OMPDeclareReductionDecl *DRD,
- const Expr *InitOp,
- Address Private, Address Original,
- QualType Ty) {
- if (DRD->getInitializer()) {
- std::pair<llvm::Function *, llvm::Function *> Reduction =
- CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
- auto *CE = cast<CallExpr>(InitOp);
- auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
- const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
- const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
- auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
- auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
- CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
- PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
- [=]() -> Address { return Private; });
- PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
- [=]() -> Address { return Original; });
- (void)PrivateScope.Privatize();
- RValue Func = RValue::get(Reduction.second);
- CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
- CGF.EmitIgnoredExpr(InitOp);
- } else {
- llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
- auto *GV = new llvm::GlobalVariable(
- CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
- llvm::GlobalValue::PrivateLinkage, Init, ".init");
- LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
- RValue InitRVal;
- switch (CGF.getEvaluationKind(Ty)) {
- case TEK_Scalar:
- InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
- break;
- case TEK_Complex:
- InitRVal =
- RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
- break;
- case TEK_Aggregate:
- InitRVal = RValue::getAggregate(LV.getAddress());
- break;
- }
- OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
- CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
- CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
- /*IsInitializer=*/false);
- }
-}
-
-/// \brief Emit initialization of arrays of complex types.
-/// \param DestAddr Address of the array.
-/// \param Type Type of array.
-/// \param Init Initial expression of array.
-/// \param SrcAddr Address of the original array.
-static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
- QualType Type, const Expr *Init,
- Address SrcAddr = Address::invalid()) {
- auto *DRD = getReductionInit(Init);
- // Perform element-by-element initialization.
- QualType ElementTy;
-
- // Drill down to the base element type on both arrays.
- auto ArrayTy = Type->getAsArrayTypeUnsafe();
- auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
- DestAddr =
- CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
- if (DRD)
- SrcAddr =
- CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
-
- llvm::Value *SrcBegin = nullptr;
- if (DRD)
- SrcBegin = SrcAddr.getPointer();
- auto DestBegin = DestAddr.getPointer();
- // Cast from pointer to array type to pointer to single element.
- auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
- // The basic structure here is a while-do loop.
- auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
- auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
- auto IsEmpty =
- CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
- CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
-
- // Enter the loop body, making that address the current address.
- auto EntryBB = CGF.Builder.GetInsertBlock();
- CGF.EmitBlock(BodyBB);
-
- CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
-
- llvm::PHINode *SrcElementPHI = nullptr;
- Address SrcElementCurrent = Address::invalid();
- if (DRD) {
- SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
- "omp.arraycpy.srcElementPast");
- SrcElementPHI->addIncoming(SrcBegin, EntryBB);
- SrcElementCurrent =
- Address(SrcElementPHI,
- SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
- }
- llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
- DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
- DestElementPHI->addIncoming(DestBegin, EntryBB);
- Address DestElementCurrent =
- Address(DestElementPHI,
- DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
-
- // Emit copy.
- {
- CodeGenFunction::RunCleanupsScope InitScope(CGF);
- if (DRD && (DRD->getInitializer() || !Init)) {
- emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
- SrcElementCurrent, ElementTy);
- } else
- CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
- /*IsInitializer=*/false);
- }
-
- if (DRD) {
- // Shift the address forward by one element.
- auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
- SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
- SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
- }
-
- // Shift the address forward by one element.
- auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
- DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
- // Check whether we've reached the end.
- auto Done =
- CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
- CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
- DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
-
- // Done.
- CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
-}
-
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
Address SrcAddr, const VarDecl *DestVD,
const VarDecl *SrcVD, const Expr *Copy) {
@@ -906,259 +901,111 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
-static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
- LValue BaseLV, llvm::Value *Addr) {
- Address Tmp = Address::invalid();
- Address TopTmp = Address::invalid();
- Address MostTopTmp = Address::invalid();
- BaseTy = BaseTy.getNonReferenceType();
- while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
- !CGF.getContext().hasSameType(BaseTy, ElTy)) {
- Tmp = CGF.CreateMemTemp(BaseTy);
- if (TopTmp.isValid())
- CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
- else
- MostTopTmp = Tmp;
- TopTmp = Tmp;
- BaseTy = BaseTy->getPointeeType();
- }
- llvm::Type *Ty = BaseLV.getPointer()->getType();
- if (Tmp.isValid())
- Ty = Tmp.getElementType();
- Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
- if (Tmp.isValid()) {
- CGF.Builder.CreateStore(Addr, Tmp);
- return MostTopTmp;
- }
- return Address(Addr, BaseLV.getAlignment());
-}
-
-static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
- LValue BaseLV) {
- BaseTy = BaseTy.getNonReferenceType();
- while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
- !CGF.getContext().hasSameType(BaseTy, ElTy)) {
- if (auto *PtrTy = BaseTy->getAs<PointerType>())
- BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
- else {
- BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
- BaseTy->castAs<ReferenceType>());
- }
- BaseTy = BaseTy->getPointeeType();
- }
- return CGF.MakeAddrLValue(
- Address(
- CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()),
- BaseLV.getAlignment()),
- BaseLV.getType(), BaseLV.getAlignmentSource());
-}
-
void CodeGenFunction::EmitOMPReductionClauseInit(
const OMPExecutableDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return;
+ SmallVector<const Expr *, 4> Shareds;
+ SmallVector<const Expr *, 4> Privates;
+ SmallVector<const Expr *, 4> ReductionOps;
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
auto IPriv = C->privates().begin();
auto IRed = C->reduction_ops().begin();
- for (auto IRef : C->varlists()) {
- auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
- auto *DRD = getReductionInit(*IRed);
- if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
- auto *Base = OASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
- Base = TempOASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
- Base = TempASE->getBase()->IgnoreParenImpCasts();
- auto *DE = cast<DeclRefExpr>(Base);
- auto *OrigVD = cast<VarDecl>(DE->getDecl());
- auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
- auto OASELValueUB =
- EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
- auto OriginalBaseLValue = EmitLValue(DE);
- LValue BaseLValue =
- loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
- OriginalBaseLValue);
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address {
- return OASELValueLB.getAddress();
- });
- // Emit reduction copy.
- bool IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
- OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
- // Emit VarDecl with copy init for arrays.
- // Get the address of the original variable captured in current
- // captured region.
- auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
- OASELValueLB.getPointer());
- Size = Builder.CreateNUWAdd(
- Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
- CodeGenFunction::OpaqueValueMapping OpaqueMap(
- *this, cast<OpaqueValueExpr>(
- getContext()
- .getAsVariableArrayType(PrivateVD->getType())
- ->getSizeExpr()),
- RValue::get(Size));
- EmitVariablyModifiedType(PrivateVD->getType());
- auto Emission = EmitAutoVarAlloca(*PrivateVD);
- auto Addr = Emission.getAllocatedAddress();
- auto *Init = PrivateVD->getInit();
- EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
- DRD ? *IRed : Init,
- OASELValueLB.getAddress());
- EmitAutoVarCleanups(Emission);
- // Emit private VarDecl with reduction init.
- auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
- OASELValueLB.getPointer());
- auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
- return castToBase(*this, OrigVD->getType(),
- OASELValueLB.getType(), OriginalBaseLValue,
- Ptr);
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
- return GetAddrOfLocalVar(PrivateVD);
- });
- } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
- auto *Base = ASE->getBase()->IgnoreParenImpCasts();
- while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
- Base = TempASE->getBase()->IgnoreParenImpCasts();
- auto *DE = cast<DeclRefExpr>(Base);
- auto *OrigVD = cast<VarDecl>(DE->getDecl());
- auto ASELValue = EmitLValue(ASE);
- auto OriginalBaseLValue = EmitLValue(DE);
- LValue BaseLValue = loadToBegin(
- *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address {
- return ASELValue.getAddress();
- });
- // Emit reduction copy.
- bool IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
- OriginalBaseLValue, DRD, IRed]() -> Address {
- // Emit private VarDecl with reduction init.
- AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
- auto Addr = Emission.getAllocatedAddress();
- if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
- emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
- ASELValue.getAddress(),
- ASELValue.getType());
- } else
- EmitAutoVarInit(Emission);
- EmitAutoVarCleanups(Emission);
- auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
- ASELValue.getPointer());
- auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
- return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
- OriginalBaseLValue, Ptr);
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
- return Builder.CreateElementBitCast(
- GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
- "rhs.begin");
- });
- } else {
- auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
- QualType Type = PrivateVD->getType();
- if (getContext().getAsArrayType(Type)) {
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CapturedStmtInfo->lookup(OrigVD) != nullptr,
- IRef->getType(), VK_LValue, IRef->getExprLoc());
- Address OriginalAddr = EmitLValue(&DRE).getAddress();
- PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
- LHSVD]() -> Address {
- OriginalAddr = Builder.CreateElementBitCast(
- OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
- return OriginalAddr;
- });
- bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
- if (Type->isVariablyModifiedType()) {
- CodeGenFunction::OpaqueValueMapping OpaqueMap(
- *this, cast<OpaqueValueExpr>(
- getContext()
- .getAsVariableArrayType(PrivateVD->getType())
- ->getSizeExpr()),
- RValue::get(
- getTypeSize(OrigVD->getType().getNonReferenceType())));
- EmitVariablyModifiedType(Type);
- }
- auto Emission = EmitAutoVarAlloca(*PrivateVD);
- auto Addr = Emission.getAllocatedAddress();
- auto *Init = PrivateVD->getInit();
- EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
- DRD ? *IRed : Init, OriginalAddr);
- EmitAutoVarCleanups(Emission);
- return Emission.getAllocatedAddress();
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
- return Builder.CreateElementBitCast(
- GetAddrOfLocalVar(PrivateVD),
- ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
- });
- } else {
- // Store the address of the original variable associated with the LHS
- // implicit variable.
- Address OriginalAddr = Address::invalid();
- PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
- &OriginalAddr]() -> Address {
- DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
- CapturedStmtInfo->lookup(OrigVD) != nullptr,
- IRef->getType(), VK_LValue, IRef->getExprLoc());
- OriginalAddr = EmitLValue(&DRE).getAddress();
- return OriginalAddr;
- });
- // Emit reduction copy.
- bool IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
- // Emit private VarDecl with reduction init.
- AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
- auto Addr = Emission.getAllocatedAddress();
- if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
- emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
- OriginalAddr,
- PrivateVD->getType());
- } else
- EmitAutoVarInit(Emission);
- EmitAutoVarCleanups(Emission);
- return Addr;
- });
- assert(IsRegistered && "private var already registered as private");
- // Silence the warning about unused variable.
- (void)IsRegistered;
- PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
- return GetAddrOfLocalVar(PrivateVD);
- });
- }
+ auto ILHS = C->lhs_exprs().begin();
+ auto IRHS = C->rhs_exprs().begin();
+ for (const auto *Ref : C->varlists()) {
+ Shareds.emplace_back(Ref);
+ Privates.emplace_back(*IPriv);
+ ReductionOps.emplace_back(*IRed);
+ LHSs.emplace_back(*ILHS);
+ RHSs.emplace_back(*IRHS);
+ std::advance(IPriv, 1);
+ std::advance(IRed, 1);
+ std::advance(ILHS, 1);
+ std::advance(IRHS, 1);
+ }
+ }
+ ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
+ unsigned Count = 0;
+ auto ILHS = LHSs.begin();
+ auto IRHS = RHSs.begin();
+ auto IPriv = Privates.begin();
+ for (const auto *IRef : Shareds) {
+ auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
+ // Emit private VarDecl with reduction init.
+ RedCG.emitSharedLValue(*this, Count);
+ RedCG.emitAggregateType(*this, Count);
+ auto Emission = EmitAutoVarAlloca(*PrivateVD);
+ RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
+ RedCG.getSharedLValue(Count),
+ [&Emission](CodeGenFunction &CGF) {
+ CGF.EmitAutoVarInit(Emission);
+ return true;
+ });
+ EmitAutoVarCleanups(Emission);
+ Address BaseAddr = RedCG.adjustPrivateAddress(
+ *this, Count, Emission.getAllocatedAddress());
+ bool IsRegistered = PrivateScope.addPrivate(
+ RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
+ assert(IsRegistered && "private var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+
+ auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ if (isa<OMPArraySectionExpr>(IRef)) {
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
+ return RedCG.getSharedLValue(Count).getAddress();
+ });
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
+ return GetAddrOfLocalVar(PrivateVD);
+ });
+ } else if (isa<ArraySubscriptExpr>(IRef)) {
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
+ return RedCG.getSharedLValue(Count).getAddress();
+ });
+ PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
+ return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
+ ConvertTypeForMem(RHSVD->getType()),
+ "rhs.begin");
+ });
+ } else {
+ QualType Type = PrivateVD->getType();
+ bool IsArray = getContext().getAsArrayType(Type) != nullptr;
+ Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ if (IsArray) {
+ OriginalAddr = Builder.CreateElementBitCast(
+ OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
}
- ++ILHS;
- ++IRHS;
- ++IPriv;
- ++IRed;
+ PrivateScope.addPrivate(
+ LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
+ PrivateScope.addPrivate(
+ RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
+ return IsArray
+ ? Builder.CreateElementBitCast(
+ GetAddrOfLocalVar(PrivateVD),
+ ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
+ : GetAddrOfLocalVar(PrivateVD);
+ });
}
+ ++ILHS;
+ ++IRHS;
+ ++IPriv;
+ ++Count;
}
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
- const OMPExecutableDirective &D) {
+ const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
if (!HaveInsertPoint())
return;
llvm::SmallVector<const Expr *, 8> Privates;
@@ -1174,14 +1021,15 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
}
if (HasAtLeastOneReduction) {
+ bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
+ isOpenMPParallelDirective(D.getDirectiveKind()) ||
+ D.getDirectiveKind() == OMPD_simd;
+ bool SimpleReduction = D.getDirectiveKind() == OMPD_simd;
// Emit nowait reduction if nowait clause is present or directive is a
// parallel directive (it always has implicit barrier).
CGM.getOpenMPRuntime().emitReduction(
*this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps,
- D.getSingleClause<OMPNowaitClause>() ||
- isOpenMPParallelDirective(D.getDirectiveKind()) ||
- D.getDirectiveKind() == OMPD_simd,
- D.getDirectiveKind() == OMPD_simd);
+ {WithNowait, SimpleReduction, ReductionKind});
}
}
@@ -1210,14 +1058,23 @@ static void emitPostUpdateForReductionClause(
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
-static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
- const OMPExecutableDirective &S,
- OpenMPDirectiveKind InnermostKind,
- const RegionCodeGenTy &CodeGen) {
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
- emitParallelOrTeamsOutlinedFunction(S,
- *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+namespace {
+/// Codegen lambda for appending distribute lower and upper bounds to outlined
+/// parallel function. This is necessary for combined constructs such as
+/// 'distribute parallel for'
+typedef llvm::function_ref<void(CodeGenFunction &,
+ const OMPExecutableDirective &,
+ llvm::SmallVectorImpl<llvm::Value *> &)>
+ CodeGenBoundParametersTy;
+} // anonymous namespace
+
+static void emitCommonOMPParallelDirective(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+ const CodeGenBoundParametersTy &CodeGenBoundParameters) {
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
+ auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
@@ -1239,13 +1096,22 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
}
}
- OMPLexicalScope Scope(CGF, S);
+ OMPParallelScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
+ // lower and upper bounds with the pragma 'for' chunking mechanism.
+ // The following lambda takes care of appending the lower and upper bound
+ // parameters when necessary
+ CodeGenBoundParameters(CGF, S, CapturedVars);
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
CapturedVars, IfCond);
}
+static void emitEmptyBoundParameters(CodeGenFunction &,
+ const OMPExecutableDirective &,
+ llvm::SmallVectorImpl<llvm::Value *> &) {}
+
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -1264,9 +1130,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
+ emitEmptyBoundParameters);
emitPostUpdateForReductionClause(
*this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
@@ -1611,6 +1478,13 @@ void CodeGenFunction::EmitOMPSimdFinal(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
+static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ CodeGenFunction::JumpDest LoopExit) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+}
+
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
@@ -1677,7 +1551,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
// Emit final copy of the lastprivate variables at the end of loops.
if (HasLastprivateClause)
CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
emitPostUpdateForReductionClause(
CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
@@ -1693,9 +1567,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
-void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
- const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+void CodeGenFunction::EmitOMPOuterLoop(
+ bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
+ CodeGenFunction::OMPPrivateScope &LoopScope,
+ const CodeGenFunction::OMPLoopArguments &LoopArgs,
+ const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
+ const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
auto &RT = CGM.getOpenMPRuntime();
const Expr *IVExpr = S.getIterationVariable();
@@ -1713,15 +1590,18 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
llvm::Value *BoolCondVal = nullptr;
if (!DynamicOrOrdered) {
- // UB = min(UB, GlobalUB)
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ // UB = min(UB, GlobalUB) or
+ // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
+ // 'distribute parallel for')
+ EmitIgnoredExpr(LoopArgs.EUB);
// IV = LB
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(LoopArgs.Init);
// IV < UB
- BoolCondVal = EvaluateExprAsBool(S.getCond());
+ BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
} else {
- BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
- LB, UB, ST);
+ BoolCondVal =
+ RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
+ LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
}
// If there are any cleanups between here and the loop-exit scope,
@@ -1741,7 +1621,7 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
// Emit "IV = LB" (in case of static schedule, we have already calculated new
// LB for loop condition and emitted it above).
if (DynamicOrOrdered)
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(LoopArgs.Init);
// Create a block for the increment.
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
@@ -1755,24 +1635,27 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
EmitOMPSimdInit(S, IsMonotonic);
SourceLocation Loc = S.getLocStart();
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
- },
- [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
- if (Ordered) {
- CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
- CGF, Loc, IVSize, IVSigned);
- }
- });
+
+ // when 'distribute' is not combined with a 'for':
+ // while (idx <= UB) { BODY; ++idx; }
+ // when 'distribute' is combined with a 'for'
+ // (e.g. 'distribute parallel for')
+ // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ EmitOMPInnerLoop(
+ S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
+ [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+ CodeGenLoop(CGF, S, LoopExit);
+ },
+ [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
+ CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
+ });
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
if (!DynamicOrOrdered) {
// Emit "LB = LB + Stride", "UB = UB + Stride".
- EmitIgnoredExpr(S.getNextLowerBound());
- EmitIgnoredExpr(S.getNextUpperBound());
+ EmitIgnoredExpr(LoopArgs.NextLB);
+ EmitIgnoredExpr(LoopArgs.NextUB);
}
EmitBranch(CondBlock);
@@ -1791,7 +1674,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
void CodeGenFunction::EmitOMPForOuterLoop(
const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds) {
auto &RT = CGM.getOpenMPRuntime();
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
@@ -1800,7 +1684,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(
assert((Ordered ||
!RT.isStaticNonchunked(ScheduleKind.Schedule,
- /*Chunked=*/Chunk != nullptr)) &&
+ LoopArgs.Chunk != nullptr)) &&
"static non-chunked schedule does not need outer loop");
// Emit outer loop.
@@ -1858,22 +1742,46 @@ void CodeGenFunction::EmitOMPForOuterLoop(
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
if (DynamicOrOrdered) {
- llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
+ auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
+ llvm::Value *LBVal = DispatchBounds.first;
+ llvm::Value *UBVal = DispatchBounds.second;
+ CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
+ LoopArgs.Chunk};
RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
- IVSigned, Ordered, UBVal, Chunk);
+ IVSigned, Ordered, DipatchRTInputValues);
} else {
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
- Ordered, IL, LB, UB, ST, Chunk);
+ Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
+ LoopArgs.ST, LoopArgs.Chunk);
}
- EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
- ST, IL, Chunk);
+ auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
+ const unsigned IVSize,
+ const bool IVSigned) {
+ if (Ordered) {
+ CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
+ IVSigned);
+ }
+ };
+
+ OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
+ LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
+ OuterLoopArgs.IncExpr = S.getInc();
+ OuterLoopArgs.Init = S.getInit();
+ OuterLoopArgs.Cond = S.getCond();
+ OuterLoopArgs.NextLB = S.getNextLowerBound();
+ OuterLoopArgs.NextUB = S.getNextUpperBound();
+ EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
+ emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
+static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
+ const unsigned IVSize, const bool IVSigned) {}
+
void CodeGenFunction::EmitOMPDistributeOuterLoop(
- OpenMPDistScheduleClauseKind ScheduleKind,
- const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+ OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
+ OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
+ const CodeGenLoopTy &CodeGenLoopContent) {
auto &RT = CGM.getOpenMPRuntime();
@@ -1886,26 +1794,159 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
- RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
- IVSize, IVSigned, /* Ordered = */ false,
- IL, LB, UB, ST, Chunk);
+ RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
+ IVSigned, /* Ordered = */ false, LoopArgs.IL,
+ LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
+ LoopArgs.Chunk);
- EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
- S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
+ // for combined 'distribute' and 'for' the increment expression of distribute
+ // is store in DistInc. For 'distribute' alone, it is in Inc.
+ Expr *IncExpr;
+ if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
+ IncExpr = S.getDistInc();
+ else
+ IncExpr = S.getInc();
+
+ // this routine is shared by 'omp distribute parallel for' and
+ // 'omp distribute': select the right EUB expression depending on the
+ // directive
+ OMPLoopArguments OuterLoopArgs;
+ OuterLoopArgs.LB = LoopArgs.LB;
+ OuterLoopArgs.UB = LoopArgs.UB;
+ OuterLoopArgs.ST = LoopArgs.ST;
+ OuterLoopArgs.IL = LoopArgs.IL;
+ OuterLoopArgs.Chunk = LoopArgs.Chunk;
+ OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedEnsureUpperBound()
+ : S.getEnsureUpperBound();
+ OuterLoopArgs.IncExpr = IncExpr;
+ OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedInit()
+ : S.getInit();
+ OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedCond()
+ : S.getCond();
+ OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedNextLowerBound()
+ : S.getNextLowerBound();
+ OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedNextUpperBound()
+ : S.getNextUpperBound();
+
+ EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
+ LoopScope, OuterLoopArgs, CodeGenLoopContent,
+ emitEmptyOrdered);
+}
+
+/// Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+ const DeclRefExpr *Helper) {
+ auto VDecl = cast<VarDecl>(Helper->getDecl());
+ CGF.EmitVarDecl(*VDecl);
+ return CGF.EmitLValue(Helper);
+}
+
+static std::pair<LValue, LValue>
+emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ LValue LB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
+
+ // When composing 'distribute' with 'for' (e.g. as in 'distribute
+ // parallel for') we need to use the 'distribute'
+ // chunk lower and upper bounds rather than the whole loop iteration
+ // space. These are parameters to the outlined function for 'parallel'
+ // and we copy the bounds of the previous schedule into the
+ // the current ones.
+ LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
+ LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
+ llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
+ PrevLBVal = CGF.EmitScalarConversion(
+ PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
+ LS.getIterationVariable()->getType(), SourceLocation());
+ llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
+ PrevUBVal = CGF.EmitScalarConversion(
+ PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
+ LS.getIterationVariable()->getType(), SourceLocation());
+
+ CGF.EmitStoreOfScalar(PrevLBVal, LB);
+ CGF.EmitStoreOfScalar(PrevUBVal, UB);
+
+ return {LB, UB};
+}
+
+/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
+/// we need to use the LB and UB expressions generated by the worksharing
+/// code generation support, whereas in non combined situations we would
+/// just emit 0 and the LastIteration expression
+/// This function is necessary due to the difference of the LB and UB
+/// types for the RT emission routines for 'for_static_init' and
+/// 'for_dispatch_init'
+static std::pair<llvm::Value *, llvm::Value *>
+emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ Address LB, Address UB) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const Expr *IVExpr = LS.getIterationVariable();
+ // when implementing a dynamic schedule for a 'for' combined with a
+ // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
+ // is not normalized as each team only executes its own assigned
+ // distribute chunk
+ QualType IteratorTy = IVExpr->getType();
+ llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
+ SourceLocation());
+ llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
+ SourceLocation());
+ return {LBVal, UBVal};
+}
+
+static void emitDistributeParallelForDistributeInnerBoundParams(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
+ const auto &Dir = cast<OMPLoopDirective>(S);
+ LValue LB =
+ CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
+ auto LBCast = CGF.Builder.CreateIntCast(
+ CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
+ CapturedVars.push_back(LBCast);
+ LValue UB =
+ CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
+
+ auto UBCast = CGF.Builder.CreateIntCast(
+ CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
+ CapturedVars.push_back(UBCast);
+}
+
+static void
+emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ CodeGenFunction::JumpDest LoopExit) {
+ auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
+ emitDistributeParallelForInnerBounds,
+ emitDistributeParallelForDispatchBounds);
+ };
+
+ emitCommonOMPParallelDirective(
+ CGF, S, OMPD_for, CGInlinedWorksharingLoop,
+ emitDistributeParallelForDistributeInnerBoundParams);
}
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+ S.getDistInc());
+ };
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_distribute_parallel_for,
- [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- OMPLoopScope PreInitScope(CGF, S);
- OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for,
- /*HasCancel=*/false);
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
+ OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
+ /*HasCancel=*/false);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
+ /*HasCancel=*/false);
}
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
@@ -2003,15 +2044,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
});
}
-void CodeGenFunction::EmitOMPTargetTeamsDirective(
- const OMPTargetTeamsDirective &S) {
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
-}
-
void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
const OMPTargetTeamsDistributeDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(
@@ -2052,14 +2084,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
});
}
-/// \brief Emit a helper variable and return corresponding lvalue.
-static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
- const DeclRefExpr *Helper) {
- auto VDecl = cast<VarDecl>(Helper->getDecl());
- CGF.EmitVarDecl(*VDecl);
- return CGF.EmitLValue(Helper);
-}
-
namespace {
struct ScheduleKindModifiersTy {
OpenMPScheduleClauseKind Kind;
@@ -2072,7 +2096,10 @@ namespace {
};
} // namespace
-bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
+bool CodeGenFunction::EmitOMPWorksharingLoop(
+ const OMPLoopDirective &S, Expr *EUB,
+ const CodeGenLoopBoundsTy &CodeGenLoopBounds,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -2122,10 +2149,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
emitAlignedClause(*this, S);
EmitOMPLinearClauseInit(S);
// Emit helper vars inits.
- LValue LB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+
+ std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
+ LValue LB = Bounds.first;
+ LValue UB = Bounds.second;
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
@@ -2211,9 +2238,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
+ const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
+ ST.getAddress(), IL.getAddress(),
+ Chunk, EUB);
EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
- LB.getAddress(), UB.getAddress(), ST.getAddress(),
- IL.getAddress(), Chunk);
+ LoopArguments, CGDispatchBounds);
}
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
EmitOMPSimdFinal(S,
@@ -2222,7 +2251,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
CGF.EmitLoadOfScalar(IL, S.getLocStart()));
});
}
- EmitOMPReductionClauseFinal(S);
+ EmitOMPReductionClauseFinal(
+ S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
+ ? /*Parallel and Simd*/ OMPD_parallel_for_simd
+ : /*Parallel only*/ OMPD_parallel);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
*this, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
@@ -2248,12 +2280,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
return HasLastprivateClause;
}
+/// The following two functions generate expressions for the loop lower
+/// and upper bounds in case of static and dynamic (dispatch) schedule
+/// of the associated 'for' or 'distribute' loop.
+static std::pair<LValue, LValue>
+emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ LValue LB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
+ return {LB, UB};
+}
+
+/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
+/// consider the lower and upper bound expressions generated by the
+/// worksharing loop support, but we use 0 and the iteration space size as
+/// constants
+static std::pair<llvm::Value *, llvm::Value *>
+emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ Address LB, Address UB) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const Expr *IVExpr = LS.getIterationVariable();
+ const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
+ llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
+ llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
+ return {LBVal, UBVal};
+}
+
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
{
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
@@ -2271,7 +2333,9 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
{
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
@@ -2320,8 +2384,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
// Generate condition for loop.
BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
- OK_Ordinary, S.getLocStart(),
- /*fpContractable=*/false);
+ OK_Ordinary, S.getLocStart(), FPOptions());
// Increment for loop counter.
UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
S.getLocStart());
@@ -2397,7 +2460,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd());
};
CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
- CGF.EmitOMPReductionClauseFinal(S);
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
// Emit post-update of the reduction variables if IsLastIter != 0.
emitPostUpdateForReductionClause(
CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * {
@@ -2523,9 +2586,11 @@ void CodeGenFunction::EmitOMPParallelForDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
- CGF.EmitOMPWorksharingLoop(S);
+ CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -2533,9 +2598,11 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPWorksharingLoop(S);
+ CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
@@ -2545,7 +2612,8 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitSections(S);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
@@ -2629,11 +2697,32 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
++ID;
}
}
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ auto IPriv = C->privates().begin();
+ auto IRed = C->reduction_ops().begin();
+ auto ILHS = C->lhs_exprs().begin();
+ auto IRHS = C->rhs_exprs().begin();
+ for (const auto *Ref : C->varlists()) {
+ Data.ReductionVars.emplace_back(Ref);
+ Data.ReductionCopies.emplace_back(*IPriv);
+ Data.ReductionOps.emplace_back(*IRed);
+ LHSs.emplace_back(*ILHS);
+ RHSs.emplace_back(*IRHS);
+ std::advance(IPriv, 1);
+ std::advance(IRed, 1);
+ std::advance(ILHS, 1);
+ std::advance(IRHS, 1);
+ }
+ }
+ Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
+ *this, S.getLocStart(), LHSs, RHSs, Data);
// Build list of dependences.
for (const auto *C : S.getClausesOfKind<OMPDependClause>())
for (auto *IRef : C->varlists())
Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
- auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs](
+ auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs](
CodeGenFunction &CGF, PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
@@ -2688,6 +2777,34 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
}
}
+ if (Data.Reductions) {
+ OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true);
+ ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
+ Data.ReductionOps);
+ llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
+ CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
+ for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
+ RedCG.emitSharedLValue(CGF, Cnt);
+ RedCG.emitAggregateType(CGF, Cnt);
+ Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
+ CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
+ Replacement =
+ Address(CGF.EmitScalarConversion(
+ Replacement.getPointer(), CGF.getContext().VoidPtrTy,
+ CGF.getContext().getPointerType(
+ Data.ReductionCopies[Cnt]->getType()),
+ SourceLocation()),
+ Replacement.getAlignment());
+ Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
+ Scope.addPrivate(RedCG.getBaseDecl(Cnt),
+ [Replacement]() { return Replacement; });
+ // FIXME: This must removed once the runtime library is fixed.
+ // Emit required threadprivate variables for
+ // initilizer/combiner/finalizer.
+ CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+ RedCG, Cnt);
+ }
+ }
(void)Scope.Privatize();
Action.Enter(CGF);
@@ -2763,7 +2880,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
}(), S.getLocStart());
}
-void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
+void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
+ const CodeGenLoopTy &CodeGenLoop,
+ Expr *IncExpr) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -2804,10 +2923,17 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
// Emit 'then' code.
{
// Emit helper vars inits.
- LValue LB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+
+ LValue LB = EmitOMPHelperVar(
+ *this, cast<DeclRefExpr>(
+ (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedLowerBoundVariable()
+ : S.getLowerBoundVariable())));
+ LValue UB = EmitOMPHelperVar(
+ *this, cast<DeclRefExpr>(
+ (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedUpperBoundVariable()
+ : S.getUpperBoundVariable())));
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
@@ -2859,15 +2985,25 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
auto LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedEnsureUpperBound()
+ : S.getEnsureUpperBound());
// IV = LB;
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedInit()
+ : S.getInit());
+
+ Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedCond()
+ : S.getCond();
+
+ // for distribute alone, codegen
// while (idx <= UB) { BODY; ++idx; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
- S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
+ // when combined with 'for' (e.g. as in 'distribute parallel for')
+ // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
+ [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+ CodeGenLoop(CGF, S, LoopExit);
},
[](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
@@ -2876,9 +3012,11 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
} else {
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
- EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
- LB.getAddress(), UB.getAddress(), ST.getAddress(),
- IL.getAddress(), Chunk);
+ const OMPLoopArguments LoopArguments = {
+ LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
+ Chunk};
+ EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
+ CodeGenLoop);
}
// Emit final copy of the lastprivate variables if IsLastIter != 0.
@@ -2900,7 +3038,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
void CodeGenFunction::EmitOMPDistributeDirective(
const OMPDistributeDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPDistributeLoop(S);
+
+ CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
};
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
@@ -3250,7 +3389,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
NewVValType = XRValExpr->getType();
auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
- IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue {
+ IsPostfixUpdate](RValue XRValue) -> RValue {
CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
RValue Res = CGF.EmitAnyExpr(UE);
@@ -3277,7 +3416,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
NewVValType = X->getType().getNonReferenceType();
ExprRValue = convertToType(CGF, ExprRValue, E->getType(),
X->getType().getNonReferenceType(), Loc);
- auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue {
+ auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue {
NewVVal = XRValue;
return ExprRValue;
};
@@ -3327,6 +3466,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_firstprivate:
case OMPC_lastprivate:
case OMPC_reduction:
+ case OMPC_task_reduction:
case OMPC_safelen:
case OMPC_simdlen:
case OMPC_collapse:
@@ -3404,41 +3544,24 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
-std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/>
-CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
- CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName,
- bool IsOffloadEntry) {
- llvm::Function *OutlinedFn = nullptr;
- llvm::Constant *OutlinedFnID = nullptr;
- auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
- OMPPrivateScope PrivateScope(CGF);
- (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
- CGF.EmitOMPPrivateClause(S, PrivateScope);
- (void)PrivateScope.Privatize();
-
- Action.Enter(CGF);
- CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- };
- // Emit target region as a standalone region.
- CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
- S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen);
- return std::make_pair(OutlinedFn, OutlinedFnID);
-}
-
-void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
+static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ const RegionCodeGenTy &CodeGen) {
+ assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
+ CodeGenModule &CGM = CGF.CGM;
const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
- llvm::SmallVector<llvm::Value *, 16> CapturedVars;
- GenerateOpenMPCapturedVars(CS, CapturedVars);
-
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;
- // Check if we have any if clause associated with the directive.
const Expr *IfCond = nullptr;
-
- if (auto *C = S.getSingleClause<OMPIfClause>()) {
- IfCond = C->getCondition();
+ // Check for the at most one if clause associated with the target region.
+ for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+ if (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_target) {
+ IfCond = C->getCondition();
+ break;
+ }
}
// Check if we have any device clause associated with the directive.
@@ -3453,43 +3576,76 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
bool IsOffloadEntry = true;
if (IfCond) {
bool Val;
- if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
+ if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
IsOffloadEntry = false;
}
if (CGM.getLangOpts().OMPTargetTriples.empty())
IsOffloadEntry = false;
- assert(CurFuncDecl && "No parent declaration for target region!");
+ assert(CGF.CurFuncDecl && "No parent declaration for target region!");
StringRef ParentName;
// In case we have Ctors/Dtors we use the complete type variant to produce
// the mangling of the device outlined kernel.
- if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl))
+ if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
- else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl))
+ else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
else
ParentName =
- CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
+ CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
- std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction(
- CGM, S, ParentName, IsOffloadEntry);
- OMPLexicalScope Scope(*this, S);
- CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
+ IsOffloadEntry, CodeGen);
+ OMPLexicalScope Scope(CGF, S);
+ llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
+ CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
CapturedVars);
}
+static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
+ PrePostActionTy &Action) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+
+ Action.Enter(CGF);
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+}
+
+void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
+ StringRef ParentName,
+ const OMPTargetDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind,
const RegionCodeGenTy &CodeGen) {
- auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
- auto OutlinedFn = CGF.CGM.getOpenMPRuntime().
- emitParallelOrTeamsOutlinedFunction(S,
- *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
+ const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
+ auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
+ S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
- const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
- const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
- const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
+ const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
+ const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
if (NT || TL) {
Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
@@ -3498,7 +3654,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
S.getLocStart());
}
- OMPLexicalScope Scope(CGF, S);
+ OMPTeamsScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
@@ -3511,10 +3667,47 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
OMPPrivateScope PrivateScope(CGF);
(void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
CGF.EmitOMPPrivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
(void)PrivateScope.Privatize();
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
};
emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
+ emitPostUpdateForReductionClause(
+ *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+}
+
+static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
+ const OMPTargetTeamsDirective &S) {
+ auto *CS = S.getCapturedStmt(OMPD_teams);
+ Action.Enter(CGF);
+ auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ // TODO: Add support for clauses.
+ CGF.EmitStmt(CS->getCapturedStmt());
+ };
+ emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsRegion(CGF, Action, S);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDirective(
+ const OMPTargetTeamsDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsRegion(CGF, Action, S);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
@@ -3740,9 +3933,48 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective(
CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
}
+static void emitTargetParallelRegion(CodeGenFunction &CGF,
+ const OMPTargetParallelDirective &S,
+ PrePostActionTy &Action) {
+ // Get the captured statement associated with the 'parallel' region.
+ auto *CS = S.getCapturedStmt(OMPD_parallel);
+ Action.Enter(CGF);
+ auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPPrivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
+ (void)PrivateScope.Privatize();
+ // TODO: Add support for clauses.
+ CGF.EmitStmt(CS->getCapturedStmt());
+ CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
+ };
+ emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
+ emitEmptyBoundParameters);
+ emitPostUpdateForReductionClause(
+ CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
+}
+
+void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetParallelDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetParallelRegion(CGF, S, Action);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
void CodeGenFunction::EmitOMPTargetParallelDirective(
const OMPTargetParallelDirective &S) {
- // TODO: codegen for target parallel.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetParallelRegion(CGF, S, Action);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
}
void CodeGenFunction::EmitOMPTargetParallelForDirective(
@@ -3884,7 +4116,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
CodeGen);
};
- EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ if (Data.Nogroup)
+ EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ else {
+ CGM.getOpenMPRuntime().emitTaskgroupRegion(
+ *this,
+ [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ },
+ S.getLocStart());
+ }
}
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
OpenPOWER on IntegriCloud