Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r--  contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp  1305
1 files changed, 1132 insertions, 173 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 4025217..d488bd4 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -15,7 +15,7 @@
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
-#include "ConstantBuilder.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "llvm/ADT/ArrayRef.h"
@@ -643,6 +643,12 @@ enum OpenMPRTLFunction {
// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
// *vec);
OMPRTL__kmpc_doacross_wait,
+ // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ OMPRTL__kmpc_task_reduction_init,
+ // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
+ OMPRTL__kmpc_task_reduction_get_th_data,
//
// Offloading related calls
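For reference, the two libomp entry points bound to these new enumerators have the following C signatures, taken from the comments above and from the matching createRuntimeFunction cases later in this patch (the authoritative declarations live in the runtime, so typedef spellings may differ):

    // Returns an opaque taskgroup reduction descriptor ("tg").
    void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
    // Returns the calling thread's private copy of reduction item 'd' inside
    // the taskgroup described by 'tg'.
    void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);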
@@ -697,6 +703,414 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
}
}
+/// Check if the combiner is a call to a UDR combiner and, if so, return the
+/// UDR decl used for the reduction.
+static const OMPDeclareReductionDecl *
+getReductionInit(const Expr *ReductionOp) {
+ if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
+ if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
+ if (auto *DRE =
+ dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
+ if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
+ return DRD;
+ return nullptr;
+}
+
+static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
+ const OMPDeclareReductionDecl *DRD,
+ const Expr *InitOp,
+ Address Private, Address Original,
+ QualType Ty) {
+ if (DRD->getInitializer()) {
+ std::pair<llvm::Function *, llvm::Function *> Reduction =
+ CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
+ auto *CE = cast<CallExpr>(InitOp);
+ auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
+ const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
+ const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
+ auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
+ auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
+ [=]() -> Address { return Private; });
+ PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
+ [=]() -> Address { return Original; });
+ (void)PrivateScope.Privatize();
+ RValue Func = RValue::get(Reduction.second);
+ CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
+ CGF.EmitIgnoredExpr(InitOp);
+ } else {
+ llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
+ auto *GV = new llvm::GlobalVariable(
+ CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage, Init, ".init");
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
+ RValue InitRVal;
+ switch (CGF.getEvaluationKind(Ty)) {
+ case TEK_Scalar:
+ InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
+ break;
+ case TEK_Complex:
+ InitRVal =
+ RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
+ break;
+ case TEK_Aggregate:
+ InitRVal = RValue::getAggregate(LV.getAddress());
+ break;
+ }
+ OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
+ CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
+ /*IsInitializer=*/false);
+ }
+}
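To illustrate what these two helpers handle, a user-defined reduction with an initializer clause might look like the hypothetical snippet below. getReductionInit() returns the OMPDeclareReductionDecl when the combiner expression is a call to such a UDR, and emitInitWithReductionInitializer() evaluates its initializer clause with, in effect, omp_priv bound to the private copy and omp_orig to the original item; the worksharing directive at the end is only there to give the UDR a use.

    #include <climits>

    // Hypothetical UDR: maximum with an explicit identity value.
    #pragma omp declare reduction(mymax : int :                                \
            omp_out = omp_in > omp_out ? omp_in : omp_out)                     \
        initializer(omp_priv = INT_MIN)

    int reduce_max(const int *a, int n) {
      int m = INT_MIN;
    #pragma omp parallel for reduction(mymax : m)
      for (int i = 0; i < n; ++i)
        m = a[i] > m ? a[i] : m;
      return m;
    }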
+
+/// \brief Emit initialization of arrays of complex types.
+/// \param DestAddr Address of the array.
+/// \param Type Type of array.
+/// \param Init Initial expression of array.
+/// \param SrcAddr Address of the original array.
+static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
+ QualType Type, const Expr *Init,
+ const OMPDeclareReductionDecl *DRD,
+ Address SrcAddr = Address::invalid()) {
+ // Perform element-by-element initialization.
+ QualType ElementTy;
+
+ // Drill down to the base element type on both arrays.
+ auto ArrayTy = Type->getAsArrayTypeUnsafe();
+ auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
+ DestAddr =
+ CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
+ if (DRD)
+ SrcAddr =
+ CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
+
+ llvm::Value *SrcBegin = nullptr;
+ if (DRD)
+ SrcBegin = SrcAddr.getPointer();
+ auto DestBegin = DestAddr.getPointer();
+ // Cast from pointer to array type to pointer to single element.
+ auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
+ // The basic structure here is a while-do loop.
+ auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
+ auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
+ auto IsEmpty =
+ CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
+ CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
+
+ // Enter the loop body, making that address the current address.
+ auto EntryBB = CGF.Builder.GetInsertBlock();
+ CGF.EmitBlock(BodyBB);
+
+ CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
+
+ llvm::PHINode *SrcElementPHI = nullptr;
+ Address SrcElementCurrent = Address::invalid();
+ if (DRD) {
+ SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
+ "omp.arraycpy.srcElementPast");
+ SrcElementPHI->addIncoming(SrcBegin, EntryBB);
+ SrcElementCurrent =
+ Address(SrcElementPHI,
+ SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
+ }
+ llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
+ DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
+ DestElementPHI->addIncoming(DestBegin, EntryBB);
+ Address DestElementCurrent =
+ Address(DestElementPHI,
+ DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
+
+ // Emit copy.
+ {
+ CodeGenFunction::RunCleanupsScope InitScope(CGF);
+ if (DRD && (DRD->getInitializer() || !Init)) {
+ emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
+ SrcElementCurrent, ElementTy);
+ } else
+ CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
+ /*IsInitializer=*/false);
+ }
+
+ if (DRD) {
+ // Shift the address forward by one element.
+ auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
+ SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
+ SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
+ }
+
+ // Shift the address forward by one element.
+ auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
+ DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
+ // Check whether we've reached the end.
+ auto Done =
+ CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
+ CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
+ DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
+
+ // Done.
+ CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
+}
+
+LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
+ if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
+ return CGF.EmitOMPArraySectionExpr(OASE);
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
+ return CGF.EmitLValue(ASE);
+ auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ CGF.CapturedStmtInfo &&
+ CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ E->getType(), VK_LValue, E->getExprLoc());
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ return CGF.EmitLValue(&DRE);
+}
+
+LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
+ const Expr *E) {
+ if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
+ return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
+ return LValue();
+}
+
+void ReductionCodeGen::emitAggregateInitialization(
+ CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
+ const OMPDeclareReductionDecl *DRD) {
+ // Emit VarDecl with copy init for arrays.
+ // Get the address of the original variable captured in current
+ // captured region.
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
+ DRD ? ClausesData[N].ReductionOp : PrivateVD->getInit(),
+ DRD, SharedLVal.getAddress());
+}
+
+ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> ReductionOps) {
+ ClausesData.reserve(Shareds.size());
+ SharedAddresses.reserve(Shareds.size());
+ Sizes.reserve(Shareds.size());
+ BaseDecls.reserve(Shareds.size());
+ auto IPriv = Privates.begin();
+ auto IRed = ReductionOps.begin();
+ for (const auto *Ref : Shareds) {
+ ClausesData.emplace_back(Ref, *IPriv, *IRed);
+ std::advance(IPriv, 1);
+ std::advance(IRed, 1);
+ }
+}
+
+void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
+ assert(SharedAddresses.size() == N &&
+ "Number of generated lvalues must be exactly N.");
+ SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
+ emitSharedLValueUB(CGF, ClausesData[N].Ref));
+}
+
+void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ QualType PrivateType = PrivateVD->getType();
+ bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
+ if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+ Sizes.emplace_back(
+ CGF.getTypeSize(
+ SharedAddresses[N].first.getType().getNonReferenceType()),
+ nullptr);
+ return;
+ }
+ llvm::Value *Size;
+ llvm::Value *SizeInChars;
+ llvm::Type *ElemType =
+ cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
+ ->getElementType();
+ auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
+ if (AsArraySection) {
+ Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
+ SharedAddresses[N].first.getPointer());
+ Size = CGF.Builder.CreateNUWAdd(
+ Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
+ SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
+ } else {
+ SizeInChars = CGF.getTypeSize(
+ SharedAddresses[N].first.getType().getNonReferenceType());
+ Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
+ }
+ Sizes.emplace_back(SizeInChars, Size);
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(
+ CGF,
+ cast<OpaqueValueExpr>(
+ CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
+ RValue::get(Size));
+ CGF.EmitVariablyModifiedType(PrivateType);
+}
+
+void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
+ llvm::Value *Size) {
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ QualType PrivateType = PrivateVD->getType();
+ bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
+ if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
+ assert(!Size && !Sizes[N].second &&
+ "Size should be nullptr for non-variably modified redution "
+ "items.");
+ return;
+ }
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(
+ CGF,
+ cast<OpaqueValueExpr>(
+ CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
+ RValue::get(Size));
+ CGF.EmitVariablyModifiedType(PrivateType);
+}
+
+void ReductionCodeGen::emitInitialization(
+ CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
+ llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
+ assert(SharedAddresses.size() > N && "No variable was generated");
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+ QualType PrivateType = PrivateVD->getType();
+ PrivateAddr = CGF.Builder.CreateElementBitCast(
+ PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
+ QualType SharedType = SharedAddresses[N].first.getType();
+ SharedLVal = CGF.MakeAddrLValue(
+ CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
+ CGF.ConvertTypeForMem(SharedType)),
+ SharedType, SharedAddresses[N].first.getBaseInfo());
+ if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
+ CGF.getContext().getAsArrayType(PrivateVD->getType())) {
+ emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
+ } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
+ emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
+ PrivateAddr, SharedLVal.getAddress(),
+ SharedLVal.getType());
+ } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
+ !CGF.isTrivialInitializer(PrivateVD->getInit())) {
+ CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
+ PrivateVD->getType().getQualifiers(),
+ /*IsInitializer=*/false);
+ }
+}
+
+bool ReductionCodeGen::needCleanups(unsigned N) {
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ QualType PrivateType = PrivateVD->getType();
+ QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
+ return DTorKind != QualType::DK_none;
+}
+
+void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
+ Address PrivateAddr) {
+ auto *PrivateVD =
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
+ QualType PrivateType = PrivateVD->getType();
+ QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
+ if (needCleanups(N)) {
+ PrivateAddr = CGF.Builder.CreateElementBitCast(
+ PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
+ CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
+ }
+}
+
+static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
+ LValue BaseLV) {
+ BaseTy = BaseTy.getNonReferenceType();
+ while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
+ !CGF.getContext().hasSameType(BaseTy, ElTy)) {
+ if (auto *PtrTy = BaseTy->getAs<PointerType>())
+ BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
+ else {
+ BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
+ BaseTy->castAs<ReferenceType>());
+ }
+ BaseTy = BaseTy->getPointeeType();
+ }
+ return CGF.MakeAddrLValue(
+ CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
+ CGF.ConvertTypeForMem(ElTy)),
+ BaseLV.getType(), BaseLV.getBaseInfo());
+}
+
+static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
+ llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
+ llvm::Value *Addr) {
+ Address Tmp = Address::invalid();
+ Address TopTmp = Address::invalid();
+ Address MostTopTmp = Address::invalid();
+ BaseTy = BaseTy.getNonReferenceType();
+ while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
+ !CGF.getContext().hasSameType(BaseTy, ElTy)) {
+ Tmp = CGF.CreateMemTemp(BaseTy);
+ if (TopTmp.isValid())
+ CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
+ else
+ MostTopTmp = Tmp;
+ TopTmp = Tmp;
+ BaseTy = BaseTy->getPointeeType();
+ }
+ llvm::Type *Ty = BaseLVType;
+ if (Tmp.isValid())
+ Ty = Tmp.getElementType();
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
+ if (Tmp.isValid()) {
+ CGF.Builder.CreateStore(Addr, Tmp);
+ return MostTopTmp;
+ }
+ return Address(Addr, BaseLVAlignment);
+}
+
+Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
+ Address PrivateAddr) {
+ const DeclRefExpr *DE;
+ const VarDecl *OrigVD = nullptr;
+ if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
+ auto *Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
+ Base = TempOASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ DE = cast<DeclRefExpr>(Base);
+ OrigVD = cast<VarDecl>(DE->getDecl());
+ } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
+ auto *Base = ASE->getBase()->IgnoreParenImpCasts();
+ while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = TempASE->getBase()->IgnoreParenImpCasts();
+ DE = cast<DeclRefExpr>(Base);
+ OrigVD = cast<VarDecl>(DE->getDecl());
+ }
+ if (OrigVD) {
+ BaseDecls.emplace_back(OrigVD);
+ auto OriginalBaseLValue = CGF.EmitLValue(DE);
+ LValue BaseLValue =
+ loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
+ OriginalBaseLValue);
+ llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
+ BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
+ llvm::Value *Ptr =
+ CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
+ return castToBase(CGF, OrigVD->getType(),
+ SharedAddresses[N].first.getType(),
+ OriginalBaseLValue.getPointer()->getType(),
+ OriginalBaseLValue.getAlignment(), Ptr);
+ }
+ BaseDecls.emplace_back(
+ cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
+ return PrivateAddr;
+}
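A worked example of the adjustment above, assuming the reduction clause names an array section such as a[8:16] of an int a[64] (names and numbers invented):

    // SharedAddresses[N].first points at &a[8] (start of the section), while
    // loadToBegin() recovers the base lvalue &a[0].  Then:
    //   Adjustment = &a[0] - &a[8]        // -8 elements
    //   Ptr        = PrivateAddr + (-8)   // rebased private pointer
    // castToBase() casts Ptr back to the type of the original base (adding
    // temporaries only when the base is reached through pointers/references),
    // so later code can subscript the private copy with the same indices it
    // would use on 'a', even though storage exists only for a[8]..a[23].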
+
+bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
+ auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
+ return DRD && DRD->getInitializer();
+}
+
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
@@ -720,7 +1134,7 @@ LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
CodeGenFunction &CGF) {
return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
getThreadIDVariable()->getType(),
- AlignmentSource::Decl);
+ LValueBaseInfo(AlignmentSource::Decl, false));
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
@@ -728,7 +1142,7 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
IdentTy = llvm::StructType::create(
"ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
- CGM.Int8PtrTy /* psource */, nullptr);
+ CGM.Int8PtrTy /* psource */);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
loadOffloadInfoMetadata();
@@ -747,9 +1161,9 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
QualType PtrTy = C.getPointerType(Ty).withRestrict();
FunctionArgList Args;
ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
- /*Id=*/nullptr, PtrTy);
+ /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
- /*Id=*/nullptr, PtrTy);
+ /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
Args.push_back(&OmpOutParm);
Args.push_back(&OmpInParm);
auto &FnInfo =
@@ -760,6 +1174,7 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
Fn->removeFnAttr(llvm::Attribute::NoInline);
+ Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
Fn->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
// Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
@@ -842,12 +1257,12 @@ static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
}
-llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
- const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
- OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+static llvm::Value *emitParallelOrTeamsOutlinedFunction(
+ CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
+ const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
+ const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
assert(ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 *");
- const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true);
bool HasCancel = false;
if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
@@ -857,11 +1272,27 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
HasCancel = OPFD->hasCancel();
CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
- HasCancel, getOutlinedHelperName());
+ HasCancel, OutlinedHelperName);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}
+llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
+ return emitParallelOrTeamsOutlinedFunction(
+ CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
+}
+
+llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
+ return emitParallelOrTeamsOutlinedFunction(
+ CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
+}
+
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
const VarDecl *PartIDVar, const VarDecl *TaskTVar,
@@ -1537,6 +1968,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
break;
}
+ case OMPRTL__kmpc_task_reduction_init: {
+ // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
+ break;
+ }
+ case OMPRTL__kmpc_task_reduction_get_th_data: {
+ // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
+ llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
+ break;
+ }
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
@@ -1791,8 +2242,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
// threadprivate copy of the variable VD
CodeGenFunction CtorCGF(CGM);
FunctionArgList Args;
- ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
- /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
+ ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&Dst);
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
@@ -1822,8 +2273,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
// of the variable VD
CodeGenFunction DtorCGF(CGM);
FunctionArgList Args;
- ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(),
- /*Id=*/nullptr, CGM.getContext().VoidPtrTy);
+ ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
+ ImplicitParamDecl::Other);
Args.push_back(&Dst);
auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
@@ -1887,6 +2338,27 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
return nullptr;
}
+Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
+ QualType VarType,
+ StringRef Name) {
+ llvm::Twine VarName(Name, ".artificial.");
+ llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
+ llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, SourceLocation()),
+ getThreadID(CGF, SourceLocation()),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
+ CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
+ /*IsSigned=*/false),
+ getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
+ return Address(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
+ VarLVType->getPointerTo(/*AddrSpace=*/0)),
+ CGM.getPointerAlign());
+}
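The argument list built here matches the __kmpc_threadprivate_cached entry point that this file already uses for ordinary threadprivate variables; as a reminder, its prototype is roughly (see libomp's kmp.h for the authoritative declaration):

    void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
                                      void *data, size_t size, void ***cache);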
+
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
/// function. Here is the logic:
/// if (Cond) {
@@ -2174,10 +2646,8 @@ static llvm::Value *emitCopyprivateCopyFunction(
auto &C = CGM.getContext();
// void copy_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
- ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
- C.VoidPtrTy);
- ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
- C.VoidPtrTy);
+ ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
@@ -2450,16 +2920,14 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
return Schedule | Modifier;
}
-void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
- SourceLocation Loc,
- const OpenMPScheduleTy &ScheduleKind,
- unsigned IVSize, bool IVSigned,
- bool Ordered, llvm::Value *UB,
- llvm::Value *Chunk) {
+void CGOpenMPRuntime::emitForDispatchInit(
+ CodeGenFunction &CGF, SourceLocation Loc,
+ const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
+ bool Ordered, const DispatchRTInput &DispatchValues) {
if (!CGF.HaveInsertPoint())
return;
- OpenMPSchedType Schedule =
- getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
+ OpenMPSchedType Schedule = getRuntimeSchedule(
+ ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
assert(Ordered ||
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
@@ -2470,14 +2938,14 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
// kmp_int[32|64] stride, kmp_int[32|64] chunk);
// If the Chunk was not specified in the clause - use default value 1.
- if (Chunk == nullptr)
- Chunk = CGF.Builder.getIntN(IVSize, 1);
+ llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
+ : CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.getInt32(addMonoNonMonoModifier(
Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
- CGF.Builder.getIntN(IVSize, 0), // Lower
- UB, // Upper
+ DispatchValues.LB, // Lower
+ DispatchValues.UB, // Upper
CGF.Builder.getIntN(IVSize, 1), // Stride
Chunk // Chunk
};
@@ -2686,6 +3154,8 @@ enum KmpTaskTFields {
KmpTaskTStride,
/// (Taskloops only) Is last iteration flag.
KmpTaskTLastIter,
+ /// (Taskloops only) Reduction data.
+ KmpTaskTReductions,
};
} // anonymous namespace
@@ -2770,8 +3240,7 @@ createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
const RegionCodeGenTy &Codegen) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl DummyPtr(C, /*DC=*/nullptr, SourceLocation(),
- /*Id=*/nullptr, C.VoidPtrTy);
+ ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
Args.push_back(&DummyPtr);
CodeGenFunction CGF(CGM);
@@ -2874,7 +3343,7 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
// descriptor, so we can reuse the logic that emits Ctors and Dtors.
auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
- IdentInfo, C.CharTy);
+ IdentInfo, C.CharTy, ImplicitParamDecl::Other);
auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
CGM, ".omp_offloading.descriptor_unreg",
@@ -2889,6 +3358,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
Desc);
CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
});
+ if (CGM.supportsCOMDAT()) {
+ // It is sufficient to call the registration function only once, so create a
+ // COMDAT group for the registration/unregistration functions and the
+ // associated data. That reduces startup time and code size. The registration
+ // function serves as the COMDAT group key.
+ auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
+ RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
+ RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
+ RegFn->setComdat(ComdatKey);
+ UnRegFn->setComdat(ComdatKey);
+ DeviceImages->setComdat(ComdatKey);
+ Desc->setComdat(ComdatKey);
+ }
return RegFn;
}
@@ -2958,7 +3440,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
// Create the offloading info metadata node.
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
- // Auxiliar methods to create metadata values and strings.
+ // Auxiliary methods to create metadata values and strings.
auto getMDInt = [&](unsigned v) {
return llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
@@ -3225,6 +3707,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
// kmp_uint64 ub;
// kmp_int64 st;
// kmp_int32 liter;
+ // void * reductions;
// };
auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
UD->startDefinition();
@@ -3248,6 +3731,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
addFieldToRecordDecl(C, RD, KmpUInt64Ty);
addFieldToRecordDecl(C, RD, KmpInt64Ty);
addFieldToRecordDecl(C, RD, KmpInt32Ty);
+ addFieldToRecordDecl(C, RD, C.VoidPtrTy);
}
RD->completeDefinition();
return RD;
@@ -3278,7 +3762,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
-/// tt->shareds);
+/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
@@ -3291,10 +3775,11 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
llvm::Value *TaskPrivatesMap) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
- ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr,
- KmpTaskTWithPrivatesPtrQTy.withRestrict());
+ ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ KmpTaskTWithPrivatesPtrQTy.withRestrict(),
+ ImplicitParamDecl::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
auto &TaskEntryFnInfo =
@@ -3363,10 +3848,14 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
+ auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
+ auto RLVal = CGF.EmitLValueForField(Base, *RFI);
+ auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
CallArgs.push_back(LBParam);
CallArgs.push_back(UBParam);
CallArgs.push_back(StParam);
CallArgs.push_back(LIParam);
+ CallArgs.push_back(RParam);
}
CallArgs.push_back(SharedsParam);
@@ -3385,10 +3874,11 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
QualType KmpTaskTWithPrivatesQTy) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty);
- ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr,
- KmpTaskTWithPrivatesPtrQTy.withRestrict());
+ ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ KmpTaskTWithPrivatesPtrQTy.withRestrict(),
+ ImplicitParamDecl::Other);
Args.push_back(&GtidArg);
Args.push_back(&TaskTypeArg);
FunctionType::ExtInfo Info;
@@ -3444,36 +3934,40 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
FunctionArgList Args;
ImplicitParamDecl TaskPrivatesArg(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
- C.getPointerType(PrivatesQTy).withConst().withRestrict());
+ C.getPointerType(PrivatesQTy).withConst().withRestrict(),
+ ImplicitParamDecl::Other);
Args.push_back(&TaskPrivatesArg);
llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
unsigned Counter = 1;
for (auto *E: PrivateVars) {
Args.push_back(ImplicitParamDecl::Create(
- C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
- .withConst()
- .withRestrict()));
+ C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.getPointerType(C.getPointerType(E->getType()))
+ .withConst()
+ .withRestrict(),
+ ImplicitParamDecl::Other));
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
}
for (auto *E : FirstprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
- C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
- .withConst()
- .withRestrict()));
+ C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.getPointerType(C.getPointerType(E->getType()))
+ .withConst()
+ .withRestrict(),
+ ImplicitParamDecl::Other));
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
}
for (auto *E: LastprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
- C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType()))
- .withConst()
- .withRestrict()));
+ C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.getPointerType(C.getPointerType(E->getType()))
+ .withConst()
+ .withRestrict(),
+ ImplicitParamDecl::Other));
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
PrivateVarsPos[VD] = Counter;
++Counter;
@@ -3488,6 +3982,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
TaskPrivatesMapFnInfo);
TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
+ TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
CodeGenFunction CGF(CGM);
CGF.disableDebugInfo();
@@ -3551,7 +4046,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
SharedRefLValue = CGF.MakeAddrLValue(
Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
- SharedRefLValue.getType(), AlignmentSource::Decl);
+ SharedRefLValue.getType(),
+ LValueBaseInfo(AlignmentSource::Decl,
+ SharedRefLValue.getBaseInfo().getMayAlias()));
QualType Type = OriginalVD->getType();
if (Type->isArrayType()) {
// Initialize firstprivate array.
@@ -3561,7 +4058,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
SharedRefLValue.getAddress(), Type);
} else {
// Initialize firstprivate array using element-by-element
- // intialization.
+ // initialization.
CGF.EmitOMPAggregateAssign(
PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
[&CGF, Elem, Init, &CapturesInfo](Address DestElement,
@@ -3630,12 +4127,14 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
auto &C = CGM.getContext();
FunctionArgList Args;
- ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
- ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy);
- ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc,
- /*Id=*/nullptr, C.IntTy);
+ ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ KmpTaskTWithPrivatesPtrQTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ KmpTaskTWithPrivatesPtrQTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
+ ImplicitParamDecl::Other);
Args.push_back(&DstArg);
Args.push_back(&SrcArg);
Args.push_back(&LastprivArg);
@@ -3764,9 +4263,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
// Emit initial values for private copies (if any).
llvm::Value *TaskPrivatesMap = nullptr;
auto *TaskPrivatesMapTy =
- std::next(cast<llvm::Function>(TaskFunction)->getArgumentList().begin(),
- 3)
- ->getType();
+ std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
TaskPrivatesMap = emitTaskPrivateMappingFunction(
@@ -4006,8 +4503,8 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
DepTaskArgs[5] = CGF.Builder.getInt32(0);
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
- auto &&ThenCodeGen = [this, Loc, &Data, TDBase, KmpTaskTQTyRD,
- NumDependencies, &TaskArgs,
+ auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
+ &TaskArgs,
&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
if (!Data.Tied) {
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
@@ -4121,11 +4618,27 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
/*IsInitializer=*/true);
+ // Store reductions address.
+ LValue RedLVal = CGF.EmitLValueForField(
+ Result.TDBase,
+ *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
+ if (Data.Reductions)
+ CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
+ else {
+ CGF.EmitNullInitialization(RedLVal.getAddress(),
+ CGF.getContext().VoidPtrTy);
+ }
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
llvm::Value *TaskArgs[] = {
- UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
- UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
- llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
+ UpLoc,
+ ThreadID,
+ Result.NewTask,
+ IfVal,
+ LBLVal.getPointer(),
+ UBLVal.getPointer(),
+ CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+ llvm::ConstantInt::getNullValue(
+ CGF.IntTy), // Always 0 because the taskgroup is emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -4134,10 +4647,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
/*isSigned=*/false)
: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
- Result.TaskDupFn
- ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
- CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
@@ -4241,20 +4753,16 @@ static void emitReductionCombiner(CodeGenFunction &CGF,
CGF.EmitIgnoredExpr(ReductionOp);
}
-static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
- llvm::Type *ArgsType,
- ArrayRef<const Expr *> Privates,
- ArrayRef<const Expr *> LHSExprs,
- ArrayRef<const Expr *> RHSExprs,
- ArrayRef<const Expr *> ReductionOps) {
+llvm::Value *CGOpenMPRuntime::emitReductionFunction(
+ CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps) {
auto &C = CGM.getContext();
// void reduction_func(void *LHSArg, void *RHSArg);
FunctionArgList Args;
- ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
- C.VoidPtrTy);
- ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
- C.VoidPtrTy);
+ ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
Args.push_back(&LHSArg);
Args.push_back(&RHSArg);
auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
@@ -4329,11 +4837,11 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
return Fn;
}
-static void emitSingleReductionCombiner(CodeGenFunction &CGF,
- const Expr *ReductionOp,
- const Expr *PrivateRef,
- const DeclRefExpr *LHS,
- const DeclRefExpr *RHS) {
+void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
+ const Expr *ReductionOp,
+ const Expr *PrivateRef,
+ const DeclRefExpr *LHS,
+ const DeclRefExpr *RHS) {
if (PrivateRef->getType()->isArrayType()) {
// Emit reduction for array section.
auto *LHSVar = cast<VarDecl>(LHS->getDecl());
@@ -4353,9 +4861,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
ArrayRef<const Expr *> LHSExprs,
ArrayRef<const Expr *> RHSExprs,
ArrayRef<const Expr *> ReductionOps,
- bool WithNowait, bool SimpleReduction) {
+ ReductionOptionsTy Options) {
if (!CGF.HaveInsertPoint())
return;
+
+ bool WithNowait = Options.WithNowait;
+ bool SimpleReduction = Options.SimpleReduction;
+
// Next code should be emitted for reduction:
//
// static kmp_critical_name lock = { 0 };
@@ -4497,12 +5009,13 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
};
auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
CodeGenFunction &CGF, PrePostActionTy &Action) {
+ auto &RT = CGF.CGM.getOpenMPRuntime();
auto IPriv = Privates.begin();
auto ILHS = LHSExprs.begin();
auto IRHS = RHSExprs.begin();
for (auto *E : ReductionOps) {
- emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
- cast<DeclRefExpr>(*IRHS));
+ RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
+ cast<DeclRefExpr>(*IRHS));
++IPriv;
++ILHS;
++IRHS;
@@ -4562,7 +5075,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
}
if (XExpr) {
auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- auto &&AtomicRedGen = [BO, VD, IPriv,
+ auto &&AtomicRedGen = [BO, VD,
Loc](CodeGenFunction &CGF, const Expr *XExpr,
const Expr *EExpr, const Expr *UpExpr) {
LValue X = CGF.EmitLValue(XExpr);
@@ -4572,7 +5085,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitOMPAtomicSimpleUpdateExpr(
X, E, BO, /*IsXLHSInRHSPart=*/true,
llvm::AtomicOrdering::Monotonic, Loc,
- [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) {
+ [&CGF, UpExpr, VD, Loc](RValue XRValue) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
PrivateScope.addPrivate(
VD, [&CGF, VD, XRValue, Loc]() -> Address {
@@ -4640,6 +5153,353 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
+/// Generates unique name for artificial threadprivate variables.
+/// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
+static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
+ unsigned N) {
+ SmallString<256> Buffer;
+ llvm::raw_svector_ostream Out(Buffer);
+ Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
+ return Out.str();
+}
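A worked example of the produced names (the raw location encoding is hypothetical):

    // generateUniqueName("reduction_size", Loc, 2) with
    // Loc.getRawEncoding() == 4711 returns "reduction_size.4711_2", which
    // getAddrOfArtificialThreadPrivate() then uses as the base name for the
    // artificial threadprivate variable and its cache.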
+
+/// Emits reduction initializer function:
+/// \code
+/// void @.red_init(void* %arg) {
+/// %0 = bitcast void* %arg to <type>*
+/// store <type> <init>, <type>* %0
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N) {
+ auto &C = CGM.getContext();
+ FunctionArgList Args;
+ ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&Param);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_init.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ Address PrivateAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&Param),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from the
+ // global threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ LValue SharedLVal;
+ // If the initializer uses the initializer from a declare reduction
+ // construct, emit a pointer to the address of the original reduction item
+ // (required by the reduction initializer).
+ if (RCG.usesReductionInitializer(N)) {
+ Address SharedAddr =
+ CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().VoidPtrTy,
+ generateUniqueName("reduction", Loc, N));
+ SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
+ } else {
+ SharedLVal = CGF.MakeNaturalAlignAddrLValue(
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ CGM.getContext().VoidPtrTy);
+ }
+ // Emit the initializer:
+ // %0 = bitcast void* %arg to <type>*
+ // store <type> <init>, <type>* %0
+ RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
+ [](CodeGenFunction &) { return false; });
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// Emits reduction combiner function:
+/// \code
+/// void @.red_comb(void* %arg0, void* %arg1) {
+/// %lhs = bitcast void* %arg0 to <type>*
+/// %rhs = bitcast void* %arg1 to <type>*
+/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
+/// store <type> %2, <type>* %lhs
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N,
+ const Expr *ReductionOp,
+ const Expr *LHS, const Expr *RHS,
+ const Expr *PrivateRef) {
+ auto &C = CGM.getContext();
+ auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
+ auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
+ FunctionArgList Args;
+ ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&ParamInOut);
+ Args.emplace_back(&ParamIn);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_comb.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from the
+ // global threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ // Remap lhs and rhs variables to the addresses of the function arguments.
+ // %lhs = bitcast void* %arg0 to <type>*
+ // %rhs = bitcast void* %arg1 to <type>*
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
+ // Pull out the pointer to the variable.
+ Address PtrAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&ParamInOut),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ return CGF.Builder.CreateElementBitCast(
+ PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
+ });
+ PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
+ // Pull out the pointer to the variable.
+ Address PtrAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&ParamIn),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ return CGF.Builder.CreateElementBitCast(
+ PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
+ });
+ PrivateScope.Privatize();
+ // Emit the combiner body:
+ // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
+ // store <type> %2, <type>* %lhs
+ CGM.getOpenMPRuntime().emitSingleReductionCombiner(
+ CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
+ cast<DeclRefExpr>(RHS));
+ CGF.FinishFunction();
+ return Fn;
+}
+
+/// Emits reduction finalizer function:
+/// \code
+/// void @.red_fini(void* %arg) {
+/// %0 = bitcast void* %arg to <type>*
+/// <destroy>(<type>* %0)
+/// ret void
+/// }
+/// \endcode
+static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG, unsigned N) {
+ if (!RCG.needCleanups(N))
+ return nullptr;
+ auto &C = CGM.getContext();
+ FunctionArgList Args;
+ ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
+ Args.emplace_back(&Param);
+ auto &FnInfo =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+ auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
+ auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
+ ".red_fini.", &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
+ CodeGenFunction CGF(CGM);
+ CGF.disableDebugInfo();
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
+ Address PrivateAddr = CGF.EmitLoadOfPointer(
+ CGF.GetAddrOfLocalVar(&Param),
+ C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ llvm::Value *Size = nullptr;
+ // If the size of the reduction item is non-constant, load it from the
+ // global threadprivate variable.
+ if (RCG.getSizes(N).second) {
+ Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ Size =
+ CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
+ CGM.getContext().getSizeType(), SourceLocation());
+ }
+ RCG.emitAggregateType(CGF, N, Size);
+ // Emit the finalizer body:
+ // <destroy>(<type>* %0)
+ RCG.emitCleanups(CGF, N, PrivateAddr);
+ CGF.FinishFunction();
+ return Fn;
+}
+
+llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
+ if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
+ return nullptr;
+
+ // Build typedef struct:
+ // kmp_task_red_input {
+ // void *reduce_shar; // shared reduction item
+ // size_t reduce_size; // size of data item
+ // void *reduce_init; // data initialization routine
+ // void *reduce_fini; // data finalization routine
+ // void *reduce_comb; // data combiner routine
+ // kmp_task_red_flags_t flags; // flags for additional info from compiler
+ // } kmp_task_red_input_t;
+ ASTContext &C = CGM.getContext();
+ auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
+ RD->startDefinition();
+ const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
+ const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+ const FieldDecl *FlagsFD = addFieldToRecordDecl(
+ C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
+ RD->completeDefinition();
+ QualType RDType = C.getRecordType(RD);
+ unsigned Size = Data.ReductionVars.size();
+ llvm::APInt ArraySize(/*numBits=*/64, Size);
+ QualType ArrayRDType = C.getConstantArrayType(
+ RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ // kmp_task_red_input_t .rd_input.[Size];
+ Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
+ ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
+ Data.ReductionOps);
+ for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
+ // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
+ llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
+ llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
+ llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
+ TaskRedInput.getPointer(), Idxs,
+ /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
+ ".rd_input.gep.");
+ LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
+ // ElemLVal.reduce_shar = &Shareds[Cnt];
+ LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
+ RCG.emitSharedLValue(CGF, Cnt);
+ llvm::Value *CastedShared =
+ CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
+ CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
+ RCG.emitAggregateType(CGF, Cnt);
+ llvm::Value *SizeValInChars;
+ llvm::Value *SizeVal;
+ std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
+ // We use delayed creation/initialization for VLAs, array sections and
+ // custom reduction initializations. It is required because the runtime does
+ // not provide a way to pass the sizes of VLAs/array sections to the
+ // initializer/combiner/finalizer functions and does not pass the pointer to
+ // the original reduction item to the initializer. Instead, threadprivate
+ // global variables are used to store these values and make them available
+ // to those functions.
+ bool DelayedCreation = !!SizeVal;
+ SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
+ /*isSigned=*/false);
+ LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
+ CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
+ // ElemLVal.reduce_init = init;
+ LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
+ llvm::Value *InitAddr =
+ CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
+ CGF.EmitStoreOfScalar(InitAddr, InitLVal);
+ DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
+ // ElemLVal.reduce_fini = fini;
+ LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
+ llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
+ llvm::Value *FiniAddr = Fini
+ ? CGF.EmitCastToVoidPtr(Fini)
+ : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
+ CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
+ // ElemLVal.reduce_comb = comb;
+ LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
+ llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
+ CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
+ RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
+ CGF.EmitStoreOfScalar(CombAddr, CombLVal);
+ // ElemLVal.flags = 0;
+ LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
+ if (DelayedCreation) {
+ CGF.EmitStoreOfScalar(
+ llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
+ FlagsLVal);
+ } else
+ CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
+ }
+ // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
+ // *data);
+ llvm::Value *Args[] = {
+ CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
+ /*isSigned=*/true),
+ llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
+ CGM.VoidPtrTy)};
+ return CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
+}
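A hypothetical source fragment that would reach emitTaskReductionInit once the surrounding taskloop-reduction support is wired up (names invented; each reduction list item becomes one kmp_task_red_input_t element of the .rd_input. array built above):

    void sum(const double *a, int n) {
      double s = 0.0;
    #pragma omp parallel
    #pragma omp single
      {
    #pragma omp taskloop reduction(+ : s)
        for (int i = 0; i < n; ++i)
          s += a[i];
      }
      // s now holds the reduced value.
    }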
+
+void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ ReductionCodeGen &RCG,
+ unsigned N) {
+ auto Sizes = RCG.getSizes(N);
+ // Emit a threadprivate global variable if the type is non-constant
+ // (Sizes.second != nullptr).
+ if (Sizes.second) {
+ llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
+ /*isSigned=*/false);
+ Address SizeAddr = getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().getSizeType(),
+ generateUniqueName("reduction_size", Loc, N));
+ CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
+ }
+ // Store the address of the original reduction item if a custom initializer
+ // is used.
+ if (RCG.usesReductionInitializer(N)) {
+ Address SharedAddr = getAddrOfArtificialThreadPrivate(
+ CGF, CGM.getContext().VoidPtrTy,
+ generateUniqueName("reduction", Loc, N));
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
+ SharedAddr, /*IsVolatile=*/false);
+ }
+}
+
+Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ llvm::Value *ReductionsPtr,
+ LValue SharedLVal) {
+ // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
+ // *d);
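+ // A sketch of the resulting call (value names are illustrative):
+ //   %priv = __kmpc_task_reduction_get_th_data(gtid, ReductionsPtr,
+ //                                             SharedLVal.getPointer());
+ // Given the taskgroup descriptor and the address of the original (shared)
+ // reduction item, the runtime returns the address of the current task's
+ // private copy of that item.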
+ llvm::Value *Args[] = {
+ CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
+ /*isSigned=*/true),
+ ReductionsPtr,
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
+ CGM.VoidPtrTy)};
+ return Address(
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
+ SharedLVal.getAlignment());
+}
+
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
@@ -4874,25 +5734,45 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
return Body;
}
-/// \brief Emit the num_teams clause of an enclosed teams directive at the
-/// target region scope. If there is no teams directive associated with the
-/// target directive, or if there is no num_teams clause associated with the
-/// enclosed teams directive, return nullptr.
+/// Emit the number of teams for a target directive. Inspect the num_teams
+/// clause associated with a teams construct combined or closely nested
+/// with the target directive.
+///
+/// Emit a team of size one for directives such as 'target parallel' that
+/// have no associated teams construct.
+///
+/// Otherwise, return nullptr.
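+///
+/// For example (an illustrative sketch, not an exhaustive list):
+///   #pragma omp target teams num_teams(4)  -> the value 4
+///   #pragma omp target parallel            -> the value 1
+///   #pragma omp target                     -> num_teams of an enclosed teams
+///                                             construct (0 if the clause is
+///                                             absent), or nullptr if there
+///                                             is no teams construct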
static llvm::Value *
-emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
- CodeGenFunction &CGF,
- const OMPExecutableDirective &D) {
+emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
"teams directive expected to be "
"emitted only for the host!");
- // FIXME: For the moment we do not support combined directives with target and
- // teams, so we do not expect to get any num_teams clause in the provided
- // directive. Once we support that, this assertion can be replaced by the
- // actual emission of the clause expression.
- assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
- "Not expecting clause in directive.");
+ auto &Bld = CGF.Builder;
+
+ // If the target directive is combined with a teams directive:
+ // Return the value in the num_teams clause, if any.
+ // Otherwise, return 0 to denote the runtime default.
+ if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
+ if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
+ CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
+ auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
+ /*IgnoreResultAssign*/ true);
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+ // The default value is 0.
+ return Bld.getInt32(0);
+ }
+
+ // If the target directive is combined with a parallel directive but not a
+ // teams directive, start one team.
+ if (isOpenMPParallelDirective(D.getDirectiveKind()))
+ return Bld.getInt32(1);
// If the current target region has a teams region enclosed, we need to get
// the number of teams to pass to the runtime function call. This is done
@@ -4910,38 +5790,92 @@ emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
- return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
- /*IsSigned=*/true);
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
}
// If we have an enclosed teams directive but no num_teams clause we use
// the default value 0.
- return CGF.Builder.getInt32(0);
+ return Bld.getInt32(0);
}
// No teams associated with the directive.
return nullptr;
}
-/// \brief Emit the thread_limit clause of an enclosed teams directive at the
-/// target region scope. If there is no teams directive associated with the
-/// target directive, or if there is no thread_limit clause associated with the
-/// enclosed teams directive, return nullptr.
+/// Emit the number of threads for a target directive. Inspect the
+/// thread_limit clause associated with a teams construct combined or closely
+/// nested with the target directive.
+///
+/// Emit the num_threads clause for directives such as 'target parallel' that
+/// have no associated teams construct.
+///
+/// Otherwise, return nullptr.
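+///
+/// For example (an illustrative sketch):
+///   #pragma omp target teams thread_limit(64)    -> the value 64
+///   #pragma omp target parallel num_threads(32)  -> the value 32
+///   #pragma omp target teams distribute parallel for thread_limit(64)
+///       num_threads(32)                          -> min(64, 32), i.e. 32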
static llvm::Value *
-emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
- CodeGenFunction &CGF,
- const OMPExecutableDirective &D) {
+emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+ CodeGenFunction &CGF,
+ const OMPExecutableDirective &D) {
assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
"teams directive expected to be "
"emitted only for the host!");
- // FIXME: For the moment we do not support combined directives with target and
- // teams, so we do not expect to get any thread_limit clause in the provided
- // directive. Once we support that, this assertion can be replaced by the
- // actual emission of the clause expression.
- assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
- "Not expecting clause in directive.");
+ auto &Bld = CGF.Builder;
+
+ //
+ // If the target directive is combined with a teams directive:
+ // Return the value in the thread_limit clause, if any.
+ //
+ // If the target directive is combined with a parallel directive:
+ // Return the value in the num_threads clause, if any.
+ //
+ // If both clauses are set, select the minimum of the two.
+ //
+ // If neither the teams nor the parallel combined directive sets the number
+ // of threads in a team, return 0 to denote the runtime default.
+ //
+ // If this is not a teams directive, return nullptr.
+
+ if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
+ isOpenMPParallelDirective(D.getDirectiveKind())) {
+ llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
+ llvm::Value *NumThreadsVal = nullptr;
+ llvm::Value *ThreadLimitVal = nullptr;
+
+ if (const auto *ThreadLimitClause =
+ D.getSingleClause<OMPThreadLimitClause>()) {
+ CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
+ auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
+ /*IgnoreResultAssign*/ true);
+ ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+ if (const auto *NumThreadsClause =
+ D.getSingleClause<OMPNumThreadsClause>()) {
+ CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+ llvm::Value *NumThreads =
+ CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+ /*IgnoreResultAssign*/ true);
+ NumThreadsVal =
+ Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
+ }
+
+ // Select the lesser of thread_limit and num_threads.
+ if (NumThreadsVal)
+ ThreadLimitVal = ThreadLimitVal
+ ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
+ ThreadLimitVal),
+ NumThreadsVal, ThreadLimitVal)
+ : NumThreadsVal;
+
+ // Set the default value passed to the runtime if either a teams or a target
+ // parallel type directive is found but no clause is specified.
+ if (!ThreadLimitVal)
+ ThreadLimitVal = DefaultThreadLimitVal;
+
+ return ThreadLimitVal;
+ }
// If the current target region has a teams region enclosed, we need to get
// the thread limit to pass to the runtime function call. This is done
@@ -5494,7 +6428,7 @@ public:
// We have to process the component lists that relate with the same
// declaration in a single chunk so that we can generate the map flags
// correctly. Therefore, we organize all lists in a map.
- llvm::DenseMap<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
+ llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
// Helper function to fill the information map for the different supported
// clauses.
@@ -5818,16 +6752,11 @@ emitOffloadingArrays(CodeGenFunction &CGF,
for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
llvm::Value *BPVal = *BasePointers[i];
- if (BPVal->getType()->isPointerTy())
- BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy);
- else {
- assert(BPVal->getType()->isIntegerTy() &&
- "If not a pointer, the value type must be an integer.");
- BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy);
- }
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
Info.BasePointersArray, 0, i);
+ BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(BPVal, BPAddr);
@@ -5836,16 +6765,11 @@ emitOffloadingArrays(CodeGenFunction &CGF,
Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
llvm::Value *PVal = Pointers[i];
- if (PVal->getType()->isPointerTy())
- PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy);
- else {
- assert(PVal->getType()->isIntegerTy() &&
- "If not a pointer, the value type must be an integer.");
- PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy);
- }
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
Info.PointersArray, 0, i);
+ P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
CGF.Builder.CreateStore(PVal, PAddr);
@@ -5984,8 +6908,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
OffloadError);
// Fill up the pointer arrays and transfer execution to the device.
- auto &&ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, Device,
- OutlinedFnID, OffloadError, OffloadErrorQType,
+ auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device,
+ OutlinedFnID, OffloadError,
&D](CodeGenFunction &CGF, PrePostActionTy &) {
auto &RT = CGF.CGM.getOpenMPRuntime();
// Emit the offloading arrays.
@@ -6021,24 +6945,50 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
// Return value of the runtime offloading call.
llvm::Value *Return;
- auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
- auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
+ auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
+ auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
- // If we have NumTeams defined this means that we have an enclosed teams
- // region. Therefore we also expect to have ThreadLimit defined. These two
- // values should be defined in the presence of a teams directive, regardless
- // of having any clauses associated. If the user is using teams but no
- // clauses, these two values will be the default that should be passed to
- // the runtime library - a 32-bit integer with the value zero.
+ // The target region is an outlined function launched by the runtime
+ // via calls __tgt_target() or __tgt_target_teams().
+ //
+ // __tgt_target() launches a target region with one team and one thread,
+ // executing a serial region. This master thread may in turn launch
+ // more threads within its team upon encountering a parallel region;
+ // however, no additional teams can be launched on the device.
+ //
+ // __tgt_target_teams() launches a target region with one or more teams,
+ // each with one or more threads. This call is required for target
+ // constructs such as:
+ // 'target teams'
+ // 'target' / 'teams'
+ // 'target teams distribute parallel for'
+ // 'target parallel'
+ // and so on.
+ //
+ // Note that on the host and CPU targets, the runtime implementation of
+ // these calls simply calls the outlined function without forking threads.
+ // The outlined functions themselves have runtime calls to
+ // __kmpc_fork_teams() and __kmpc_fork_call() for this purpose, codegen'd by
+ // the compiler in emitTeamsCall() and emitParallelCall().
+ //
+ // In contrast, on the NVPTX target, the implementation of
+ // __tgt_target_teams() launches a GPU kernel with the requested number
+ // of teams and threads, so no additional calls to the runtime are required.
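+ //
+ // As a sketch, the two call shapes emitted below are (argument names are
+ // illustrative; see OffloadingArgs):
+ //   __tgt_target_teams(device_id, outlined_fn_id, arg_num, base_ptrs, ptrs,
+ //                      sizes, map_types, num_teams, num_threads)
+ //   __tgt_target(device_id, outlined_fn_id, arg_num, base_ptrs, ptrs,
+ //                sizes, map_types)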
if (NumTeams) {
- assert(ThreadLimit && "Thread limit expression should be available along "
- "with number of teams.");
+ // If we have NumTeams defined this means that we have an enclosed teams
+ // region. Therefore we also expect to have NumThreads defined. These two
+ // values should be defined in the presence of a teams directive,
+ // regardless of having any clauses associated. If the user is using teams
+ // but no clauses, these two values will be the default that should be
+ // passed to the runtime library - a 32-bit integer with the value zero.
+ assert(NumThreads && "Thread limit expression should be available along "
+ "with number of teams.");
llvm::Value *OffloadingArgs[] = {
DeviceID, OutlinedFnID,
PointerNum, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
Info.MapTypesArray, NumTeams,
- ThreadLimit};
+ NumThreads};
Return = CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
} else {
@@ -6095,17 +7045,18 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
if (!S)
return;
- // If we find a OMP target directive, codegen the outline function and
- // register the result.
- // FIXME: Add other directives with target when they become supported.
- bool isTargetDirective = isa<OMPTargetDirective>(S);
+ // Codegen OMP target directives that offload compute to the device.
+ bool requiresDeviceCodegen =
+ isa<OMPExecutableDirective>(S) &&
+ isOpenMPTargetExecutionDirective(
+ cast<OMPExecutableDirective>(S)->getDirectiveKind());
- if (isTargetDirective) {
- auto *E = cast<OMPExecutableDirective>(S);
+ if (requiresDeviceCodegen) {
+ auto &E = *cast<OMPExecutableDirective>(S);
unsigned DeviceID;
unsigned FileID;
unsigned Line;
- getTargetEntryUniqueInfo(CGM.getContext(), E->getLocStart(), DeviceID,
+ getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
FileID, Line);
// Is this a target region that should not be emitted as an entry point? If
@@ -6114,13 +7065,22 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
ParentName, Line))
return;
- llvm::Function *Fn;
- llvm::Constant *Addr;
- std::tie(Fn, Addr) =
- CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction(
- CGM, cast<OMPTargetDirective>(*E), ParentName,
- /*isOffloadEntry=*/true);
- assert(Fn && Addr && "Target region emission failed.");
+ switch (S->getStmtClass()) {
+ case Stmt::OMPTargetDirectiveClass:
+ CodeGenFunction::EmitOMPTargetDeviceFunction(
+ CGM, ParentName, cast<OMPTargetDirective>(*S));
+ break;
+ case Stmt::OMPTargetParallelDirectiveClass:
+ CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
+ CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
+ break;
+ case Stmt::OMPTargetTeamsDirectiveClass:
+ CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
+ CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
+ break;
+ default:
+ llvm_unreachable("Unknown target directive for OpenMP device codegen.");
+ }
return;
}
@@ -6182,7 +7142,7 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
}
}
- // If we are in target mode we do not emit any global (declare target is not
+ // If we are in target mode, we do not emit any global (declare target is not
// implemented yet). Therefore we signal that GD was processed in this case.
return true;
}
@@ -6271,8 +7231,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Generate the code for the opening of the data environment. Capture all the
// arguments of the runtime call by reference because they are used in the
// closing of the region.
- auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction](
- CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF,
+ PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;
@@ -6318,8 +7278,7 @@ void CGOpenMPRuntime::emitTargetDataCalls(
};
// Generate code for the closing of the data region.
- auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) {
assert(Info.isValid() && "Invalid data environment closing arguments.");
llvm::Value *BasePointersArrayArg = nullptr;
@@ -6397,7 +7356,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
"Expecting either target enter, exit data, or update directives.");
// Generate the code for the opening of the data environment.
- auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
MappableExprsHandler::MapValuesArrayTy Pointers;