diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp | 1469 |
1 files changed, 856 insertions, 613 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index 39e1cdf..6135cf3 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -26,7 +26,7 @@ using namespace CodeGen; namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. -class OMPLexicalScope final : public CodeGenFunction::LexicalScope { +class OMPLexicalScope : public CodeGenFunction::LexicalScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { for (const auto *C : S.clauses()) { if (auto *CPI = OMPClauseWithPreInit::get(C)) { @@ -54,10 +54,11 @@ class OMPLexicalScope final : public CodeGenFunction::LexicalScope { public: OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S, - bool AsInlined = false) + bool AsInlined = false, bool EmitPreInitStmt = true) : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), InlinedShareds(CGF) { - emitPreInitStmt(CGF, S); + if (EmitPreInitStmt) + emitPreInitStmt(CGF, S); if (AsInlined) { if (S.hasAssociatedStmt()) { auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); @@ -81,6 +82,39 @@ public: } }; +/// Lexical scope for OpenMP parallel construct, that handles correct codegen +/// for captured expressions. +class OMPParallelScope final : public OMPLexicalScope { + bool EmitPreInitStmt(const OMPExecutableDirective &S) { + OpenMPDirectiveKind Kind = S.getDirectiveKind(); + return !(isOpenMPTargetExecutionDirective(Kind) || + isOpenMPLoopBoundSharingDirective(Kind)) && + isOpenMPParallelDirective(Kind); + } + +public: + OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : OMPLexicalScope(CGF, S, + /*AsInlined=*/false, + /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} +}; + +/// Lexical scope for OpenMP teams construct, that handles correct codegen +/// for captured expressions. +class OMPTeamsScope final : public OMPLexicalScope { + bool EmitPreInitStmt(const OMPExecutableDirective &S) { + OpenMPDirectiveKind Kind = S.getDirectiveKind(); + return !isOpenMPTargetExecutionDirective(Kind) && + isOpenMPTeamsDirective(Kind); + } + +public: + OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : OMPLexicalScope(CGF, S, + /*AsInlined=*/false, + /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} +}; + /// Private scope for OpenMP loop-based directives, that supports capturing /// of used expression from loop statement. class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { @@ -194,21 +228,58 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, QualType DstType, return TmpAddr; } -llvm::Function * -CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { - assert( - CapturedStmtInfo && - "CapturedStmtInfo should be set when generating the captured function"); - const CapturedDecl *CD = S.getCapturedDecl(); - const RecordDecl *RD = S.getCapturedRecordDecl(); +static QualType getCanonicalParamType(ASTContext &C, QualType T) { + if (T->isLValueReferenceType()) { + return C.getLValueReferenceType( + getCanonicalParamType(C, T.getNonReferenceType()), + /*SpelledAsLValue=*/false); + } + if (T->isPointerType()) + return C.getPointerType(getCanonicalParamType(C, T->getPointeeType())); + return C.getCanonicalParamType(T); +} + +namespace { + /// Contains required data for proper outlined function codegen. + struct FunctionOptions { + /// Captured statement for which the function is generated. + const CapturedStmt *S = nullptr; + /// true if cast to/from UIntPtr is required for variables captured by + /// value. + bool UIntPtrCastRequired = true; + /// true if only casted argumefnts must be registered as local args or VLA + /// sizes. + bool RegisterCastedArgsOnly = false; + /// Name of the generated function. + StringRef FunctionName; + explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, + bool RegisterCastedArgsOnly, + StringRef FunctionName) + : S(S), UIntPtrCastRequired(UIntPtrCastRequired), + RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), + FunctionName(FunctionName) {} + }; +} + +static std::pair<llvm::Function *, bool> emitOutlinedFunctionPrologue( + CodeGenFunction &CGF, FunctionArgList &Args, + llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> + &LocalAddrs, + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> + &VLASizes, + llvm::Value *&CXXThisValue, const FunctionOptions &FO) { + const CapturedDecl *CD = FO.S->getCapturedDecl(); + const RecordDecl *RD = FO.S->getCapturedRecordDecl(); assert(CD->hasBody() && "missing CapturedDecl body"); + CXXThisValue = nullptr; // Build the argument list. + CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGM.getContext(); - FunctionArgList Args; + bool HasUIntPtrArgs = false; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); - auto I = S.captures().begin(); + auto I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { QualType ArgType = FD->getType(); IdentifierInfo *II = nullptr; @@ -220,29 +291,26 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { // deal with pointers. We can pass in the same way the VLA type sizes to the // outlined function. if ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || - I->capturesVariableArrayType()) - ArgType = Ctx.getUIntPtrType(); + I->capturesVariableArrayType()) { + HasUIntPtrArgs = true; + if (FO.UIntPtrCastRequired) + ArgType = Ctx.getUIntPtrType(); + } if (I->capturesVariable() || I->capturesVariableByCopy()) { CapVar = I->getCapturedVar(); II = CapVar->getIdentifier(); } else if (I->capturesThis()) - II = &getContext().Idents.get("this"); + II = &Ctx.Idents.get("this"); else { assert(I->capturesVariableArrayType()); - II = &getContext().Idents.get("vla"); - } - if (ArgType->isVariablyModifiedType()) { - bool IsReference = ArgType->isLValueReferenceType(); - ArgType = - getContext().getCanonicalParamType(ArgType.getNonReferenceType()); - if (IsReference && !ArgType->isPointerType()) { - ArgType = getContext().getLValueReferenceType( - ArgType, /*SpelledAsLValue=*/false); - } + II = &Ctx.Idents.get("vla"); } - Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, - FD->getLocation(), II, ArgType)); + if (ArgType->isVariablyModifiedType()) + ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType()); + Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, + FD->getLocation(), II, ArgType, + ImplicitParamDecl::Other)); ++I; } Args.append( @@ -255,89 +323,166 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); - llvm::Function *F = llvm::Function::Create( - FuncLLVMTy, llvm::GlobalValue::InternalLinkage, - CapturedStmtInfo->getHelperName(), &CGM.getModule()); + llvm::Function *F = + llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + FO.FunctionName, &CGM.getModule()); CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); if (CD->isNothrow()) F->addFnAttr(llvm::Attribute::NoUnwind); // Generate the function. - StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), - CD->getBody()->getLocStart()); + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); - I = S.captures().begin(); + I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { // If we are capturing a pointer by copy we don't need to do anything, just // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = I->getCapturedVar(); - Address LocalAddr = GetAddrOfLocalVar(Args[Cnt]); + Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); // If the variable is a reference we need to materialize it here. if (CurVD->getType()->isReferenceType()) { - Address RefAddr = CreateMemTemp(CurVD->getType(), getPointerAlign(), - ".materialized_ref"); - EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, /*Volatile=*/false, - CurVD->getType()); + Address RefAddr = CGF.CreateMemTemp( + CurVD->getType(), CGM.getPointerAlign(), ".materialized_ref"); + CGF.EmitStoreOfScalar(LocalAddr.getPointer(), RefAddr, + /*Volatile=*/false, CurVD->getType()); LocalAddr = RefAddr; } - setAddrOfLocalVar(CurVD, LocalAddr); + if (!FO.RegisterCastedArgsOnly) + LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); ++Cnt; ++I; continue; } - LValue ArgLVal = - MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), - AlignmentSource::Decl); + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]), + Args[Cnt]->getType(), BaseInfo); if (FD->hasCapturedVLAType()) { - LValue CastedArgLVal = - MakeAddrLValue(castValueFromUintptr(*this, FD->getType(), - Args[Cnt]->getName(), ArgLVal), - FD->getType(), AlignmentSource::Decl); + if (FO.UIntPtrCastRequired) { + ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), + Args[Cnt]->getName(), + ArgLVal), + FD->getType(), BaseInfo); + } auto *ExprArg = - EmitLoadOfLValue(CastedArgLVal, SourceLocation()).getScalarVal(); + CGF.EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); auto VAT = FD->getCapturedVLAType(); - VLASizeMap[VAT->getSizeExpr()] = ExprArg; + VLASizes.insert({Args[Cnt], {VAT->getSizeExpr(), ExprArg}}); } else if (I->capturesVariable()) { auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); Address ArgAddr = ArgLVal.getAddress(); if (!VarTy->isReferenceType()) { if (ArgLVal.getType()->isLValueReferenceType()) { - ArgAddr = EmitLoadOfReference( + ArgAddr = CGF.EmitLoadOfReference( ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { assert(ArgLVal.getType()->isPointerType()); - ArgAddr = EmitLoadOfPointer( + ArgAddr = CGF.EmitLoadOfPointer( ArgAddr, ArgLVal.getType()->castAs<PointerType>()); } } - setAddrOfLocalVar( - Var, Address(ArgAddr.getPointer(), getContext().getDeclAlign(Var))); + if (!FO.RegisterCastedArgsOnly) { + LocalAddrs.insert( + {Args[Cnt], + {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); + } } else if (I->capturesVariableByCopy()) { assert(!FD->getType()->isAnyPointerType() && "Not expecting a captured pointer."); auto *Var = I->getCapturedVar(); QualType VarTy = Var->getType(); - setAddrOfLocalVar(Var, castValueFromUintptr(*this, FD->getType(), - Args[Cnt]->getName(), ArgLVal, - VarTy->isReferenceType())); + LocalAddrs.insert( + {Args[Cnt], + {Var, + FO.UIntPtrCastRequired + ? castValueFromUintptr(CGF, FD->getType(), Args[Cnt]->getName(), + ArgLVal, VarTy->isReferenceType()) + : ArgLVal.getAddress()}}); } else { // If 'this' is captured, load it into CXXThisValue. assert(I->capturesThis()); - CXXThisValue = - EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); + CXXThisValue = CGF.EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()) + .getScalarVal(); + LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress()}}); } ++Cnt; ++I; } + return {F, HasUIntPtrArgs}; +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + // Build the argument list. + bool NeedWrapperFunction = + getDebugInfo() && + CGM.getCodeGenOpts().getDebugInfo() >= codegenoptions::LimitedDebugInfo; + FunctionArgList Args; + llvm::DenseMap<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; + llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; + FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, + CapturedStmtInfo->getHelperName()); + llvm::Function *F; + bool HasUIntPtrArgs; + std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue( + *this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); + for (const auto &LocalAddrPair : LocalAddrs) { + if (LocalAddrPair.second.first) { + setAddrOfLocalVar(LocalAddrPair.second.first, + LocalAddrPair.second.second); + } + } + for (const auto &VLASizePair : VLASizes) + VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; PGO.assignRegionCounters(GlobalDecl(CD), F); CapturedStmtInfo->EmitBody(*this, CD->getBody()); FinishFunction(CD->getBodyRBrace()); - - return F; + if (!NeedWrapperFunction || !HasUIntPtrArgs) + return F; + + FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, + /*RegisterCastedArgsOnly=*/true, + ".nondebug_wrapper."); + CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); + WrapperCGF.disableDebugInfo(); + Args.clear(); + LocalAddrs.clear(); + VLASizes.clear(); + llvm::Function *WrapperF = + emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, + WrapperCGF.CXXThisValue, WrapperFO).first; + LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); + llvm::SmallVector<llvm::Value *, 4> CallArgs; + for (const auto *Arg : Args) { + llvm::Value *CallArg; + auto I = LocalAddrs.find(Arg); + if (I != LocalAddrs.end()) { + LValue LV = + WrapperCGF.MakeAddrLValue(I->second.second, Arg->getType(), BaseInfo); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); + } else { + auto EI = VLASizes.find(Arg); + if (EI != VLASizes.end()) + CallArg = EI->second.second; + else { + LValue LV = WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg), + Arg->getType(), BaseInfo); + CallArg = WrapperCGF.EmitLoadOfScalar(LV, SourceLocation()); + } + } + CallArgs.emplace_back(CallArg); + } + WrapperCGF.Builder.CreateCall(F, CallArgs); + WrapperCGF.FinishFunction(); + return WrapperF; } //===----------------------------------------------------------------------===// @@ -404,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } -/// Check if the combiner is a call to UDR combiner and if it is so return the -/// UDR decl used for reduction. -static const OMPDeclareReductionDecl * -getReductionInit(const Expr *ReductionOp) { - if (auto *CE = dyn_cast<CallExpr>(ReductionOp)) - if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) - if (auto *DRE = - dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) - return DRD; - return nullptr; -} - -static void emitInitWithReductionInitializer(CodeGenFunction &CGF, - const OMPDeclareReductionDecl *DRD, - const Expr *InitOp, - Address Private, Address Original, - QualType Ty) { - if (DRD->getInitializer()) { - std::pair<llvm::Function *, llvm::Function *> Reduction = - CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); - auto *CE = cast<CallExpr>(InitOp); - auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); - const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); - const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); - auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); - auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), - [=]() -> Address { return Private; }); - PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), - [=]() -> Address { return Original; }); - (void)PrivateScope.Privatize(); - RValue Func = RValue::get(Reduction.second); - CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); - CGF.EmitIgnoredExpr(InitOp); - } else { - llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); - auto *GV = new llvm::GlobalVariable( - CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".init"); - LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); - RValue InitRVal; - switch (CGF.getEvaluationKind(Ty)) { - case TEK_Scalar: - InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); - break; - case TEK_Complex: - InitRVal = - RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); - break; - case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); - break; - } - OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); - CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), - /*IsInitializer=*/false); - } -} - -/// \brief Emit initialization of arrays of complex types. -/// \param DestAddr Address of the array. -/// \param Type Type of array. -/// \param Init Initial expression of array. -/// \param SrcAddr Address of the original array. -static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, - QualType Type, const Expr *Init, - Address SrcAddr = Address::invalid()) { - auto *DRD = getReductionInit(Init); - // Perform element-by-element initialization. - QualType ElementTy; - - // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); - if (DRD) - SrcAddr = - CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); - - llvm::Value *SrcBegin = nullptr; - if (DRD) - SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); - // Cast from pointer to array type to pointer to single element. - auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); - // The basic structure here is a while-do loop. - auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); - auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); - auto IsEmpty = - CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); - CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); - - // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); - CGF.EmitBlock(BodyBB); - - CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); - - llvm::PHINode *SrcElementPHI = nullptr; - Address SrcElementCurrent = Address::invalid(); - if (DRD) { - SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, - "omp.arraycpy.srcElementPast"); - SrcElementPHI->addIncoming(SrcBegin, EntryBB); - SrcElementCurrent = - Address(SrcElementPHI, - SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - } - llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( - DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); - DestElementPHI->addIncoming(DestBegin, EntryBB); - Address DestElementCurrent = - Address(DestElementPHI, - DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - - // Emit copy. - { - CodeGenFunction::RunCleanupsScope InitScope(CGF); - if (DRD && (DRD->getInitializer() || !Init)) { - emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, - SrcElementCurrent, ElementTy); - } else - CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), - /*IsInitializer=*/false); - } - - if (DRD) { - // Shift the address forward by one element. - auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( - SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); - } - - // Shift the address forward by one element. - auto DestElementNext = CGF.Builder.CreateConstGEP1_32( - DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - // Check whether we've reached the end. - auto Done = - CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); - CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); - - // Done. - CGF.EmitBlock(DoneBB, /*IsFinished=*/true); -} - void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { @@ -906,259 +901,111 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } -static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV, llvm::Value *Addr) { - Address Tmp = Address::invalid(); - Address TopTmp = Address::invalid(); - Address MostTopTmp = Address::invalid(); - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - Tmp = CGF.CreateMemTemp(BaseTy); - if (TopTmp.isValid()) - CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); - else - MostTopTmp = Tmp; - TopTmp = Tmp; - BaseTy = BaseTy->getPointeeType(); - } - llvm::Type *Ty = BaseLV.getPointer()->getType(); - if (Tmp.isValid()) - Ty = Tmp.getElementType(); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); - if (Tmp.isValid()) { - CGF.Builder.CreateStore(Addr, Tmp); - return MostTopTmp; - } - return Address(Addr, BaseLV.getAlignment()); -} - -static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV) { - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (auto *PtrTy = BaseTy->getAs<PointerType>()) - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); - else { - BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), - BaseTy->castAs<ReferenceType>()); - } - BaseTy = BaseTy->getPointeeType(); - } - return CGF.MakeAddrLValue( - Address( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), - BaseLV.getAlignment()), - BaseLV.getType(), BaseLV.getAlignmentSource()); -} - void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return; + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); auto IRed = C->reduction_ops().begin(); - for (auto IRef : C->varlists()) { - auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); - auto *DRD = getReductionInit(*IRed); - if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) { - auto *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) - Base = TempOASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast<DeclRefExpr>(Base); - auto *OrigVD = cast<VarDecl>(DE->getDecl()); - auto OASELValueLB = EmitOMPArraySectionExpr(OASE); - auto OASELValueUB = - EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = - loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), - OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, OASELValueLB]() -> Address { - return OASELValueLB.getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, - OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit VarDecl with copy init for arrays. - // Get the address of the original variable captured in current - // captured region. - auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), - OASELValueLB.getPointer()); - Size = Builder.CreateNUWAdd( - Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast<OpaqueValueExpr>( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get(Size)); - EmitVariablyModifiedType(PrivateVD->getType()); - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, - OASELValueLB.getAddress()); - EmitAutoVarCleanups(Emission); - // Emit private VarDecl with reduction init. - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - OASELValueLB.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), - OASELValueLB.getType(), OriginalBaseLValue, - Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) { - auto *Base = ASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast<DeclRefExpr>(Base); - auto *OrigVD = cast<VarDecl>(DE->getDecl()); - auto ASELValue = EmitLValue(ASE); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = loadToBegin( - *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [this, ASELValue]() -> Address { - return ASELValue.getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, - OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - ASELValue.getAddress(), - ASELValue.getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - ASELValue.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), ASELValue.getType(), - OriginalBaseLValue, Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), - "rhs.begin"); - }); - } else { - auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); - QualType Type = PrivateVD->getType(); - if (getContext().getAsArrayType(Type)) { - // Store the address of the original variable associated with the LHS - // implicit variable. - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - Address OriginalAddr = EmitLValue(&DRE).getAddress(); - PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, - LHSVD]() -> Address { - OriginalAddr = Builder.CreateElementBitCast( - OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); - return OriginalAddr; - }); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { - if (Type->isVariablyModifiedType()) { - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast<OpaqueValueExpr>( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get( - getTypeSize(OrigVD->getType().getNonReferenceType()))); - EmitVariablyModifiedType(Type); - } - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, OriginalAddr); - EmitAutoVarCleanups(Emission); - return Emission.getAllocatedAddress(); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), - ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); - }); - } else { - // Store the address of the original variable associated with the LHS - // implicit variable. - Address OriginalAddr = Address::invalid(); - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, - &OriginalAddr]() -> Address { - DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - OriginalAddr = EmitLValue(&DRE).getAddress(); - return OriginalAddr; - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - OriginalAddr, - PrivateVD->getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - return Addr; - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Shareds.emplace_back(Ref); + Privates.emplace_back(*IPriv); + ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto ILHS = LHSs.begin(); + auto IRHS = RHSs.begin(); + auto IPriv = Privates.begin(); + for (const auto *IRef : Shareds) { + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); + // Emit private VarDecl with reduction init. + RedCG.emitSharedLValue(*this, Count); + RedCG.emitAggregateType(*this, Count); + auto Emission = EmitAutoVarAlloca(*PrivateVD); + RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), + RedCG.getSharedLValue(Count), + [&Emission](CodeGenFunction &CGF) { + CGF.EmitAutoVarInit(Emission); + return true; + }); + EmitAutoVarCleanups(Emission); + Address BaseAddr = RedCG.adjustPrivateAddress( + *this, Count, Emission.getAllocatedAddress()); + bool IsRegistered = PrivateScope.addPrivate( + RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + if (isa<OMPArraySectionExpr>(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else if (isa<ArraySubscriptExpr>(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); + }); + } else { + QualType Type = PrivateVD->getType(); + bool IsArray = getContext().getAsArrayType(Type) != nullptr; + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); + // Store the address of the original variable associated with the LHS + // implicit variable. + if (IsArray) { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); } - ++ILHS; - ++IRHS; - ++IPriv; - ++IRed; + PrivateScope.addPrivate( + LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { + return IsArray + ? Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin") + : GetAddrOfLocalVar(PrivateVD); + }); } + ++ILHS; + ++IRHS; + ++IPriv; + ++Count; } } void CodeGenFunction::EmitOMPReductionClauseFinal( - const OMPExecutableDirective &D) { + const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { if (!HaveInsertPoint()) return; llvm::SmallVector<const Expr *, 8> Privates; @@ -1174,14 +1021,15 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); } if (HasAtLeastOneReduction) { + bool WithNowait = D.getSingleClause<OMPNowaitClause>() || + isOpenMPParallelDirective(D.getDirectiveKind()) || + D.getDirectiveKind() == OMPD_simd; + bool SimpleReduction = D.getDirectiveKind() == OMPD_simd; // Emit nowait reduction if nowait clause is present or directive is a // parallel directive (it always has implicit barrier). CGM.getOpenMPRuntime().emitReduction( *this, D.getLocEnd(), Privates, LHSExprs, RHSExprs, ReductionOps, - D.getSingleClause<OMPNowaitClause>() || - isOpenMPParallelDirective(D.getDirectiveKind()) || - D.getDirectiveKind() == OMPD_simd, - D.getDirectiveKind() == OMPD_simd); + {WithNowait, SimpleReduction, ReductionKind}); } } @@ -1210,14 +1058,23 @@ static void emitPostUpdateForReductionClause( CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &S, - OpenMPDirectiveKind InnermostKind, - const RegionCodeGenTy &CodeGen) { - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). - emitParallelOrTeamsOutlinedFunction(S, - *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); +namespace { +/// Codegen lambda for appending distribute lower and upper bounds to outlined +/// parallel function. This is necessary for combined constructs such as +/// 'distribute parallel for' +typedef llvm::function_ref<void(CodeGenFunction &, + const OMPExecutableDirective &, + llvm::SmallVectorImpl<llvm::Value *> &)> + CodeGenBoundParametersTy; +} // anonymous namespace + +static void emitCommonOMPParallelDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + const CodeGenBoundParametersTy &CodeGenBoundParameters) { + const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); auto NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), @@ -1239,13 +1096,22 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, } } - OMPLexicalScope Scope(CGF, S); + OMPParallelScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; + // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk + // lower and upper bounds with the pragma 'for' chunking mechanism. + // The following lambda takes care of appending the lower and upper bound + // parameters when necessary + CodeGenBoundParameters(CGF, S, CapturedVars); CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } +static void emitEmptyBoundParameters(CodeGenFunction &, + const OMPExecutableDirective &, + llvm::SmallVectorImpl<llvm::Value *> &) {} + void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // Emit parallel region as a standalone region. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -1264,9 +1130,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); emitPostUpdateForReductionClause( *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -1611,6 +1478,13 @@ void CodeGenFunction::EmitOMPSimdFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } +static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); +} + void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); @@ -1677,7 +1551,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { // Emit final copy of the lastprivate variables at the end of loops. if (HasLastprivateClause) CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -1693,9 +1567,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { +void CodeGenFunction::EmitOMPOuterLoop( + bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, + CodeGenFunction::OMPPrivateScope &LoopScope, + const CodeGenFunction::OMPLoopArguments &LoopArgs, + const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, + const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { auto &RT = CGM.getOpenMPRuntime(); const Expr *IVExpr = S.getIterationVariable(); @@ -1713,15 +1590,18 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { - // UB = min(UB, GlobalUB) - EmitIgnoredExpr(S.getEnsureUpperBound()); + // UB = min(UB, GlobalUB) or + // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. + // 'distribute parallel for') + EmitIgnoredExpr(LoopArgs.EUB); // IV = LB - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(LoopArgs.Init); // IV < UB - BoolCondVal = EvaluateExprAsBool(S.getCond()); + BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); } else { - BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, - LB, UB, ST); + BoolCondVal = + RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, + LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); } // If there are any cleanups between here and the loop-exit scope, @@ -1741,7 +1621,7 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, // Emit "IV = LB" (in case of static schedule, we have already calculated new // LB for loop condition and emitted it above). if (DynamicOrOrdered) - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(LoopArgs.Init); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); @@ -1755,24 +1635,27 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getLocStart(); - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); - }, - [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { - if (Ordered) { - CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( - CGF, Loc, IVSize, IVSigned); - } - }); + + // when 'distribute' is not combined with a 'for': + // while (idx <= UB) { BODY; ++idx; } + // when 'distribute' is combined with a 'for' + // (e.g. 'distribute parallel for') + // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + EmitOMPInnerLoop( + S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); + }, + [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { + CodeGenOrdered(CGF, Loc, IVSize, IVSigned); + }); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); if (!DynamicOrOrdered) { // Emit "LB = LB + Stride", "UB = UB + Stride". - EmitIgnoredExpr(S.getNextLowerBound()); - EmitIgnoredExpr(S.getNextUpperBound()); + EmitIgnoredExpr(LoopArgs.NextLB); + EmitIgnoredExpr(LoopArgs.NextUB); } EmitBranch(CondBlock); @@ -1791,7 +1674,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, void CodeGenFunction::EmitOMPForOuterLoop( const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + const OMPLoopArguments &LoopArgs, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -1800,7 +1684,7 @@ void CodeGenFunction::EmitOMPForOuterLoop( assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, - /*Chunked=*/Chunk != nullptr)) && + LoopArgs.Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. @@ -1858,22 +1742,46 @@ void CodeGenFunction::EmitOMPForOuterLoop( const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (DynamicOrOrdered) { - llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); + auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); + llvm::Value *LBVal = DispatchBounds.first; + llvm::Value *UBVal = DispatchBounds.second; + CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, + LoopArgs.Chunk}; RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, Ordered, UBVal, Chunk); + IVSigned, Ordered, DipatchRTInputValues); } else { RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, IL, LB, UB, ST, Chunk); + Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, + LoopArgs.ST, LoopArgs.Chunk); } - EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, - ST, IL, Chunk); + auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, + const unsigned IVSize, + const bool IVSigned) { + if (Ordered) { + CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, + IVSigned); + } + }; + + OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, + LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); + OuterLoopArgs.IncExpr = S.getInc(); + OuterLoopArgs.Init = S.getInit(); + OuterLoopArgs.Cond = S.getCond(); + OuterLoopArgs.NextLB = S.getNextLowerBound(); + OuterLoopArgs.NextUB = S.getNextUpperBound(); + EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, + emitOMPLoopBodyWithStopPoint, CodeGenOrdered); } +static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, + const unsigned IVSize, const bool IVSigned) {} + void CodeGenFunction::EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, - const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, - Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, + OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, + const CodeGenLoopTy &CodeGenLoopContent) { auto &RT = CGM.getOpenMPRuntime(); @@ -1886,26 +1794,159 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, /* Ordered = */ false, - IL, LB, UB, ST, Chunk); + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, + IVSigned, /* Ordered = */ false, LoopArgs.IL, + LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, + LoopArgs.Chunk); - EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, - S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); + // for combined 'distribute' and 'for' the increment expression of distribute + // is store in DistInc. For 'distribute' alone, it is in Inc. + Expr *IncExpr; + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) + IncExpr = S.getDistInc(); + else + IncExpr = S.getInc(); + + // this routine is shared by 'omp distribute parallel for' and + // 'omp distribute': select the right EUB expression depending on the + // directive + OMPLoopArguments OuterLoopArgs; + OuterLoopArgs.LB = LoopArgs.LB; + OuterLoopArgs.UB = LoopArgs.UB; + OuterLoopArgs.ST = LoopArgs.ST; + OuterLoopArgs.IL = LoopArgs.IL; + OuterLoopArgs.Chunk = LoopArgs.Chunk; + OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound(); + OuterLoopArgs.IncExpr = IncExpr; + OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit(); + OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedNextLowerBound() + : S.getNextLowerBound(); + OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedNextUpperBound() + : S.getNextUpperBound(); + + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, + LoopScope, OuterLoopArgs, CodeGenLoopContent, + emitEmptyOrdered); +} + +/// Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast<VarDecl>(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + +static std::pair<LValue, LValue> +emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + LValue LB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); + + // When composing 'distribute' with 'for' (e.g. as in 'distribute + // parallel for') we need to use the 'distribute' + // chunk lower and upper bounds rather than the whole loop iteration + // space. These are parameters to the outlined function for 'parallel' + // and we copy the bounds of the previous schedule into the + // the current ones. + LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); + LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); + llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); + PrevLBVal = CGF.EmitScalarConversion( + PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), + LS.getIterationVariable()->getType(), SourceLocation()); + llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); + PrevUBVal = CGF.EmitScalarConversion( + PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), + LS.getIterationVariable()->getType(), SourceLocation()); + + CGF.EmitStoreOfScalar(PrevLBVal, LB); + CGF.EmitStoreOfScalar(PrevUBVal, UB); + + return {LB, UB}; +} + +/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then +/// we need to use the LB and UB expressions generated by the worksharing +/// code generation support, whereas in non combined situations we would +/// just emit 0 and the LastIteration expression +/// This function is necessary due to the difference of the LB and UB +/// types for the RT emission routines for 'for_static_init' and +/// 'for_dispatch_init' +static std::pair<llvm::Value *, llvm::Value *> +emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + Address LB, Address UB) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + const Expr *IVExpr = LS.getIterationVariable(); + // when implementing a dynamic schedule for a 'for' combined with a + // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop + // is not normalized as each team only executes its own assigned + // distribute chunk + QualType IteratorTy = IVExpr->getType(); + llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, + SourceLocation()); + llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, + SourceLocation()); + return {LBVal, UBVal}; +} + +static void emitDistributeParallelForDistributeInnerBoundParams( + CodeGenFunction &CGF, const OMPExecutableDirective &S, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { + const auto &Dir = cast<OMPLoopDirective>(S); + LValue LB = + CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); + auto LBCast = CGF.Builder.CreateIntCast( + CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(LBCast); + LValue UB = + CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); + + auto UBCast = CGF.Builder.CreateIntCast( + CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); + CapturedVars.push_back(UBCast); +} + +static void +emitInnerParallelForWhenCombined(CodeGenFunction &CGF, + const OMPLoopDirective &S, + CodeGenFunction::JumpDest LoopExit) { + auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), + emitDistributeParallelForInnerBounds, + emitDistributeParallelForDispatchBounds); + }; + + emitCommonOMPParallelDirective( + CGF, S, OMPD_for, CGInlinedWorksharingLoop, + emitDistributeParallelForDistributeInnerBoundParams); } void CodeGenFunction::EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, + S.getDistInc()); + }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_distribute_parallel_for, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, - /*HasCancel=*/false); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, + /*HasCancel=*/false); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, + /*HasCancel=*/false); } void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( @@ -2003,15 +2044,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( }); } -void CodeGenFunction::EmitOMPTargetTeamsDirective( - const OMPTargetTeamsDirective &S) { - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( const OMPTargetTeamsDistributeDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective( @@ -2052,14 +2084,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( }); } -/// \brief Emit a helper variable and return corresponding lvalue. -static LValue EmitOMPHelperVar(CodeGenFunction &CGF, - const DeclRefExpr *Helper) { - auto VDecl = cast<VarDecl>(Helper->getDecl()); - CGF.EmitVarDecl(*VDecl); - return CGF.EmitLValue(Helper); -} - namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2072,7 +2096,10 @@ namespace { }; } // namespace -bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { +bool CodeGenFunction::EmitOMPWorksharingLoop( + const OMPLoopDirective &S, Expr *EUB, + const CodeGenLoopBoundsTy &CodeGenLoopBounds, + const CodeGenDispatchBoundsTy &CGDispatchBounds) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -2122,10 +2149,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + + std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); + LValue LB = Bounds.first; + LValue UB = Bounds.second; LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = @@ -2211,9 +2238,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. + const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), + ST.getAddress(), IL.getAddress(), + Chunk, EUB); EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, - LB.getAddress(), UB.getAddress(), ST.getAddress(), - IL.getAddress(), Chunk); + LoopArguments, CGDispatchBounds); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S, @@ -2222,7 +2251,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { CGF.EmitLoadOfScalar(IL, S.getLocStart())); }); } - EmitOMPReductionClauseFinal(S); + EmitOMPReductionClauseFinal( + S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) + ? /*Parallel and Simd*/ OMPD_parallel_for_simd + : /*Parallel only*/ OMPD_parallel); // Emit post-update of the reduction variables if IsLastIter != 0. emitPostUpdateForReductionClause( *this, S, [&](CodeGenFunction &CGF) -> llvm::Value * { @@ -2248,12 +2280,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { return HasLastprivateClause; } +/// The following two functions generate expressions for the loop lower +/// and upper bounds in case of static and dynamic (dispatch) schedule +/// of the associated 'for' or 'distribute' loop. +static std::pair<LValue, LValue> +emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + LValue LB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); + return {LB, UB}; +} + +/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not +/// consider the lower and upper bound expressions generated by the +/// worksharing loop support, but we use 0 and the iteration space size as +/// constants +static std::pair<llvm::Value *, llvm::Value *> +emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, + Address LB, Address UB) { + const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); + const Expr *IVExpr = LS.getIterationVariable(); + const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); + llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); + llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); + return {LBVal, UBVal}; +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2271,7 +2333,9 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2320,8 +2384,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, - OK_Ordinary, S.getLocStart(), - /*fpContractable=*/false); + OK_Ordinary, S.getLocStart(), FPOptions()); // Increment for loop counter. UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, S.getLocStart()); @@ -2397,7 +2460,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getLocEnd()); }; CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); - CGF.EmitOMPReductionClauseFinal(S); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); // Emit post-update of the reduction variables if IsLastIter != 0. emitPostUpdateForReductionClause( CGF, S, [&](CodeGenFunction &CGF) -> llvm::Value * { @@ -2523,9 +2586,11 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); - CGF.EmitOMPWorksharingLoop(S); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); }; - emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -2533,9 +2598,11 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPWorksharingLoop(S); + CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, + emitDispatchForLoopBounds); }; - emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -2545,7 +2612,8 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitSections(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, + emitEmptyBoundParameters); } void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, @@ -2629,11 +2697,32 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, ++ID; } } + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + auto IPriv = C->privates().begin(); + auto IRed = C->reduction_ops().begin(); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Data.ReductionVars.emplace_back(Ref); + Data.ReductionCopies.emplace_back(*IPriv); + Data.ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); + } + } + Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, S.getLocStart(), LHSs, RHSs, Data); // Build list of dependences. for (const auto *C : S.getClausesOfKind<OMPDependClause>()) for (auto *IRef : C->varlists()) Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs]( + auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs]( CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); @@ -2688,6 +2777,34 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); } } + if (Data.Reductions) { + OMPLexicalScope LexScope(CGF, S, /*AsInlined=*/true); + ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, + Data.ReductionOps); + llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); + for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { + RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitAggregateType(CGF, Cnt); + Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( + CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); + Replacement = + Address(CGF.EmitScalarConversion( + Replacement.getPointer(), CGF.getContext().VoidPtrTy, + CGF.getContext().getPointerType( + Data.ReductionCopies[Cnt]->getType()), + SourceLocation()), + Replacement.getAlignment()); + Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement); + Scope.addPrivate(RedCG.getBaseDecl(Cnt), + [Replacement]() { return Replacement; }); + // FIXME: This must removed once the runtime library is fixed. + // Emit required threadprivate variables for + // initilizer/combiner/finalizer. + CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(), + RedCG, Cnt); + } + } (void)Scope.Privatize(); Action.Enter(CGF); @@ -2763,7 +2880,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } -void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { +void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, + const CodeGenLoopTy &CodeGenLoop, + Expr *IncExpr) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -2804,10 +2923,17 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { // Emit 'then' code. { // Emit helper vars inits. - LValue LB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + + LValue LB = EmitOMPHelperVar( + *this, cast<DeclRefExpr>( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedLowerBoundVariable() + : S.getLowerBoundVariable()))); + LValue UB = EmitOMPHelperVar( + *this, cast<DeclRefExpr>( + (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedUpperBoundVariable() + : S.getUpperBoundVariable()))); LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = @@ -2859,15 +2985,25 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); - EmitIgnoredExpr(S.getEnsureUpperBound()); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedEnsureUpperBound() + : S.getEnsureUpperBound()); // IV = LB; - EmitIgnoredExpr(S.getInit()); + EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedInit() + : S.getInit()); + + Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) + ? S.getCombinedCond() + : S.getCond(); + + // for distribute alone, codegen // while (idx <= UB) { BODY; ++idx; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), - S.getInc(), - [&S, LoopExit](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); + // when combined with 'for' (e.g. as in 'distribute parallel for') + // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, + [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { + CodeGenLoop(CGF, S, LoopExit); }, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); @@ -2876,9 +3012,11 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, - LB.getAddress(), UB.getAddress(), ST.getAddress(), - IL.getAddress(), Chunk); + const OMPLoopArguments LoopArguments = { + LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), + Chunk}; + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, + CodeGenLoop); } // Emit final copy of the lastprivate variables if IsLastIter != 0. @@ -2900,7 +3038,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPDistributeLoop(S); + + CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, @@ -3250,7 +3389,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, NewVValType = XRValExpr->getType(); auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, - IsSeqCst, IsPostfixUpdate](RValue XRValue) -> RValue { + IsPostfixUpdate](RValue XRValue) -> RValue { CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); RValue Res = CGF.EmitAnyExpr(UE); @@ -3277,7 +3416,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, NewVValType = X->getType().getNonReferenceType(); ExprRValue = convertToType(CGF, ExprRValue, E->getType(), X->getType().getNonReferenceType(), Loc); - auto &&Gen = [&CGF, &NewVVal, ExprRValue](RValue XRValue) -> RValue { + auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) -> RValue { NewVVal = XRValue; return ExprRValue; }; @@ -3327,6 +3466,7 @@ static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_firstprivate: case OMPC_lastprivate: case OMPC_reduction: + case OMPC_task_reduction: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: @@ -3404,41 +3544,24 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } -std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> -CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( - CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, - bool IsOffloadEntry) { - llvm::Function *OutlinedFn = nullptr; - llvm::Constant *OutlinedFnID = nullptr; - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { - OMPPrivateScope PrivateScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); - CGF.EmitOMPPrivateClause(S, PrivateScope); - (void)PrivateScope.Privatize(); - - Action.Enter(CGF); - CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }; - // Emit target region as a standalone region. - CGM.getOpenMPRuntime().emitTargetOutlinedFunction( - S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); - return std::make_pair(OutlinedFn, OutlinedFnID); -} - -void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { +static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + const RegionCodeGenTy &CodeGen) { + assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); + CodeGenModule &CGM = CGF.CGM; const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - GenerateOpenMPCapturedVars(CS, CapturedVars); - llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; - // Check if we have any if clause associated with the directive. const Expr *IfCond = nullptr; - - if (auto *C = S.getSingleClause<OMPIfClause>()) { - IfCond = C->getCondition(); + // Check for the at most one if clause associated with the target region. + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_target) { + IfCond = C->getCondition(); + break; + } } // Check if we have any device clause associated with the directive. @@ -3453,43 +3576,76 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { bool IsOffloadEntry = true; if (IfCond) { bool Val; - if (ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) + if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val) IsOffloadEntry = false; } if (CGM.getLangOpts().OMPTargetTriples.empty()) IsOffloadEntry = false; - assert(CurFuncDecl && "No parent declaration for target region!"); + assert(CGF.CurFuncDecl && "No parent declaration for target region!"); StringRef ParentName; // In case we have Ctors/Dtors we use the complete type variant to produce // the mangling of the device outlined kernel. - if (auto *D = dyn_cast<CXXConstructorDecl>(CurFuncDecl)) + if (auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete)); - else if (auto *D = dyn_cast<CXXDestructorDecl>(CurFuncDecl)) + else if (auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl)) ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete)); else ParentName = - CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); + CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl))); - std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( - CGM, S, ParentName, IsOffloadEntry); - OMPLexicalScope Scope(*this, S); - CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, + IsOffloadEntry, CodeGen); + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); + CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device, CapturedVars); } +static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, + PrePostActionTy &Action) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + + Action.Enter(CGF); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); +} + +void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, + StringRef ParentName, + const OMPTargetDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). - emitParallelOrTeamsOutlinedFunction(S, - *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); + const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); + auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( + S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); - const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); - const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); - const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); + const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); + const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); if (NT || TL) { Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; @@ -3498,7 +3654,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, S.getLocStart()); } - OMPLexicalScope Scope(CGF, S); + OMPTeamsScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, @@ -3511,10 +3667,47 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); (void)PrivateScope.Privatize(); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); }; emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); + emitPostUpdateForReductionClause( + *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); +} + +static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDirective &S) { + auto *CS = S.getCapturedStmt(OMPD_teams); + Action.Enter(CGF); + auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + // TODO: Add support for clauses. + CGF.EmitStmt(CS->getCapturedStmt()); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDirective( + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPCancellationPointDirective( @@ -3740,9 +3933,48 @@ void CodeGenFunction::EmitOMPTargetExitDataDirective( CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); } +static void emitTargetParallelRegion(CodeGenFunction &CGF, + const OMPTargetParallelDirective &S, + PrePostActionTy &Action) { + // Get the captured statement associated with the 'parallel' region. + auto *CS = S.getCapturedStmt(OMPD_parallel); + Action.Enter(CGF); + auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); + (void)PrivateScope.Privatize(); + // TODO: Add support for clauses. + CGF.EmitStmt(CS->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); + }; + emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause( + CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); +} + +void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetParallelDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelRegion(CGF, S, Action); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + void CodeGenFunction::EmitOMPTargetParallelDirective( const OMPTargetParallelDirective &S) { - // TODO: codegen for target parallel. + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetParallelRegion(CGF, S, Action); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); } void CodeGenFunction::EmitOMPTargetParallelForDirective( @@ -3884,7 +4116,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, CodeGen); }; - EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + if (Data.Nogroup) + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + else { + CGM.getOpenMPRuntime().emitTaskgroupRegion( + *this, + [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + }, + S.getLocStart()); + } } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { |