diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp | 794 |
1 files changed, 767 insertions, 27 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 12a3a77..22ee00f 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -14,7 +14,9 @@ #include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "clang/AST/Decl.h" +#include "clang/AST/StmtOpenMP.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" @@ -24,16 +26,81 @@ using namespace clang; using namespace CodeGen; +namespace { +/// \brief API for captured statement code generation in OpenMP constructs. +class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { +public: + CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS, + const VarDecl *ThreadIDVar) + : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar), + Directive(D) { + assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); + } + + /// \brief Gets a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const { return ThreadIDVar; } + + /// \brief Gets an LValue for the current ThreadID variable. + LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + + static bool classof(const CGCapturedStmtInfo *Info) { + return Info->getKind() == CR_OpenMP; + } + + /// \brief Emit the captured statement body. + void EmitBody(CodeGenFunction &CGF, Stmt *S) override; + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { return ".omp_outlined."; } + +private: + /// \brief A variable or parameter storing global thread id for OpenMP + /// constructs. + const VarDecl *ThreadIDVar; + /// \brief OpenMP executable directive associated with the region. 
+ const OMPExecutableDirective &Directive; +}; +} // namespace + +LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { + return CGF.MakeNaturalAlignAddrLValue( + CGF.GetAddrOfLocalVar(ThreadIDVar), + CGF.getContext().getPointerType(ThreadIDVar->getType())); +} + +void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + CGF.EmitOMPPrivateClause(Directive, PrivateScope); + CGF.EmitOMPFirstprivateClause(Directive, PrivateScope); + if (PrivateScope.Privatize()) + // Emit implicit barrier to synchronize threads and avoid data races. + CGF.CGM.getOpenMPRuntime().EmitOMPBarrierCall(CGF, Directive.getLocStart(), + /*IsExplicit=*/false); + CGCapturedStmtInfo::EmitBody(CGF, S); +} + CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) : CGM(CGM), DefaultOpenMPPSource(nullptr) { IdentTy = llvm::StructType::create( "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, - CGM.Int8PtrTy /* psource */, NULL); + CGM.Int8PtrTy /* psource */, nullptr); // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), llvm::PointerType::getUnqual(CGM.Int32Ty)}; Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); + KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); +} + +llvm::Value * +CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar) { + const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + CodeGenFunction CGF(CGM, true); + CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar); + CGF.CapturedStmtInfo = &CGInfo; + return CGF.GenerateCapturedStmtFunction(*CS); } llvm::Value * @@ -50,11 +117,10 @@ CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) { DefaultOpenMPPSource = llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); } - llvm::GlobalVariable *DefaultOpenMPLocation = cast<llvm::GlobalVariable>( - CGM.CreateRuntimeVariable(IdentTy, ".kmpc_default_loc.addr")); + auto DefaultOpenMPLocation = new llvm::GlobalVariable( + CGM.getModule(), IdentTy, /*isConstant*/ true, + llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr); DefaultOpenMPLocation->setUnnamedAddr(true); - DefaultOpenMPLocation->setConstant(true); - DefaultOpenMPLocation->setLinkage(llvm::GlobalValue::PrivateLinkage); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true); llvm::Constant *Values[] = {Zero, @@ -62,6 +128,7 @@ CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) { Zero, Zero, DefaultOpenMPPSource}; llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values); DefaultOpenMPLocation->setInitializer(Init); + OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation; return DefaultOpenMPLocation; } return Entry; @@ -77,14 +144,17 @@ llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation( assert(CGF.CurFn && "No function in current CodeGenFunction."); llvm::Value *LocValue = nullptr; - OpenMPLocMapTy::iterator I = OpenMPLocMap.find(CGF.CurFn); - if 
(I != OpenMPLocMap.end()) { - LocValue = I->second; - } else { + auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); + if (I != OpenMPLocThreadIDMap.end()) + LocValue = I->second.DebugLoc; + // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if + // GetOpenMPThreadID was called before this routine. + if (LocValue == nullptr) { // Generate "ident_t .kmpc_loc.addr;" llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr"); AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy)); - OpenMPLocMap[CGF.CurFn] = AI; + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + Elem.second.DebugLoc = AI; LocValue = AI; CGBuilderTy::InsertPointGuard IPG(CGF.Builder); @@ -95,7 +165,7 @@ llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation( } // char **psource = &.kmpc_loc_<flags>.addr.psource; - llvm::Value *PSource = + auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(LocValue, 0, IdentField_PSource); auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); @@ -119,32 +189,54 @@ llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation( return LocValue; } -llvm::Value *CGOpenMPRuntime::GetOpenMPGlobalThreadNum(CodeGenFunction &CGF, - SourceLocation Loc) { +llvm::Value *CGOpenMPRuntime::GetOpenMPThreadID(CodeGenFunction &CGF, + SourceLocation Loc) { assert(CGF.CurFn && "No function in current CodeGenFunction."); - llvm::Value *GTid = nullptr; - OpenMPGtidMapTy::iterator I = OpenMPGtidMap.find(CGF.CurFn); - if (I != OpenMPGtidMap.end()) { - GTid = I->second; + llvm::Value *ThreadID = nullptr; + // Check whether we've already cached a load of the thread id in this + // function. + auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); + if (I != OpenMPLocThreadIDMap.end()) { + ThreadID = I->second.ThreadID; + if (ThreadID != nullptr) + return ThreadID; + } + if (auto OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { + // Check if this an outlined function with thread id passed as argument. 
+ auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable(); + auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); + auto RVal = CGF.EmitLoadOfLValue(LVal, Loc); + LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(), + ThreadIDVar->getType()); + ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal(); + // If value loaded in entry block, cache it and use it everywhere in + // function. + if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + Elem.second.ThreadID = ThreadID; + } } else { - // Generate "int32 .kmpc_global_thread_num.addr;" + // This is not an outlined function region - need to call __kmpc_int32 + // kmpc_global_thread_num(ident_t *loc). + // Generate thread id value and cache this value for use across the + // function. CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)}; - GTid = CGF.EmitRuntimeCall( + ThreadID = CGF.EmitRuntimeCall( CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args); - OpenMPGtidMap[CGF.CurFn] = GTid; + auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); + Elem.second.ThreadID = ThreadID; } - return GTid; + return ThreadID; } void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); - if (OpenMPGtidMap.count(CGF.CurFn)) - OpenMPGtidMap.erase(CGF.CurFn); - if (OpenMPLocMap.count(CGF.CurFn)) - OpenMPLocMap.erase(CGF.CurFn); + if (OpenMPLocThreadIDMap.count(CGF.CurFn)) + OpenMPLocThreadIDMap.erase(CGF.CurFn); } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { @@ -165,7 +257,7 @@ CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) { llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, true); + 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); break; } @@ -173,10 +265,658 @@ CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) { // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, false); + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); break; } + case OMPRTL__kmpc_threadprivate_cached: { + // Build void *__kmpc_threadprivate_cached(ident_t *loc, + // kmp_int32 global_tid, void *data, size_t size, void ***cache); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.VoidPtrTy, CGM.SizeTy, + CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); + break; + } + case OMPRTL__kmpc_critical: { + // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *crit); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, + llvm::PointerType::getUnqual(KmpCriticalNameTy)}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); + break; + } + case OMPRTL__kmpc_threadprivate_register: { + // Build void __kmpc_threadprivate_register(ident_t *, void *data, + // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); + // typedef void *(*kmpc_ctor)(void *); + auto KmpcCtorTy = + llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, + /*isVarArg*/ false)->getPointerTo(); + // typedef void *(*kmpc_cctor)(void *, void *); + llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto KmpcCopyCtorTy = + 
llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, + /*isVarArg*/ false)->getPointerTo(); + // typedef void (*kmpc_dtor)(void *); + auto KmpcDtorTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) + ->getPointerTo(); + llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, + KmpcCopyCtorTy, KmpcDtorTy}; + auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, + /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); + break; + } + case OMPRTL__kmpc_end_critical: { + // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *crit); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, + llvm::PointerType::getUnqual(KmpCriticalNameTy)}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); + break; + } + case OMPRTL__kmpc_cancel_barrier: { + // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); + break; + } + // Build __kmpc_for_static_init*( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + case OMPRTL__kmpc_for_static_init_4: { + auto ITy = CGM.Int32Ty; + auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk + }; + 
llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4"); + break; + } + case OMPRTL__kmpc_for_static_init_4u: { + auto ITy = CGM.Int32Ty; + auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk + }; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u"); + break; + } + case OMPRTL__kmpc_for_static_init_8: { + auto ITy = CGM.Int64Ty; + auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk + }; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8"); + break; + } + case OMPRTL__kmpc_for_static_init_8u: { + auto ITy = CGM.Int64Ty; + auto PtrTy = llvm::PointerType::getUnqual(ITy); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), // loc + CGM.Int32Ty, // tid + CGM.Int32Ty, // schedtype + llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter + PtrTy, // p_lower + PtrTy, // p_upper + PtrTy, // p_stride + ITy, // incr + ITy // chunk + }; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u"); + break; + } + case OMPRTL__kmpc_for_static_fini: { + // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 
global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); + break; + } + case OMPRTL__kmpc_push_num_threads: { + // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_threads) + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); + break; + } + case OMPRTL__kmpc_serialized_parallel: { + // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); + break; + } + case OMPRTL__kmpc_end_serialized_parallel: { + // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); + break; + } + case OMPRTL__kmpc_flush: { + // Build void __kmpc_flush(ident_t *loc, ...); + llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); + break; + } + case OMPRTL__kmpc_master: { + // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, 
/*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); + break; + } + case OMPRTL__kmpc_end_master: { + // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); + break; + } } return RTLFn; } + +llvm::Constant * +CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { + // Lookup the entry, lazily creating it if necessary. + return GetOrCreateInternalVariable(CGM.Int8PtrPtrTy, + Twine(CGM.getMangledName(VD)) + ".cache."); +} + +llvm::Value *CGOpenMPRuntime::getOMPAddrOfThreadPrivate(CodeGenFunction &CGF, + const VarDecl *VD, + llvm::Value *VDAddr, + SourceLocation Loc) { + auto VarTy = VDAddr->getType()->getPointerElementType(); + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), + GetOpenMPThreadID(CGF, Loc), + CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), + CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), + getOrCreateThreadPrivateCache(VD)}; + return CGF.EmitRuntimeCall( + CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); +} + +void CGOpenMPRuntime::EmitOMPThreadPrivateVarInit( + CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, + llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { + // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime + // library. + auto OMPLoc = EmitOpenMPUpdateLocation(CGF, Loc); + CGF.EmitRuntimeCall(CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), + OMPLoc); + // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) + // to register constructor/destructor for variable. 
+ llvm::Value *Args[] = {OMPLoc, + CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), + Ctor, CopyCtor, Dtor}; + CGF.EmitRuntimeCall( + CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); +} + +llvm::Function *CGOpenMPRuntime::EmitOMPThreadPrivateVarDefinition( + const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, + bool PerformInit, CodeGenFunction *CGF) { + VD = VD->getDefinition(CGM.getContext()); + if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { + ThreadPrivateWithDefinition.insert(VD); + QualType ASTTy = VD->getType(); + + llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; + auto Init = VD->getAnyInitializer(); + if (CGM.getLangOpts().CPlusPlus && PerformInit) { + // Generate function that re-emits the declaration's initializer into the + // threadprivate copy of the variable VD + CodeGenFunction CtorCGF(CGM); + FunctionArgList Args; + ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, CGM.getContext().VoidPtrTy); + Args.push_back(&Dst); + + auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), + /*isVariadic=*/false); + auto FTy = CGM.getTypes().GetFunctionType(FI); + auto Fn = CGM.CreateGlobalInitOrDestructFunction( + FTy, ".__kmpc_global_ctor_.", Loc); + CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, + Args, SourceLocation()); + auto ArgVal = CtorCGF.EmitLoadOfScalar( + CtorCGF.GetAddrOfLocalVar(&Dst), + /*Volatile=*/false, CGM.PointerAlignInBytes, + CGM.getContext().VoidPtrTy, Dst.getLocation()); + auto Arg = CtorCGF.Builder.CreatePointerCast( + ArgVal, + CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); + CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), + /*IsInitializer=*/true); + ArgVal = CtorCGF.EmitLoadOfScalar( + CtorCGF.GetAddrOfLocalVar(&Dst), + /*Volatile=*/false, CGM.PointerAlignInBytes, + CGM.getContext().VoidPtrTy, 
Dst.getLocation()); + CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); + CtorCGF.FinishFunction(); + Ctor = Fn; + } + if (VD->getType().isDestructedType() != QualType::DK_none) { + // Generate function that emits destructor call for the threadprivate copy + // of the variable VD + CodeGenFunction DtorCGF(CGM); + FunctionArgList Args; + ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, CGM.getContext().VoidPtrTy); + Args.push_back(&Dst); + + auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), + /*isVariadic=*/false); + auto FTy = CGM.getTypes().GetFunctionType(FI); + auto Fn = CGM.CreateGlobalInitOrDestructFunction( + FTy, ".__kmpc_global_dtor_.", Loc); + DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, + SourceLocation()); + auto ArgVal = DtorCGF.EmitLoadOfScalar( + DtorCGF.GetAddrOfLocalVar(&Dst), + /*Volatile=*/false, CGM.PointerAlignInBytes, + CGM.getContext().VoidPtrTy, Dst.getLocation()); + DtorCGF.emitDestroy(ArgVal, ASTTy, + DtorCGF.getDestroyer(ASTTy.isDestructedType()), + DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); + DtorCGF.FinishFunction(); + Dtor = Fn; + } + // Do not emit init function if it is not required. + if (!Ctor && !Dtor) + return nullptr; + + llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto CopyCtorTy = + llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, + /*isVarArg=*/false)->getPointerTo(); + // Copying constructor for the threadprivate variable. + // Must be NULL - reserved by runtime, but currently it requires that this + // parameter is always NULL. Otherwise it fires assertion. 
+ CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); + if (Ctor == nullptr) { + auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, + /*isVarArg=*/false)->getPointerTo(); + Ctor = llvm::Constant::getNullValue(CtorTy); + } + if (Dtor == nullptr) { + auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, + /*isVarArg=*/false)->getPointerTo(); + Dtor = llvm::Constant::getNullValue(DtorTy); + } + if (!CGF) { + auto InitFunctionTy = + llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); + auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( + InitFunctionTy, ".__omp_threadprivate_init_."); + CodeGenFunction InitCGF(CGM); + FunctionArgList ArgList; + InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, + CGM.getTypes().arrangeNullaryFunction(), ArgList, + Loc); + EmitOMPThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); + InitCGF.FinishFunction(); + return InitFunction; + } + EmitOMPThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); + } + return nullptr; +} + +void CGOpenMPRuntime::EmitOMPParallelCall(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *OutlinedFn, + llvm::Value *CapturedStruct) { + // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) + llvm::Value *Args[] = { + EmitOpenMPUpdateLocation(CGF, Loc), + CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument + // (there is only one additional argument - 'context') + CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), + CGF.EmitCastToVoidPtr(CapturedStruct)}; + auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_fork_call); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +void CGOpenMPRuntime::EmitOMPSerialCall(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *OutlinedFn, + llvm::Value *CapturedStruct) { + auto ThreadID = GetOpenMPThreadID(CGF, Loc); + // Build calls: + // __kmpc_serialized_parallel(&Loc, GTid); + llvm::Value *SerArgs[] = 
{EmitOpenMPUpdateLocation(CGF, Loc), ThreadID}; + auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_serialized_parallel); + CGF.EmitRuntimeCall(RTLFn, SerArgs); + + // OutlinedFn(&gtid, &zero, CapturedStruct); + auto ThreadIDAddr = EmitThreadIDAddress(CGF, Loc); + auto Int32Ty = + CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); + auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct}; + CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); + + // __kmpc_end_serialized_parallel(&Loc, GTid); + llvm::Value *EndSerArgs[] = {EmitOpenMPUpdateLocation(CGF, Loc), ThreadID}; + RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel); + CGF.EmitRuntimeCall(RTLFn, EndSerArgs); +} + +// If we're inside an (outlined) parallel region, use the region info's +// thread-ID variable (it is passed in a first argument of the outlined function +// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in +// regular serial code region, get thread ID by calling kmp_int32 +// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and +// return the address of that temp. 
+llvm::Value *CGOpenMPRuntime::EmitThreadIDAddress(CodeGenFunction &CGF, + SourceLocation Loc) { + if (auto OMPRegionInfo = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + return CGF.EmitLoadOfLValue(OMPRegionInfo->getThreadIDVariableLValue(CGF), + SourceLocation()).getScalarVal(); + auto ThreadID = GetOpenMPThreadID(CGF, Loc); + auto Int32Ty = + CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); + auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); + CGF.EmitStoreOfScalar(ThreadID, + CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty)); + + return ThreadIDTemp; +} + +llvm::Constant * +CGOpenMPRuntime::GetOrCreateInternalVariable(llvm::Type *Ty, + const llvm::Twine &Name) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + Out << Name; + auto RuntimeName = Out.str(); + auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first; + if (Elem.second) { + assert(Elem.second->getType()->getPointerElementType() == Ty && + "OMP internal variable has different type than requested"); + return &*Elem.second; + } + + return Elem.second = new llvm::GlobalVariable( + CGM.getModule(), Ty, /*IsConstant*/ false, + llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), + Elem.first()); +} + +llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) { + llvm::Twine Name(".gomp_critical_user_", CriticalName); + return GetOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); +} + +void CGOpenMPRuntime::EmitOMPCriticalRegion( + CodeGenFunction &CGF, StringRef CriticalName, + const std::function<void()> &CriticalOpGen, SourceLocation Loc) { + auto RegionLock = GetCriticalRegionLock(CriticalName); + // __kmpc_critical(ident_t *, gtid, Lock); + // CriticalOpGen(); + // __kmpc_end_critical(ident_t *, gtid, Lock); + // Prepare arguments and build a call to __kmpc_critical + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), + 
GetOpenMPThreadID(CGF, Loc), RegionLock}; + auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_critical); + CGF.EmitRuntimeCall(RTLFn, Args); + CriticalOpGen(); + // Build a call to __kmpc_end_critical + RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_critical); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +static void EmitOMPIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, + const std::function<void()> &BodyOpGen) { + llvm::Value *CallBool = CGF.EmitScalarConversion( + IfCond, + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), + CGF.getContext().BoolTy); + + auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); + auto *ContBlock = CGF.createBasicBlock("omp_if.end"); + // Generate the branch (If-stmt) + CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); + CGF.EmitBlock(ThenBlock); + BodyOpGen(); + // Emit the rest of bblocks/branches + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); +} + +void CGOpenMPRuntime::EmitOMPMasterRegion( + CodeGenFunction &CGF, const std::function<void()> &MasterOpGen, + SourceLocation Loc) { + // if(__kmpc_master(ident_t *, gtid)) { + // MasterOpGen(); + // __kmpc_end_master(ident_t *, gtid); + // } + // Prepare arguments and build a call to __kmpc_master + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), + GetOpenMPThreadID(CGF, Loc)}; + auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_master); + auto *IsMaster = CGF.EmitRuntimeCall(RTLFn, Args); + EmitOMPIfStmt(CGF, IsMaster, [&]() -> void { + MasterOpGen(); + // Build a call to __kmpc_end_master. + // OpenMP [1.2.2 OpenMP Language Terminology] + // For C/C++, an executable statement, possibly compound, with a single + // entry at the top and a single exit at the bottom, or an OpenMP construct. + // * Access to the structured block must not be the result of a branch. + // * The point of exit cannot be a branch out of the structured block. + // * The point of entry must not be a call to setjmp(). 
+ // * longjmp() and throw() must not violate the entry/exit criteria. + // * An expression statement, iteration statement, selection statement, or + // try block is considered to be a structured block if the corresponding + // compound statement obtained by enclosing it in { and } would be a + // structured block. + // It is analyzed in Sema, so we can just call __kmpc_end_master() on + // fallthrough rather than pushing a normal cleanup for it. + RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_end_master); + CGF.EmitRuntimeCall(RTLFn, Args); + }); +} + +void CGOpenMPRuntime::EmitOMPBarrierCall(CodeGenFunction &CGF, + SourceLocation Loc, bool IsExplicit) { + // Build call __kmpc_cancel_barrier(loc, thread_id); + auto Flags = static_cast<OpenMPLocationFlags>( + OMP_IDENT_KMPC | + (IsExplicit ? OMP_IDENT_BARRIER_EXPL : OMP_IDENT_BARRIER_IMPL)); + // Build call __kmpc_cancel_barrier(loc, thread_id); + // Replace __kmpc_barrier() function by __kmpc_cancel_barrier() because this + // one provides the same functionality and adds initial support for + // cancellation constructs introduced in OpenMP 4.0. __kmpc_cancel_barrier() + // is provided default by the runtime library so it safe to make such + // replacement. + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, Flags), + GetOpenMPThreadID(CGF, Loc)}; + auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_cancel_barrier); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +/// \brief Schedule types for 'omp for' loops (these enumerators are taken from +/// the enum sched_type in kmp.h). +enum OpenMPSchedType { + /// \brief Lower bound for default (unordered) versions. + OMP_sch_lower = 32, + OMP_sch_static_chunked = 33, + OMP_sch_static = 34, + OMP_sch_dynamic_chunked = 35, + OMP_sch_guided_chunked = 36, + OMP_sch_runtime = 37, + OMP_sch_auto = 38, + /// \brief Lower bound for 'ordered' versions. + OMP_ord_lower = 64, + /// \brief Lower bound for 'nomerge' versions. 
+ OMP_nm_lower = 160, +}; + +/// \brief Map the OpenMP loop schedule to the runtime enumeration. +static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) { + switch (ScheduleKind) { + case OMPC_SCHEDULE_static: + return Chunked ? OMP_sch_static_chunked : OMP_sch_static; + case OMPC_SCHEDULE_dynamic: + return OMP_sch_dynamic_chunked; + case OMPC_SCHEDULE_guided: + return OMP_sch_guided_chunked; + case OMPC_SCHEDULE_auto: + return OMP_sch_auto; + case OMPC_SCHEDULE_runtime: + return OMP_sch_runtime; + case OMPC_SCHEDULE_unknown: + assert(!Chunked && "chunk was specified but schedule kind not known"); + return OMP_sch_static; + } + llvm_unreachable("Unexpected runtime schedule"); +} + +bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, + bool Chunked) const { + auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); + return Schedule == OMP_sch_static; +} + +void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPScheduleClauseKind ScheduleKind, + unsigned IVSize, bool IVSigned, + llvm::Value *IL, llvm::Value *LB, + llvm::Value *UB, llvm::Value *ST, + llvm::Value *Chunk) { + OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr); + // Call __kmpc_for_static_init( + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, + // kmp_int[32|64] incr, kmp_int[32|64] chunk); + // TODO: Implement dynamic schedule. + + // If the Chunk was not specified in the clause - use default value 1. 
+ if (Chunk == nullptr) + Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1); + + llvm::Value *Args[] = { + EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), + GetOpenMPThreadID(CGF, Loc), + CGF.Builder.getInt32(Schedule), // Schedule type + IL, // &isLastIter + LB, // &LB + UB, // &UB + ST, // &Stride + CGF.Builder.getIntN(IVSize, 1), // Incr + Chunk // Chunk + }; + assert((IVSize == 32 || IVSize == 64) && + "Index size is not compatible with the omp runtime"); + auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4 + : OMPRTL__kmpc_for_static_init_4u) + : (IVSigned ? OMPRTL__kmpc_for_static_init_8 + : OMPRTL__kmpc_for_static_init_8u); + auto RTLFn = CreateRuntimeFunction(F); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +void CGOpenMPRuntime::EmitOMPForFinish(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPScheduleClauseKind ScheduleKind) { + assert((ScheduleKind == OMPC_SCHEDULE_static || + ScheduleKind == OMPC_SCHEDULE_unknown) && + "Non-static schedule kinds are not yet implemented"); + // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), + GetOpenMPThreadID(CGF, Loc)}; + auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_for_static_fini); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF, + llvm::Value *NumThreads, + SourceLocation Loc) { + // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) + llvm::Value *Args[] = { + EmitOpenMPUpdateLocation(CGF, Loc), GetOpenMPThreadID(CGF, Loc), + CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; + llvm::Constant *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_push_num_threads); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +void CGOpenMPRuntime::EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, + SourceLocation Loc) { + // Build call void __kmpc_flush(ident_t *loc, ...) 
+ // FIXME: List of variables is ignored by libiomp5 runtime, no need to + // generate it, just request full memory fence. + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), + llvm::ConstantInt::get(CGM.Int32Ty, 0)}; + auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush); + CGF.EmitRuntimeCall(RTLFn, Args); +} |