Diffstat (limited to 'contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp | 522
1 file changed, 475 insertions, 47 deletions
diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp
index 90af6d5..282633b 100644
--- a/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaCUDA.cpp
@@ -18,11 +18,25 @@
 #include "clang/Sema/Lookup.h"
 #include "clang/Sema/Sema.h"
 #include "clang/Sema/SemaDiagnostic.h"
+#include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 using namespace clang;
+void Sema::PushForceCUDAHostDevice() {
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  ForceCUDAHostDeviceDepth++;
+}
+
+bool Sema::PopForceCUDAHostDevice() {
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  if (ForceCUDAHostDeviceDepth == 0)
+    return false;
+  ForceCUDAHostDeviceDepth--;
+  return true;
+}
+
 ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                          MultiExprArg ExecConfig,
                                          SourceLocation GGGLoc) {
@@ -40,21 +54,73 @@ ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                        /*IsExecConfig=*/true);
 }
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) {
+  bool HasHostAttr = false;
+  bool HasDeviceAttr = false;
+  bool HasGlobalAttr = false;
+  bool HasInvalidTargetAttr = false;
+  while (Attr) {
+    switch(Attr->getKind()){
+    case AttributeList::AT_CUDAGlobal:
+      HasGlobalAttr = true;
+      break;
+    case AttributeList::AT_CUDAHost:
+      HasHostAttr = true;
+      break;
+    case AttributeList::AT_CUDADevice:
+      HasDeviceAttr = true;
+      break;
+    case AttributeList::AT_CUDAInvalidTarget:
+      HasInvalidTargetAttr = true;
+      break;
+    default:
+      break;
+    }
+    Attr = Attr->getNext();
+  }
+  if (HasInvalidTargetAttr)
+    return CFT_InvalidTarget;
+
+  if (HasGlobalAttr)
+    return CFT_Global;
+
+  if (HasHostAttr && HasDeviceAttr)
+    return CFT_HostDevice;
+
+  if (HasDeviceAttr)
+    return CFT_Device;
+
+  return CFT_Host;
+}
+
+template <typename A>
+static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+  return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
+           return isa<A>(Attribute) &&
+                  !(IgnoreImplicitAttr && Attribute->isImplicit());
+         });
+}
+
 /// IdentifyCUDATarget - Determine the CUDA compilation target for this function
-Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
+Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
+                                                  bool IgnoreImplicitHDAttr) {
+  // Code that lives outside a function is run on the host.
+  if (D == nullptr)
+    return CFT_Host;
+
   if (D->hasAttr<CUDAInvalidTargetAttr>())
     return CFT_InvalidTarget;
   if (D->hasAttr<CUDAGlobalAttr>())
     return CFT_Global;
-  if (D->hasAttr<CUDADeviceAttr>()) {
-    if (D->hasAttr<CUDAHostAttr>())
+  if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) {
+    if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr))
       return CFT_HostDevice;
     return CFT_Device;
-  } else if (D->hasAttr<CUDAHostAttr>()) {
+  } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) {
     return CFT_Host;
-  } else if (D->isImplicit()) {
+  } else if (D->isImplicit() && !IgnoreImplicitHDAttr) {
     // Some implicit declarations (like intrinsic functions) are not marked.
     // Set the most lenient target on them for maximal flexibility.
     return CFT_HostDevice;
@@ -95,9 +161,8 @@ Sema::CUDAFunctionPreference
 Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                              const FunctionDecl *Callee) {
   assert(Callee && "Callee must be valid.");
+  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
   CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);
-  CUDAFunctionTarget CallerTarget =
-      (Caller != nullptr) ? IdentifyCUDATarget(Caller) : Sema::CFT_Host;
   // If one of the targets is invalid, the check always fails, no matter what
   // the other target is.
@@ -107,8 +172,7 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
   // (a) Can't call global from some contexts until we support CUDA's
   // dynamic parallelism.
   if (CalleeTarget == CFT_Global &&
-      (CallerTarget == CFT_Global || CallerTarget == CFT_Device ||
-       (CallerTarget == CFT_HostDevice && getLangOpts().CUDAIsDevice)))
+      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
     return CFP_Never;
   // (b) Calling HostDevice is OK for everyone.
@@ -145,52 +209,27 @@ Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
   llvm_unreachable("All cases should've been handled by now.");
 }
-template <typename T>
-static void EraseUnwantedCUDAMatchesImpl(
-    Sema &S, const FunctionDecl *Caller, llvm::SmallVectorImpl<T> &Matches,
-    std::function<const FunctionDecl *(const T &)> FetchDecl) {
+void Sema::EraseUnwantedCUDAMatches(
+    const FunctionDecl *Caller,
+    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
   if (Matches.size() <= 1)
     return;
+  using Pair = std::pair<DeclAccessPair, FunctionDecl*>;
+
   // Gets the CUDA function preference for a call from Caller to Match.
-  auto GetCFP = [&](const T &Match) {
-    return S.IdentifyCUDAPreference(Caller, FetchDecl(Match));
+  auto GetCFP = [&](const Pair &Match) {
+    return IdentifyCUDAPreference(Caller, Match.second);
   };
   // Find the best call preference among the functions in Matches.
-  Sema::CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
+  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
       Matches.begin(), Matches.end(),
-      [&](const T &M1, const T &M2) { return GetCFP(M1) < GetCFP(M2); }));
+      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
   // Erase all functions with lower priority.
-  Matches.erase(
-      llvm::remove_if(Matches,
-                      [&](const T &Match) { return GetCFP(Match) < BestCFP; }),
-      Matches.end());
-}
-
-void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
-                                    SmallVectorImpl<FunctionDecl *> &Matches){
-  EraseUnwantedCUDAMatchesImpl<FunctionDecl *>(
-      *this, Caller, Matches, [](const FunctionDecl *item) { return item; });
-}
-
-void Sema::EraseUnwantedCUDAMatches(const FunctionDecl *Caller,
-                                    SmallVectorImpl<DeclAccessPair> &Matches) {
-  EraseUnwantedCUDAMatchesImpl<DeclAccessPair>(
-      *this, Caller, Matches, [](const DeclAccessPair &item) {
-        return dyn_cast<FunctionDecl>(item.getDecl());
-      });
-}
-
-void Sema::EraseUnwantedCUDAMatches(
-    const FunctionDecl *Caller,
-    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches){
-  EraseUnwantedCUDAMatchesImpl<std::pair<DeclAccessPair, FunctionDecl *>>(
-      *this, Caller, Matches,
-      [](const std::pair<DeclAccessPair, FunctionDecl *> &item) {
-        return dyn_cast<FunctionDecl>(item.second);
-      });
+  llvm::erase_if(Matches,
+                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
 }
 /// When an implicitly-declared special member has to invoke more than one
@@ -441,9 +480,23 @@ bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
 // * a __device__ function with this signature was already declared, in which
 //   case in which case we output an error, unless the __device__ decl is in a
 //   system header, in which case we leave the constexpr function unattributed.
-void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
+//
+// In addition, all function decls are treated as __host__ __device__ when
+// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
+//   #pragma clang force_cuda_host_device_begin/end
+// pair).
+void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                        const LookupResult &Previous) {
-  assert(getLangOpts().CUDA && "May be called only for CUDA compilations.");
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+
+  if (ForceCUDAHostDeviceDepth > 0) {
+    if (!NewD->hasAttr<CUDAHostAttr>())
+      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
+    if (!NewD->hasAttr<CUDADeviceAttr>())
+      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+    return;
+  }
+
   if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
       NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
       NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
@@ -480,3 +533,378 @@ void Sema::maybeAddCUDAHostDeviceAttrs(Scope *S, FunctionDecl *NewD,
   NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
   NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
 }
+
+// In CUDA, there are some constructs which may appear in semantically-valid
+// code, but trigger errors if we ever generate code for the function in which
+// they appear. Essentially every construct you're not allowed to use on the
+// device falls into this category, because you are allowed to use these
+// constructs in a __host__ __device__ function, but only if that function is
+// never codegen'ed on the device.
+//
+// To handle semantic checking for these constructs, we keep track of the set of
+// functions we know will be emitted, either because we could tell a priori that
+// they would be emitted, or because they were transitively called by a
+// known-emitted function.
+//
+// We also keep a partial call graph of which not-known-emitted functions call
+// which other not-known-emitted functions.
+//
+// When we see something which is illegal if the current function is emitted
+// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
+// CheckCUDACall), we first check if the current function is known-emitted. If
+// so, we immediately output the diagnostic.
+//
+// Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags
+// until we discover that the function is known-emitted, at which point we take
+// it out of this map and emit the diagnostic.
+
+Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
+                                       unsigned DiagID, FunctionDecl *Fn,
+                                       Sema &S)
+    : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn),
+      ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) {
+  switch (K) {
+  case K_Nop:
+    break;
+  case K_Immediate:
+  case K_ImmediateWithCallStack:
+    ImmediateDiag.emplace(S.Diag(Loc, DiagID));
+    break;
+  case K_Deferred:
+    assert(Fn && "Must have a function to attach the deferred diag to.");
+    PartialDiag.emplace(S.PDiag(DiagID));
+    break;
+  }
+}
+
+// Print notes showing how we can reach FD starting from an a priori
+// known-callable function.
+static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) {
+  auto FnIt = S.CUDAKnownEmittedFns.find(FD);
+  while (FnIt != S.CUDAKnownEmittedFns.end()) {
+    DiagnosticBuilder Builder(
+        S.Diags.Report(FnIt->second.Loc, diag::note_called_by));
+    Builder << FnIt->second.FD;
+    Builder.setForceEmit();
+
+    FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD);
+  }
+}
+
+Sema::CUDADiagBuilder::~CUDADiagBuilder() {
+  if (ImmediateDiag) {
+    // Emit our diagnostic and, if it was a warning or error, output a callstack
+    // if Fn isn't a priori known-emitted.
+    bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel(
+        DiagID, Loc) >= DiagnosticsEngine::Warning;
+    ImmediateDiag.reset(); // Emit the immediate diag.
+    if (IsWarningOrError && ShowCallStack)
+      EmitCallStackNotes(S, Fn);
+  } else if (PartialDiag) {
+    assert(ShowCallStack && "Must always show call stack for deferred diags.");
+    S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)});
+  }
+}
+
+// Do we know that we will eventually codegen the given function?
+static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
+  // Templates are emitted when they're instantiated.
+  if (FD->isDependentContext())
+    return false;
+
+  // When compiling for device, host functions are never emitted. Similarly,
+  // when compiling for host, device and global functions are never emitted.
+  // (Technically, we do emit a host-side stub for global functions, but this
+  // doesn't count for our purposes here.)
+  Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
+  if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
+    return false;
+  if (!S.getLangOpts().CUDAIsDevice &&
+      (T == Sema::CFT_Device || T == Sema::CFT_Global))
+    return false;
+
+  // Check whether this function is externally visible -- if so, it's
+  // known-emitted.
+  //
+  // We have to check the GVA linkage of the function's *definition* -- if we
+  // only have a declaration, we don't know whether or not the function will be
+  // emitted, because (say) the definition could include "inline".
+  FunctionDecl *Def = FD->getDefinition();
+
+  // We may currently be parsing the body of FD, in which case
+  // FD->getDefinition() will be null, but we still want to treat FD as though
+  // it's a definition.
+  if (!Def && FD->willHaveBody())
+    Def = FD;
+
+  if (Def &&
+      !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
+    return true;
+
+  // Otherwise, the function is known-emitted if it's in our set of
+  // known-emitted functions.
+  return S.CUDAKnownEmittedFns.count(FD) > 0;
+}
+
+Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
+                                                 unsigned DiagID) {
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  CUDADiagBuilder::Kind DiagKind = [&] {
+    switch (CurrentCUDATarget()) {
+    case CFT_Global:
+    case CFT_Device:
+      return CUDADiagBuilder::K_Immediate;
+    case CFT_HostDevice:
+      // An HD function counts as host code if we're compiling for host, and
+      // device code if we're compiling for device. Defer any errors in device
+      // mode until the function is known-emitted.
+      if (getLangOpts().CUDAIsDevice) {
+        return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+                   ? CUDADiagBuilder::K_ImmediateWithCallStack
+                   : CUDADiagBuilder::K_Deferred;
+      }
+      return CUDADiagBuilder::K_Nop;
+
+    default:
+      return CUDADiagBuilder::K_Nop;
+    }
+  }();
+  return CUDADiagBuilder(DiagKind, Loc, DiagID,
+                         dyn_cast<FunctionDecl>(CurContext), *this);
+}
+
+Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
+                                               unsigned DiagID) {
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  CUDADiagBuilder::Kind DiagKind = [&] {
+    switch (CurrentCUDATarget()) {
+    case CFT_Host:
+      return CUDADiagBuilder::K_Immediate;
+    case CFT_HostDevice:
+      // An HD function counts as host code if we're compiling for host, and
+      // device code if we're compiling for device. Defer any errors in device
+      // mode until the function is known-emitted.
+      if (getLangOpts().CUDAIsDevice)
+        return CUDADiagBuilder::K_Nop;
+
+      return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+                 ? CUDADiagBuilder::K_ImmediateWithCallStack
+                 : CUDADiagBuilder::K_Deferred;
+    default:
+      return CUDADiagBuilder::K_Nop;
+    }
+  }();
+  return CUDADiagBuilder(DiagKind, Loc, DiagID,
+                         dyn_cast<FunctionDecl>(CurContext), *this);
+}
+
+// Emit any deferred diagnostics for FD and erase them from the map in which
+// they're stored.
+static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
+  auto It = S.CUDADeferredDiags.find(FD);
+  if (It == S.CUDADeferredDiags.end())
+    return;
+  bool HasWarningOrError = false;
+  for (PartialDiagnosticAt &PDAt : It->second) {
+    const SourceLocation &Loc = PDAt.first;
+    const PartialDiagnostic &PD = PDAt.second;
+    HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel(
+        PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning;
+    DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
+    Builder.setForceEmit();
+    PD.Emit(Builder);
+  }
+  S.CUDADeferredDiags.erase(It);
+
+  // FIXME: Should this be called after every warning/error emitted in the loop
+  // above, instead of just once per function? That would be consistent with
+  // how we handle immediate errors, but it also seems like a bit much.
+  if (HasWarningOrError)
+    EmitCallStackNotes(S, FD);
+}
+
+// Indicate that this function (and thus everything it transtively calls) will
+// be codegen'ed, and emit any deferred diagnostics on this function and its
+// (transitive) callees.
+static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller,
+                             FunctionDecl *OrigCallee, SourceLocation OrigLoc) {
+  // Nothing to do if we already know that FD is emitted.
+  if (IsKnownEmitted(S, OrigCallee)) {
+    assert(!S.CUDACallGraph.count(OrigCallee));
+    return;
+  }
+
+  // We've just discovered that OrigCallee is known-emitted. Walk our call
+  // graph to see what else we can now discover also must be emitted.
+
+  struct CallInfo {
+    FunctionDecl *Caller;
+    FunctionDecl *Callee;
+    SourceLocation Loc;
+  };
+  llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}};
+  llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen;
+  Seen.insert(OrigCallee);
+  while (!Worklist.empty()) {
+    CallInfo C = Worklist.pop_back_val();
+    assert(!IsKnownEmitted(S, C.Callee) &&
+           "Worklist should not contain known-emitted functions.");
+    S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc};
+    EmitDeferredDiags(S, C.Callee);
+
+    // If this is a template instantiation, explore its callgraph as well:
+    // Non-dependent calls are part of the template's callgraph, while dependent
+    // calls are part of to the instantiation's call graph.
+    if (auto *Templ = C.Callee->getPrimaryTemplate()) {
+      FunctionDecl *TemplFD = Templ->getAsFunction();
+      if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
+        Seen.insert(TemplFD);
+        Worklist.push_back(
+            {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc});
+      }
+    }
+
+    // Add all functions called by Callee to our worklist.
+    auto CGIt = S.CUDACallGraph.find(C.Callee);
+    if (CGIt == S.CUDACallGraph.end())
+      continue;
+
+    for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc :
+         CGIt->second) {
+      FunctionDecl *NewCallee = FDLoc.first;
+      SourceLocation CallLoc = FDLoc.second;
+      if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee))
+        continue;
+      Seen.insert(NewCallee);
+      Worklist.push_back(
+          {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc});
+    }
+
+    // C.Callee is now known-emitted, so we no longer need to maintain its list
+    // of callees in CUDACallGraph.
+    S.CUDACallGraph.erase(CGIt);
+  }
+}
+
+bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  assert(Callee && "Callee may not be null.");
+  // FIXME: Is bailing out early correct here? Should we instead assume that
+  // the caller is a global initializer?
+  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
+  if (!Caller)
+    return true;
+
+  // If the caller is known-emitted, mark the callee as known-emitted.
+  // Otherwise, mark the call in our call graph so we can traverse it later.
+  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+  if (CallerKnownEmitted)
+    MarkKnownEmitted(*this, Caller, Callee, Loc);
+  else {
+    // If we have
+    //   host fn calls kernel fn calls host+device,
+    // the HD function does not get instantiated on the host. We model this by
+    // omitting at the call to the kernel from the callgraph. This ensures
+    // that, when compiling for host, only HD functions actually called from the
+    // host get marked as known-emitted.
+    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
+      CUDACallGraph[Caller].insert({Callee, Loc});
+  }
+
+  CUDADiagBuilder::Kind DiagKind = [&] {
+    switch (IdentifyCUDAPreference(Caller, Callee)) {
+    case CFP_Never:
+      return CUDADiagBuilder::K_Immediate;
+    case CFP_WrongSide:
+      assert(Caller && "WrongSide calls require a non-null caller");
+      // If we know the caller will be emitted, we know this wrong-side call
+      // will be emitted, so it's an immediate error. Otherwise, defer the
+      // error until we know the caller is emitted.
+      return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack
+                                : CUDADiagBuilder::K_Deferred;
+    default:
+      return CUDADiagBuilder::K_Nop;
+    }
+  }();
+
+  if (DiagKind == CUDADiagBuilder::K_Nop)
+    return true;
+
+  // Avoid emitting this error twice for the same location. Using a hashtable
+  // like this is unfortunate, but because we must continue parsing as normal
+  // after encountering a deferred error, it's otherwise very tricky for us to
+  // ensure that we only emit this deferred error once.
+  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
+    return true;
+
+  CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
+      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
+  CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
+                  Caller, *this)
+      << Callee;
+  return DiagKind != CUDADiagBuilder::K_Immediate &&
+         DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack;
+}
+
+void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
+    return;
+  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
+  if (!CurFn)
+    return;
+  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
+  if (Target == CFT_Global || Target == CFT_Device) {
+    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+  } else if (Target == CFT_HostDevice) {
+    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
+    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
+  }
+}
+
+void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
+                                   const LookupResult &Previous) {
+  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
+  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
+  for (NamedDecl *OldND : Previous) {
+    FunctionDecl *OldFD = OldND->getAsFunction();
+    if (!OldFD)
+      continue;
+
+    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
+    // Don't allow HD and global functions to overload other functions with the
+    // same signature. We allow overloading based on CUDA attributes so that
+    // functions can have different implementations on the host and device, but
+    // HD/global functions "exist" in some sense on both the host and device, so
+    // should have the same implementation on both sides.
+    if (NewTarget != OldTarget &&
+        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
+         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
+        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
+                    /* ConsiderCudaAttrs = */ false)) {
+      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
+          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
+      Diag(OldFD->getLocation(), diag::note_previous_declaration);
+      NewFD->setInvalidDecl();
+      break;
+    }
+  }
+}
+
+template <typename AttrTy>
+static void copyAttrIfPresent(Sema &S, FunctionDecl *FD,
+                              const FunctionDecl &TemplateFD) {
+  if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) {
+    AttrTy *Clone = Attribute->clone(S.Context);
+    Clone->setInherited(true);
+    FD->addAttr(Clone);
+  }
+}
+
+void Sema::inheritCUDATargetAttrs(FunctionDecl *FD,
+                                  const FunctionTemplateDecl &TD) {
+  const FunctionDecl &TemplateFD = *TD.getTemplatedDecl();
+  copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD);
+  copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD);
+  copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD);
+}
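A few illustrative notes follow on what these changes mean for CUDA user code; none of this is part of the commit itself. First, maybeAddCUDAHostDeviceAttrs now treats every function declared while ForceCUDAHostDeviceDepth > 0 as implicitly __host__ __device__. The comment in the patch refers to a "#pragma clang force_cuda_host_device_begin/end" pair; the user-level spelling assumed in the sketch below ("#pragma clang force_cuda_host_device begin" / "end") is an assumption that should be checked against the matching Parser change. Function names are illustrative.

// Sketch only: pragma spelling and names are assumptions, see note above.
#pragma clang force_cuda_host_device begin
// Declared with no CUDA attributes, but ForceCUDAHostDeviceDepth > 0 here,
// so Sema adds implicit __host__ __device__ attributes.
inline int clamp255(int x) { return x < 0 ? 0 : (x > 255 ? 255 : x); }
#pragma clang force_cuda_host_device end

__global__ void saturate(int *data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n)
    data[i] = clamp255(data[i]); // OK: clamp255 is implicitly host+device
}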
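Second, the deferred-diagnostic machinery (CUDADiagBuilder, IsKnownEmitted, MarkKnownEmitted, CheckCUDACall) changes when wrong-side calls are reported. A minimal sketch of the user-visible behavior, with illustrative names: compiled for the device, it should be accepted until kernel() forces maybe_bad() to be emitted, at which point the deferred err_ref_bad_target for the call to host_only() is reported along with "called by" notes.

void host_only();                   // no attributes: implicitly __host__

// Inline HD function: its linkage is discardable, so it is not known-emitted
// while its body is parsed and the wrong-side call below is deferred rather
// than diagnosed immediately during device compilation.
inline __host__ __device__ void maybe_bad() {
  host_only();                      // wrong-side call when compiling for device
}

__device__ void unrelated() {}      // never calls maybe_bad(): no diagnostic

__global__ void kernel() {
  maybe_bad();                      // kernel() is known-emitted on the device,
                                    // so maybe_bad() becomes known-emitted too
                                    // and its deferred diagnostic is emitted,
                                    // with a note_called_by pointing here
}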
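Third, checkCUDATargetOverload rejects a __host__ __device__ or __global__ function that shares a signature with a function of a different target, while still allowing plain host/device overloading. A sketch with illustrative names; the second pair is expected to be rejected with err_cuda_ovl_target.

__host__ int transform(int x) { return x + 1; }
__device__ int transform(int x) { return x + 2; }   // OK: host and device
                                                    // overloads may differ

__host__ __device__ int scale(int x) { return 2 * x; }
__device__ int scale(int x) { return 3 * x; }       // rejected: an HD function
                                                    // "exists" on both sides,
                                                    // so a same-signature
                                                    // __device__ overload is
                                                    // not allowed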
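Finally, CUDASetLambdaAttrs gives a lambda with no explicit target attributes the target of its enclosing function: __device__ inside __device__ and __global__ functions, and both __host__ and __device__ inside HD functions. A sketch with illustrative names:

__global__ void kernel(int *out) {
  auto square = [](int v) { return v * v; };     // implicitly __device__
  out[threadIdx.x] = square(threadIdx.x);
}

__host__ __device__ int twice(int v) {
  auto add = [](int a, int b) { return a + b; }; // implicitly __host__ __device__
  return add(v, v);
}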