summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r--contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp92
1 files changed, 52 insertions, 40 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
index 6a04d4e..83febcb 100644
--- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp
@@ -15,6 +15,7 @@
#include "CGCUDARuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
+#include "ConstantBuilder.h"
#include "clang/AST/Decl.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
@@ -29,7 +30,8 @@ namespace {
class CGNVCUDARuntime : public CGCUDARuntime {
private:
- llvm::Type *IntTy, *SizeTy, *VoidTy;
+ llvm::IntegerType *IntTy, *SizeTy;
+ llvm::Type *VoidTy;
llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
/// Convenience reference to LLVM Context
@@ -55,10 +57,18 @@ private:
/// where the C code specifies const char*.
llvm::Constant *makeConstantString(const std::string &Str,
const std::string &Name = "",
+ const std::string &SectionName = "",
unsigned Alignment = 0) {
llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
llvm::ConstantInt::get(SizeTy, 0)};
auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
+ llvm::GlobalVariable *GV =
+ cast<llvm::GlobalVariable>(ConstStr.getPointer());
+ if (!SectionName.empty())
+ GV->setSection(SectionName);
+ if (Alignment)
+ GV->setAlignment(Alignment);
+
return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
ConstStr.getPointer(), Zeros);
}
@@ -87,9 +97,9 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
CodeGen::CodeGenTypes &Types = CGM.getTypes();
ASTContext &Ctx = CGM.getContext();
- IntTy = Types.ConvertType(Ctx.IntTy);
- SizeTy = Types.ConvertType(Ctx.getSizeType());
- VoidTy = llvm::Type::getVoidTy(Context);
+ IntTy = CGM.IntTy;
+ SizeTy = CGM.SizeTy;
+ VoidTy = CGM.VoidTy;
CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
@@ -118,37 +128,28 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF,
FunctionArgList &Args) {
- // Build the argument value list and the argument stack struct type.
- SmallVector<llvm::Value *, 16> ArgValues;
- std::vector<llvm::Type *> ArgTypes;
- for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end();
- I != E; ++I) {
- llvm::Value *V = CGF.GetAddrOfLocalVar(*I).getPointer();
- ArgValues.push_back(V);
- assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType");
- ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType());
- }
- llvm::StructType *ArgStackTy = llvm::StructType::get(Context, ArgTypes);
-
- llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
-
- // Emit the calls to cudaSetupArgument
+ // Emit a call to cudaSetupArgument for each arg in Args.
llvm::Constant *cudaSetupArgFn = getSetupArgumentFn();
- for (unsigned I = 0, E = Args.size(); I != E; ++I) {
- llvm::Value *Args[3];
- llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
- Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy);
- Args[1] = CGF.Builder.CreateIntCast(
- llvm::ConstantExpr::getSizeOf(ArgTypes[I]),
- SizeTy, false);
- Args[2] = CGF.Builder.CreateIntCast(
- llvm::ConstantExpr::getOffsetOf(ArgStackTy, I),
- SizeTy, false);
+ llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
+ CharUnits Offset = CharUnits::Zero();
+ for (const VarDecl *A : Args) {
+ CharUnits TyWidth, TyAlign;
+ std::tie(TyWidth, TyAlign) =
+ CGM.getContext().getTypeInfoInChars(A->getType());
+ Offset = Offset.alignTo(TyAlign);
+ llvm::Value *Args[] = {
+ CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
+ VoidPtrTy),
+ llvm::ConstantInt::get(SizeTy, TyWidth.getQuantity()),
+ llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
+ };
llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero);
+ llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock);
CGF.EmitBlock(NextBlock);
+ Offset += TyWidth;
}
// Emit the call to cudaLaunch
@@ -290,18 +291,29 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
continue;
}
- // Create initialized wrapper structure that points to the loaded GPU binary
- llvm::Constant *Values[] = {
- llvm::ConstantInt::get(IntTy, 0x466243b1), // Fatbin wrapper magic.
- llvm::ConstantInt::get(IntTy, 1), // Fatbin version.
- makeConstantString(GpuBinaryOrErr.get()->getBuffer(), "", 16), // Data.
- llvm::ConstantPointerNull::get(VoidPtrTy)}; // Unused in fatbin v1.
- llvm::GlobalVariable *FatbinWrapper = new llvm::GlobalVariable(
- TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage,
- llvm::ConstantStruct::get(FatbinWrapperTy, Values),
- "__cuda_fatbin_wrapper");
+ const char *FatbinConstantName =
+ CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
// NVIDIA's cuobjdump looks for fatbins in this section.
- FatbinWrapper->setSection(".nvFatBinSegment");
+ const char *FatbinSectionName =
+ CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
+
+ // Create initialized wrapper structure that points to the loaded GPU binary
+ ConstantInitBuilder Builder(CGM);
+ auto Values = Builder.beginStruct(FatbinWrapperTy);
+ // Fatbin wrapper magic.
+ Values.addInt(IntTy, 0x466243b1);
+ // Fatbin version.
+ Values.addInt(IntTy, 1);
+ // Data.
+ Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(),
+ "", FatbinConstantName, 8));
+ // Unused in fatbin v1.
+ Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
+ llvm::GlobalVariable *FatbinWrapper =
+ Values.finishAndCreateGlobal("__cuda_fatbin_wrapper",
+ CGM.getPointerAlign(),
+ /*constant*/ true);
+ FatbinWrapper->setSection(FatbinSectionName);
// GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
OpenPOWER on IntegriCloud