diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp | 92 |
1 files changed, 52 insertions, 40 deletions
diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp index 6a04d4e..83febcb 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCUDANV.cpp @@ -15,6 +15,7 @@ #include "CGCUDARuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" +#include "ConstantBuilder.h" #include "clang/AST/Decl.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" @@ -29,7 +30,8 @@ namespace { class CGNVCUDARuntime : public CGCUDARuntime { private: - llvm::Type *IntTy, *SizeTy, *VoidTy; + llvm::IntegerType *IntTy, *SizeTy; + llvm::Type *VoidTy; llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy; /// Convenience reference to LLVM Context @@ -55,10 +57,18 @@ private: /// where the C code specifies const char*. llvm::Constant *makeConstantString(const std::string &Str, const std::string &Name = "", + const std::string &SectionName = "", unsigned Alignment = 0) { llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0), llvm::ConstantInt::get(SizeTy, 0)}; auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str()); + llvm::GlobalVariable *GV = + cast<llvm::GlobalVariable>(ConstStr.getPointer()); + if (!SectionName.empty()) + GV->setSection(SectionName); + if (Alignment) + GV->setAlignment(Alignment); + return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(), ConstStr.getPointer(), Zeros); } @@ -87,9 +97,9 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM) CodeGen::CodeGenTypes &Types = CGM.getTypes(); ASTContext &Ctx = CGM.getContext(); - IntTy = Types.ConvertType(Ctx.IntTy); - SizeTy = Types.ConvertType(Ctx.getSizeType()); - VoidTy = llvm::Type::getVoidTy(Context); + IntTy = CGM.IntTy; + SizeTy = CGM.SizeTy; + VoidTy = CGM.VoidTy; CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy)); VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy)); @@ -118,37 +128,28 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF, void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args) { - // Build the argument value list and the argument stack struct type. - SmallVector<llvm::Value *, 16> ArgValues; - std::vector<llvm::Type *> ArgTypes; - for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end(); - I != E; ++I) { - llvm::Value *V = CGF.GetAddrOfLocalVar(*I).getPointer(); - ArgValues.push_back(V); - assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType"); - ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType()); - } - llvm::StructType *ArgStackTy = llvm::StructType::get(Context, ArgTypes); - - llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); - - // Emit the calls to cudaSetupArgument + // Emit a call to cudaSetupArgument for each arg in Args. llvm::Constant *cudaSetupArgFn = getSetupArgumentFn(); - for (unsigned I = 0, E = Args.size(); I != E; ++I) { - llvm::Value *Args[3]; - llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); - Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy); - Args[1] = CGF.Builder.CreateIntCast( - llvm::ConstantExpr::getSizeOf(ArgTypes[I]), - SizeTy, false); - Args[2] = CGF.Builder.CreateIntCast( - llvm::ConstantExpr::getOffsetOf(ArgStackTy, I), - SizeTy, false); + llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); + CharUnits Offset = CharUnits::Zero(); + for (const VarDecl *A : Args) { + CharUnits TyWidth, TyAlign; + std::tie(TyWidth, TyAlign) = + CGM.getContext().getTypeInfoInChars(A->getType()); + Offset = Offset.alignTo(TyAlign); + llvm::Value *Args[] = { + CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(), + VoidPtrTy), + llvm::ConstantInt::get(SizeTy, TyWidth.getQuantity()), + llvm::ConstantInt::get(SizeTy, Offset.getQuantity()), + }; llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); + llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); CGF.EmitBlock(NextBlock); + Offset += TyWidth; } // Emit the call to cudaLaunch @@ -290,18 +291,29 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() { continue; } - // Create initialized wrapper structure that points to the loaded GPU binary - llvm::Constant *Values[] = { - llvm::ConstantInt::get(IntTy, 0x466243b1), // Fatbin wrapper magic. - llvm::ConstantInt::get(IntTy, 1), // Fatbin version. - makeConstantString(GpuBinaryOrErr.get()->getBuffer(), "", 16), // Data. - llvm::ConstantPointerNull::get(VoidPtrTy)}; // Unused in fatbin v1. - llvm::GlobalVariable *FatbinWrapper = new llvm::GlobalVariable( - TheModule, FatbinWrapperTy, true, llvm::GlobalValue::InternalLinkage, - llvm::ConstantStruct::get(FatbinWrapperTy, Values), - "__cuda_fatbin_wrapper"); + const char *FatbinConstantName = + CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin"; // NVIDIA's cuobjdump looks for fatbins in this section. - FatbinWrapper->setSection(".nvFatBinSegment"); + const char *FatbinSectionName = + CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment"; + + // Create initialized wrapper structure that points to the loaded GPU binary + ConstantInitBuilder Builder(CGM); + auto Values = Builder.beginStruct(FatbinWrapperTy); + // Fatbin wrapper magic. + Values.addInt(IntTy, 0x466243b1); + // Fatbin version. + Values.addInt(IntTy, 1); + // Data. + Values.add(makeConstantString(GpuBinaryOrErr.get()->getBuffer(), + "", FatbinConstantName, 8)); + // Unused in fatbin v1. + Values.add(llvm::ConstantPointerNull::get(VoidPtrTy)); + llvm::GlobalVariable *FatbinWrapper = + Values.finishAndCreateGlobal("__cuda_fatbin_wrapper", + CGM.getPointerAlign(), + /*constant*/ true); + FatbinWrapper->setSection(FatbinSectionName); // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper); llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall( |