diff options
Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX')
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTX.td | 34 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h | 4 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 105 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 68 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 3 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 4 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 4 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 20 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 12 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 7 | ||||
-rw-r--r-- | contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 16 |
11 files changed, 229 insertions, 48 deletions
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td index ae7710e..7aee359 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td +++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td @@ -24,7 +24,30 @@ include "NVPTXInstrInfo.td" // - Need at least one feature to avoid generating zero sized array by // TableGen in NVPTXGenSubtarget.inc. //===----------------------------------------------------------------------===// -def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; + +// SM Versions +def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10", + "Target SM 1.0">; +def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11", + "Target SM 1.1">; +def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12", + "Target SM 1.2">; +def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13", + "Target SM 1.3">; +def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20", + "Target SM 2.0">; +def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", + "Target SM 2.1">; +def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30", + "Target SM 3.0">; +def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35", + "Target SM 3.5">; + +// PTX Versions +def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30", + "Use PTX version 3.0">; +def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", + "Use PTX version 3.1">; //===----------------------------------------------------------------------===// // NVPTX supported processors. @@ -33,7 +56,14 @@ def FeatureDummy : SubtargetFeature<"dummy", "dummy", "true", "">; class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_10", [FeatureDummy]>; +def : Proc<"sm_10", [SM10]>; +def : Proc<"sm_11", [SM11]>; +def : Proc<"sm_12", [SM12]>; +def : Proc<"sm_13", [SM13]>; +def : Proc<"sm_20", [SM20]>; +def : Proc<"sm_21", [SM21]>; +def : Proc<"sm_30", [SM30]>; +def : Proc<"sm_35", [SM35]>; def NVPTXInstrInfo : InstrInfo { diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 24b3bd5..c7cabf6 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -16,7 +16,7 @@ #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" namespace llvm { @@ -31,7 +31,7 @@ public: NVPTXAllocaHoisting() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); + AU.addRequired<DataLayout>(); AU.addPreserved<MachineFunctionAnalysis>(); } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index f2b9616..0a885ce 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -68,7 +68,54 @@ static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", cl::location(llvm::InterleaveSrcInPtx)); +namespace { +/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V +/// depends. +void DiscoverDependentGlobals(Value *V, + DenseSet<GlobalVariable*> &Globals) { + if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) + Globals.insert(GV); + else { + if (User *U = dyn_cast<User>(V)) { + for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { + DiscoverDependentGlobals(U->getOperand(i), Globals); + } + } + } +} +/// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable +/// instances to be emitted, but only after any dependents have been added +/// first. +void VisitGlobalVariableForEmission(GlobalVariable *GV, + SmallVectorImpl<GlobalVariable*> &Order, + DenseSet<GlobalVariable*> &Visited, + DenseSet<GlobalVariable*> &Visiting) { + // Have we already visited this one? + if (Visited.count(GV)) return; + + // Do we have a circular dependency? + if (Visiting.count(GV)) + report_fatal_error("Circular dependency found in global variable set"); + + // Start visiting this global + Visiting.insert(GV); + + // Make sure we visit all dependents first + DenseSet<GlobalVariable*> Others; + for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) + DiscoverDependentGlobals(GV->getOperand(i), Others); + + for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), + E = Others.end(); I != E; ++I) + VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); + + // Now we can visit ourself + Order.push_back(GV); + Visited.insert(GV); + Visiting.erase(GV); +} +} // @TODO: This is a copy from AsmPrinter.cpp. The function is static, so we // cannot just link to the existing version. @@ -98,10 +145,10 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding - // opportunities. Attempt to fold the expression using TargetData as a + // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getTargetData())) + ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) if (C != CE) return LowerConstant(C, AP); @@ -115,7 +162,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address const Constant *PtrVal = CE->getOperand(0); SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end()); @@ -145,7 +192,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { return LowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); @@ -155,7 +202,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { } case Instruction::PtrToInt: { - const TargetData &TD = *AP.TM.getTargetData(); + const DataLayout &TD = *AP.TM.getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); @@ -270,7 +317,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); const TargetLowering *TLI = TM.getTargetLowering(); Type *Ty = F->getReturnType(); @@ -874,7 +921,7 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(OutContext, *TM.getTargetData()); + Mang = new Mangler(OutContext, *TM.getDataLayout()); // Emit header before any dwarf directives are emitted below. emitHeader(M, OS1); @@ -893,10 +940,27 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitDeclarations(M, OS2); - // Print out module-level global variables here. + // As ptxas does not support forward references of globals, we need to first + // sort the list of module-level globals in def-use order. We visit each + // global variable in order, and ensure that we emit it *after* its dependent + // globals. We use a little extra memory maintaining both a set and a list to + // have fast searches while maintaining a strict ordering. + SmallVector<GlobalVariable*,8> Globals; + DenseSet<GlobalVariable*> GVVisited; + DenseSet<GlobalVariable*> GVVisiting; + + // Visit each global variable, in order for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - printModuleLevelGV(I, OS2); + I != E; ++I) + VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); + + assert(GVVisited.size() == M.getGlobalList().size() && + "Missed a global variable"); + assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); + + // Print out module-level global variables in proper order + for (unsigned i = 0, e = Globals.size(); i != e; ++i) + printModuleLevelGV(Globals[i], OS2); OS2 << '\n'; @@ -910,7 +974,8 @@ void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { O << "//\n"; O << "\n"; - O << ".version 3.0\n"; + unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); + O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; O << ".target "; O << nvptxSubtarget.getTargetName(); @@ -1023,7 +1088,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, return; } - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); // GlobalVariables are always constant pointers themselves. const PointerType *PTy = GVar->getType(); @@ -1296,7 +1361,7 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, raw_ostream &O) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); // GlobalVariables are always constant pointers themselves. const PointerType *PTy = GVar->getType(); @@ -1342,7 +1407,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, static unsigned int -getOpenCLAlignment(const TargetData *TD, +getOpenCLAlignment(const DataLayout *TD, Type *Ty) { if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) return TD->getPrefTypeAlignment(Ty); @@ -1421,7 +1486,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); const AttrListPtr &PAL = F->getAttributes(); const TargetLowering *TLI = TM.getTargetLowering(); Function::const_arg_iterator I, E; @@ -1456,7 +1521,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, continue; } - if (PAL.paramHasAttr(paramIndex+1, Attribute::ByVal) == false) { + if (PAL.getParamAttributes(paramIndex+1). + hasAttribute(Attributes::ByVal) == false) { // Just a scalar const PointerType *PTy = dyn_cast<PointerType>(Ty); if (isKernelFunc) { @@ -1524,6 +1590,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // <a> = PAL.getparamalignment // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex+1); + if (align == 0) + align = TD->getABITypeAlignment(ETy); + unsigned sz = TD->getTypeAllocSize(ETy); O << "\t.param .align " << align << " .b8 "; @@ -1714,7 +1783,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); if (isa<UndefValue>(CPV) || CPV->isNullValue()) { int s = TD->getTypeAllocSize(CPV->getType()); @@ -1843,7 +1912,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, AggBuffer *aggBuffer) { - const TargetData *TD = TM.getTargetData(); + const DataLayout *TD = TM.getDataLayout(); int Bytes; // Old constants diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6ea10ea..f1a99d7 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -174,10 +174,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PTX does not support load / store predicate registers - setOperationAction(ISD::LOAD, MVT::i1, Expand); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::STORE, MVT::i1, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setOperationAction(ISD::STORE, MVT::i1, Expand); setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::i32, MVT::i1, Expand); setTruncStoreAction(MVT::i16, MVT::i1, Expand); @@ -402,7 +403,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, if (isABI) { unsigned align = Outs[i].Flags.getByValAlign(); - unsigned sz = getTargetData()->getTypeAllocSize(ETy); + unsigned sz = getDataLayout()->getTypeAllocSize(ETy); O << ".param .align " << align << " .b8 "; O << "_"; @@ -655,11 +656,11 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, else { if (Func) { // direct call if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) - retAlignment = getTargetData()->getABITypeAlignment(retTy); + retAlignment = getDataLayout()->getABITypeAlignment(retTy); } else { // indirect call const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction()); if (!llvm::getAlign(*CallI, 0, retAlignment)) - retAlignment = getTargetData()->getABITypeAlignment(retTy); + retAlignment = getDataLayout()->getABITypeAlignment(retTy); } SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment, @@ -856,11 +857,64 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_SUBVECTOR: return Op; case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } + +// v = ld i1* addr +// => +// v1 = ld i8* addr +// v = trunc v1 to i1 +SDValue NVPTXTargetLowering:: +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(Node); + DebugLoc dl = Node->getDebugLoc(); + assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ; + assert(Node->getValueType(0) == MVT::i1 && + "Custom lowering for i1 load only"); + SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), + LD->getAlignment()); + SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); + // The legalizer (the caller) is expecting two values from the legalized + // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() + // in LegalizeDAG.cpp which also uses MergeValues. + SDValue Ops[] = {result, LD->getChain()}; + return DAG.getMergeValues(Ops, 2, dl); +} + +// st i1 v, addr +// => +// v1 = zxt v to i8 +// st i8, addr +SDValue NVPTXTargetLowering:: +LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3 = ST->getValue(); + assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only"); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, + MVT::i8, Tmp3); + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + return Result; +} + + SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, EVT v) const { @@ -916,7 +970,7 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - const TargetData *TD = getTargetData(); + const DataLayout *TD = getDataLayout(); const Function *F = MF.getFunction(); const AttrListPtr &PAL = F->getAttributes(); @@ -965,7 +1019,7 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, // to newly created nodes. The SDNOdes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. - if (PAL.paramHasAttr(i+1, Attribute::ByVal) == false) { + if (PAL.getParamAttributes(i+1).hasAttribute(Attributes::ByVal) == false) { // A plain scalar. if (isABI || isKernel) { // If ABI, load from the param symbol diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 86246e6..94a177c 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -138,6 +138,9 @@ private: SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 56b2372..9273931 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -21,7 +21,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" using namespace llvm; @@ -110,7 +110,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { SmallVector<MemTransferInst *, 4> aggrMemcpys; SmallVector<MemSetInst *, 4> aggrMemsets; - TargetData *TD = &getAnalysis<TargetData>(); + DataLayout *TD = &getAnalysis<DataLayout>(); LLVMContext &Context = F.getParent()->getContext(); // diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index ac7f150..b150c69 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -17,7 +17,7 @@ #include "llvm/Pass.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" namespace llvm { @@ -28,7 +28,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass { NVPTXLowerAggrCopies() : FunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<TargetData>(); + AU.addRequired<DataLayout>(); AU.addPreserved<MachineFunctionAnalysis>(); } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp index 6aadd43..7b62cce 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -34,16 +34,18 @@ DriverInterface(cl::desc("Choose driver interface:"), NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) -:NVPTXGenSubtargetInfo(TT, "", FS), // Don't pass CPU to subtarget, - // because we don't register all - // nvptx targets. - Is64Bit(is64Bit) { +: NVPTXGenSubtargetInfo(TT, CPU, FS), + Is64Bit(is64Bit), + PTXVersion(0), + SmVersion(10) { drvInterface = DriverInterface; // Provide the default CPU if none std::string defCPU = "sm_10"; + ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS); + // Get the TargetName from the FS if available if (FS.empty() && CPU.empty()) TargetName = defCPU; @@ -52,6 +54,12 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, else llvm_unreachable("we are not using FeatureStr"); - // Set up the SmVersion - SmVersion = atoi(TargetName.c_str()+3); + // We default to PTX 3.1, but we cannot just default to it in the initializer + // since the attribute parser checks if the given option is >= the default. + // So if we set ptx31 as the default, the ptx30 attribute would never match. + // Instead, we use 0 as the default and manually set 31 if the default is + // used. + if (PTXVersion == 0) { + PTXVersion = 31; + } } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h index 8f2a629..3cfd971 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h @@ -25,13 +25,17 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { - - unsigned int SmVersion; + std::string TargetName; NVPTX::DrvInterface drvInterface; - bool dummy; // For the 'dummy' feature, see NVPTX.td bool Is64Bit; + // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 + unsigned PTXVersion; + + // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31 + unsigned int SmVersion; + public: /// This constructor initializes the data members to match that /// of the specified module. @@ -69,6 +73,8 @@ public: NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } std::string getTargetName() const { return TargetName; } + unsigned getPTXVersion() const { return PTXVersion; } + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); std::string getDataLayout() const { diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 433f415..cbb4900 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -32,7 +32,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -71,8 +71,9 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, bool is64bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), Subtarget(TT, CPU, FS, is64bit), - DataLayout(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) + DL(Subtarget.getDataLayout()), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit), + STTI(&TLInfo), VTTI(&TLInfo) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { } diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h index b3f9cac..11bc9d4 100644 --- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -21,10 +21,11 @@ #include "NVPTXSubtarget.h" #include "NVPTXFrameLowering.h" #include "ManagedStringPool.h" -#include "llvm/Target/TargetData.h" +#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetTransformImpl.h" namespace llvm { @@ -32,7 +33,7 @@ namespace llvm { /// class NVPTXTargetMachine : public LLVMTargetMachine { NVPTXSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment + const DataLayout DL; // Calculates type size & alignment NVPTXInstrInfo InstrInfo; NVPTXTargetLowering TLInfo; TargetSelectionDAGInfo TSInfo; @@ -44,6 +45,9 @@ class NVPTXTargetMachine : public LLVMTargetMachine { // Hold Strings that can be free'd all together with NVPTXTargetMachine ManagedStringPool ManagedStrPool; + ScalarTargetTransformImpl STTI; + VectorTargetTransformImpl VTTI; + //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); @@ -58,7 +62,7 @@ public: return &FrameLowering; } virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout;} + virtual const DataLayout *getDataLayout() const { return &DL;} virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;} virtual const NVPTXRegisterInfo *getRegisterInfo() const { @@ -72,6 +76,12 @@ public: virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + virtual const ScalarTargetTransformInfo *getScalarTargetTransformInfo()const { + return &STTI; + } + virtual const VectorTargetTransformInfo *getVectorTargetTransformInfo()const { + return &VTTI; + } //virtual bool addInstSelector(PassManagerBase &PM, // CodeGenOpt::Level OptLevel); |