diff options
Diffstat (limited to 'lib/Target/PTX')
31 files changed, 1830 insertions, 953 deletions
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index 331266d..ce08916 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ -1,13 +1,11 @@ set(LLVM_TARGET_DEFINITIONS PTX.td) tablegen(PTXGenAsmWriter.inc -gen-asm-writer) +tablegen(PTXGenCallingConv.inc -gen-callingconv) tablegen(PTXGenDAGISel.inc -gen-dag-isel) -tablegen(PTXGenInstrInfo.inc -gen-instr-desc) -tablegen(PTXGenInstrNames.inc -gen-instr-enums) -tablegen(PTXGenRegisterInfo.inc -gen-register-desc) -tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PTXGenRegisterNames.inc -gen-register-enums) -tablegen(PTXGenSubtarget.inc -gen-subtarget) +tablegen(PTXGenInstrInfo.inc -gen-instr-info) +tablegen(PTXGenRegisterInfo.inc -gen-register-info) +tablegen(PTXGenSubtargetInfo.inc -gen-subtarget) add_llvm_target(PTXCodeGen PTXAsmPrinter.cpp @@ -15,7 +13,6 @@ add_llvm_target(PTXCodeGen PTXISelLowering.cpp PTXInstrInfo.cpp PTXFrameLowering.cpp - PTXMCAsmInfo.cpp PTXMCAsmStreamer.cpp PTXMFInfoExtract.cpp PTXRegisterInfo.cpp @@ -24,3 +21,4 @@ add_llvm_target(PTXCodeGen ) add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..df0f63f --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMPTXDesc + PTXMCTargetDesc.cpp + PTXMCAsmInfo.cpp + ) diff --git a/lib/Target/PTX/MCTargetDesc/Makefile b/lib/Target/PTX/MCTargetDesc/Makefile new file mode 100644 index 0000000..35f5a7b --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMPTXDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp index b670abd..efefead 100644 --- a/lib/Target/PTX/PTXMCAsmInfo.cpp +++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp @@ -12,10 +12,15 @@ //===----------------------------------------------------------------------===// #include "PTXMCAsmInfo.h" +#include "llvm/ADT/Triple.h" using namespace llvm; PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::ptx64) + PointerSize = 8; + CommentString = "//"; PrivateGlobalPrefix = "$L__"; diff --git a/lib/Target/PTX/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h index 03f5d66..03f5d66 100644 --- a/lib/Target/PTX/PTXMCAsmInfo.h +++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp new file mode 100644 index 0000000..23f70bd --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -0,0 +1,60 @@ +//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "PTXMCTargetDesc.h" +#include "PTXMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "PTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "PTXGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "PTXGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createPTXMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitPTXMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializePTXMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); +} + +static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitPTXMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializePTXMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target, + createPTXMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target, + createPTXMCSubtargetInfo); +} + +extern "C" void LLVMInitializePTXMCAsmInfo() { + RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); + RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); +} diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h new file mode 100644 index 0000000..1003b0b --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h @@ -0,0 +1,38 @@ +//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXMCTARGETDESC_H +#define PTXMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target ThePTX32Target; +extern Target ThePTX64Target; + +} // End llvm namespace + +// Defines symbolic names for PTX registers. +#define GET_REGINFO_ENUM +#include "PTXGenRegisterInfo.inc" + +// Defines symbolic names for the PTX instructions. +#define GET_INSTRINFO_ENUM +#include "PTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "PTXGenSubtargetInfo.inc" + +#endif diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile index 2c40d69..93dd38a 100644 --- a/lib/Target/PTX/Makefile +++ b/lib/Target/PTX/Makefile @@ -13,14 +13,12 @@ TARGET = PTX # Make sure that tblgen is run, first thing. BUILT_SOURCES = PTXGenAsmWriter.inc \ + PTXGenCallingConv.inc \ PTXGenDAGISel.inc \ PTXGenInstrInfo.inc \ - PTXGenInstrNames.inc \ PTXGenRegisterInfo.inc \ - PTXGenRegisterInfo.h.inc \ - PTXGenRegisterNames.inc \ - PTXGenSubtarget.inc + PTXGenSubtargetInfo.inc -DIRS = TargetInfo +DIRS = TargetInfo MCTargetDesc include $(LEVEL)/Makefile.common diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h index ec2be92..28cab24 100644 --- a/lib/Target/PTX/PTX.h +++ b/lib/Target/PTX/PTX.h @@ -15,6 +15,7 @@ #ifndef PTX_H #define PTX_H +#include "MCTargetDesc/PTXMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -42,14 +43,6 @@ namespace llvm { FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); - extern Target ThePTX32Target; - extern Target ThePTX64Target; } // namespace llvm; -// Defines symbolic names for PTX registers. -#include "PTXGenRegisterNames.inc" - -// Defines symbolic names for the PTX instructions. -#include "PTXGenInstrNames.inc" - #endif // PTX_H diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td index 231866a..f6fbe9f 100644 --- a/lib/Target/PTX/PTX.td +++ b/lib/Target/PTX/PTX.td @@ -16,7 +16,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// -// Subtarget Features. +// Subtarget Features //===----------------------------------------------------------------------===// //===- Architectural Features ---------------------------------------------===// @@ -30,34 +30,54 @@ def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false", //===- PTX Version --------------------------------------------------------===// def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", - "Use PTX Language Version 2.0", - []>; + "Use PTX Language Version 2.0">; def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", - "Use PTX Language Version 2.1", - [FeaturePTX20]>; + "Use PTX Language Version 2.1">; def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", - "Use PTX Language Version 2.2", - [FeaturePTX21]>; + "Use PTX Language Version 2.2">; def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3", - "Use PTX Language Version 2.3", - [FeaturePTX22]>; - -//===- PTX Shader Model ---------------------------------------------------===// - -def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0", - "Enable Shader Model 1.0 compliance">; -def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3", - "Enable Shader Model 1.3 compliance", - [FeatureSM10, FeatureDouble]>; -def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0", - "Enable Shader Model 2.0 compliance", - [FeatureSM13]>; + "Use PTX Language Version 2.3">; + +//===- PTX Target ---------------------------------------------------------===// + +def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0", + "Use Shader Model 1.0">; +def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1", + "Use Shader Model 1.1">; +def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", + "Use Shader Model 1.2">; +def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", + "Use Shader Model 1.3">; +def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", + "Use Shader Model 2.0">; +def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", + "Use Shader Model 2.1">; +def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", + "Use Shader Model 2.2">; +def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", + "Use Shader Model 2.3">; + +def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", + "PTX_COMPUTE_1_0", + "Use Compute Compatibility 1.0">; +def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget", + "PTX_COMPUTE_1_1", + "Use Compute Compatibility 1.1">; +def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget", + "PTX_COMPUTE_1_2", + "Use Compute Compatibility 1.2">; +def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", + "PTX_COMPUTE_1_3", + "Use Compute Compatibility 1.3">; +def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", + "PTX_COMPUTE_2_0", + "Use Compute Compatibility 2.0">; //===----------------------------------------------------------------------===// -// PTX supported processors. +// PTX supported processors //===----------------------------------------------------------------------===// class Proc<string Name, list<SubtargetFeature> Features> @@ -65,6 +85,27 @@ class Proc<string Name, list<SubtargetFeature> Features> def : Proc<"generic", []>; +// Processor definitions for compute/shader models +def : Proc<"compute_10", [FeatureCOMPUTE10]>; +def : Proc<"compute_11", [FeatureCOMPUTE11]>; +def : Proc<"compute_12", [FeatureCOMPUTE12]>; +def : Proc<"compute_13", [FeatureCOMPUTE13]>; +def : Proc<"compute_20", [FeatureCOMPUTE20]>; +def : Proc<"sm_10", [FeatureSM10]>; +def : Proc<"sm_11", [FeatureSM11]>; +def : Proc<"sm_12", [FeatureSM12]>; +def : Proc<"sm_13", [FeatureSM13]>; +def : Proc<"sm_20", [FeatureSM20]>; +def : Proc<"sm_21", [FeatureSM21]>; +def : Proc<"sm_22", [FeatureSM22]>; +def : Proc<"sm_23", [FeatureSM23]>; + +// Processor definitions for common GPU architectures +def : Proc<"g80", [FeatureSM10]>; +def : Proc<"gt200", [FeatureSM13]>; +def : Proc<"gf100", [FeatureSM20, FeatureDouble]>; +def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// @@ -72,6 +113,12 @@ def : Proc<"generic", []>; include "PTXRegisterInfo.td" //===----------------------------------------------------------------------===// +// Calling Conventions +//===----------------------------------------------------------------------===// + +include "PTXCallingConv.td" + +//===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 29c4781..2848d54 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -22,9 +22,12 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" @@ -34,6 +37,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -62,8 +66,13 @@ public: const char *Modifier = 0); void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, const char *Modifier = 0); + void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); + unsigned GetOrCreateSourceID(StringRef FileName, + StringRef DirName); + // autogen'd. void printInstruction(const MachineInstr *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); @@ -71,20 +80,23 @@ public: private: void EmitVariableDeclaration(const GlobalVariable *gv); void EmitFunctionDeclaration(); + + StringMap<unsigned> SourceIdMap; }; // class PTXAsmPrinter } // namespace static const char PARAM_PREFIX[] = "__param_"; +static const char RETURN_PREFIX[] = "__ret_"; static const char *getRegisterTypeName(unsigned RegNo) { #define TEST_REGCLS(cls, clsstr) \ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; - TEST_REGCLS(Preds, pred); - TEST_REGCLS(RRegu16, u16); - TEST_REGCLS(RRegu32, u32); - TEST_REGCLS(RRegu64, u64); - TEST_REGCLS(RRegf32, f32); - TEST_REGCLS(RRegf64, f64); + TEST_REGCLS(RegPred, pred); + TEST_REGCLS(RegI16, b16); + TEST_REGCLS(RegI32, b32); + TEST_REGCLS(RegI64, b64); + TEST_REGCLS(RegF32, b32); + TEST_REGCLS(RegF64, b64); #undef TEST_REGCLS llvm_unreachable("Not in any register class!"); @@ -162,6 +174,27 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + (ST.supportsDouble() ? "" : ", map_f64_to_f32"))); + // .address_size directive is optional, but it must immediately follow + // the .target directive if present within a module + if (ST.supportsPTX23()) { + std::string addrSize = ST.is64Bit() ? "64" : "32"; + OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize)); + } + + OutStreamer.AddBlankLine(); + + // Define any .file directives + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + DICompileUnit DIUnit(*I); + StringRef FN = DIUnit.getFilename(); + StringRef Dir = DIUnit.getDirectory(); + GetOrCreateSourceID(FN, Dir); + } + OutStreamer.AddBlankLine(); // declare global variables @@ -194,6 +227,21 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { def += ';'; OutStreamer.EmitRawText(Twine(def)); } + + const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); + DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() + << " frame object(s)\n"); + for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { + DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); + if (FrameInfo->getObjectSize(i) > 0) { + std::string def = "\t.reg .b"; + def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits + def += " s"; + def += utostr(i); + def += ";"; + OutStreamer.EmitRawText(Twine(def)); + } + } } void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -202,6 +250,54 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { raw_string_ostream OS(str); + DebugLoc DL = MI->getDebugLoc(); + if (!DL.isUnknown()) { + + const MDNode *S = DL.getScope(MF->getFunction()->getContext()); + + // This is taken from DwarfDebug.cpp, which is conveniently not a public + // LLVM class. + StringRef Fn; + StringRef Dir; + unsigned Src = 1; + if (S) { + DIDescriptor Scope(S); + if (Scope.isCompileUnit()) { + DICompileUnit CU(S); + Fn = CU.getFilename(); + Dir = CU.getDirectory(); + } else if (Scope.isFile()) { + DIFile F(S); + Fn = F.getFilename(); + Dir = F.getDirectory(); + } else if (Scope.isSubprogram()) { + DISubprogram SP(S); + Fn = SP.getFilename(); + Dir = SP.getDirectory(); + } else if (Scope.isLexicalBlock()) { + DILexicalBlock DB(S); + Fn = DB.getFilename(); + Dir = DB.getDirectory(); + } else + assert(0 && "Unexpected scope info"); + + Src = GetOrCreateSourceID(Fn, Dir); + } + OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(), + 0, 0, 0, Fn); + + const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc(); + + OS << "\t.loc "; + OS << utostr(MDL.getFileNum()); + OS << " "; + OS << utostr(MDL.getLine()); + OS << " "; + OS << utostr(MDL.getColumn()); + OS << "\n"; + } + + // Emit predicate printPredicateOperand(MI, OS); @@ -275,6 +371,11 @@ void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum, OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; } +void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum, + raw_ostream &OS, const char *Modifier) { + OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +} + void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { // Check to see if this is a special global used by LLVM, if so, emit it. if (EmitSpecialLLVMGlobal(gv)) @@ -311,7 +412,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { decl += ".b8 "; decl += gvsym->getName(); decl += "["; - + if (elementTy->isArrayTy()) { assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); @@ -320,7 +421,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { elementTy = arrayTy->getElementType(); unsigned numElements = arrayTy->getNumElements(); - + while (elementTy->isArrayTy()) { arrayTy = dyn_cast<const ArrayType>(elementTy); @@ -336,17 +437,17 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { // Compute the size of the array, in bytes. uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3) * numElements; - + decl += utostr(arraySize); } - + decl += "]"; - + // handle string constants (assume ConstantArray means string) - + if (gv->hasInitializer()) { - Constant *C = gv->getInitializer(); + const Constant *C = gv->getInitializer(); if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) { decl += " = {"; @@ -354,10 +455,11 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { if (i > 0) decl += ","; - - decl += "0x" + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); + + decl += "0x" + + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); } - + decl += "}"; } } @@ -393,17 +495,25 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); const bool isKernel = MFI->isKernel(); - unsigned reg; + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); std::string decl = isKernel ? ".entry" : ".func"; - // Print return register - reg = MFI->retReg(); - if (!isKernel && reg != PTX::NoRegister) { - decl += " (.reg ."; // FIXME: could it return in .param space? - decl += getRegisterTypeName(reg); - decl += " "; - decl += getRegisterName(reg); + unsigned cnt = 0; + + if (!isKernel) { + decl += " ("; + for (PTXMachineFunctionInfo::ret_iterator + i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); + } decl += ")"; } @@ -411,40 +521,31 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { decl += " "; decl += CurrentFnSym->getName().str(); - // Print parameter list - if (!MFI->argRegEmpty()) { - decl += " ("; - if (isKernel) { - unsigned cnt = 0; - for(PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - reg = *i; - assert(reg != PTX::NoRegister && "Not a valid register!"); - if (i != b) - decl += ", "; - decl += ".param ."; - decl += getRegisterTypeName(reg); - decl += " "; - decl += PARAM_PREFIX; - decl += utostr(++cnt); - } + decl += " ("; + + cnt = 0; + + // Print parameters + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + if (isKernel || ST.useParamSpaceForDeviceArgs()) { + decl += ".param .b"; + decl += utostr(*i); + decl += " "; + decl += PARAM_PREFIX; + decl += utostr(++cnt); } else { - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - reg = *i; - assert(reg != PTX::NoRegister && "Not a valid register!"); - if (i != b) - decl += ", "; - decl += ".reg ."; - decl += getRegisterTypeName(reg); - decl += " "; - decl += getRegisterName(reg); - } + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); } - decl += ")"; } + decl += ")"; OutStreamer.EmitRawText(Twine(decl)); } @@ -468,6 +569,33 @@ printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { } } +unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, + StringRef DirName) { + // If FE did not provide a file name, then assume stdin. + if (FileName.empty()) + return GetOrCreateSourceID("<stdin>", StringRef()); + + // MCStream expects full path name as filename. + if (!DirName.empty() && !sys::path::is_absolute(FileName)) { + SmallString<128> FullPathName = DirName; + sys::path::append(FullPathName, FileName); + // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. + return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); + } + + StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); + if (Entry.getValue()) + return Entry.getValue(); + + unsigned SrcId = SourceIdMap.size(); + Entry.setValue(SrcId); + + // Print out a .file directive to specify files for .loc directives. + OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); + + return SrcId; +} + #include "PTXGenAsmWriter.inc" // Force static initialization. diff --git a/lib/Target/PTX/PTXCallingConv.td b/lib/Target/PTX/PTXCallingConv.td new file mode 100644 index 0000000..3e3ff48 --- /dev/null +++ b/lib/Target/PTX/PTXCallingConv.td @@ -0,0 +1,29 @@ + +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. +// +//===----------------------------------------------------------------------===// + +// PTX Formal Parameter Calling Convention +def CC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>, + CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>, + CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>, + CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>> +]>; + +// PTX Return Value Calling Convention +def RetCC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>, + CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>, + CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>, + CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>> +]>; diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp index b3c85da..9adfa62 100644 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "PTXTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -41,8 +42,6 @@ class PTXDAGToDAGISel : public SelectionDAGISel { #include "PTXGenDAGISel.inc" private: - SDNode *SelectREAD_PARAM(SDNode *Node); - // We need this only because we can't match intruction BRAdp // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td SDNode *SelectBRCOND(SDNode *Node); @@ -67,8 +66,6 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { switch (Node->getOpcode()) { - case PTXISD::READ_PARAM: - return SelectREAD_PARAM(Node); case ISD::BRCOND: return SelectBRCOND(Node); default: @@ -76,37 +73,6 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { } } -SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) { - SDValue index = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); - unsigned opcode; - - if (index.getOpcode() != ISD::TargetConstant) - llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant"); - - if (Node->getValueType(0) == MVT::i16) { - opcode = PTX::LDpiU16; - } - else if (Node->getValueType(0) == MVT::i32) { - opcode = PTX::LDpiU32; - } - else if (Node->getValueType(0) == MVT::i64) { - opcode = PTX::LDpiU64; - } - else if (Node->getValueType(0) == MVT::f32) { - opcode = PTX::LDpiF32; - } - else if (Node->getValueType(0) == MVT::f64) { - opcode = PTX::LDpiF64; - } - else { - llvm_unreachable("Unknown parameter type for ld.param"); - } - - return PTXInstrInfo:: - GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index); -} - SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { assert(Node->getNumOperands() >= 3); diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index e9b1d8c..6fcf710 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -15,7 +15,9 @@ #include "PTXISelLowering.h" #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" +#include "PTXSubtarget.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -24,49 +26,80 @@ using namespace llvm; +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "PTXGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. - addRegisterClass(MVT::i1, PTX::PredsRegisterClass); - addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass); - addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass); - addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass); - addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass); - addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass); + addRegisterClass(MVT::i1, PTX::RegPredRegisterClass); + addRegisterClass(MVT::i16, PTX::RegI16RegisterClass); + addRegisterClass(MVT::i32, PTX::RegI32RegisterClass); + addRegisterClass(MVT::i64, PTX::RegI64RegisterClass); + addRegisterClass(MVT::f32, PTX::RegF32RegisterClass); + addRegisterClass(MVT::f64, PTX::RegF64RegisterClass); setBooleanContents(ZeroOrOneBooleanContent); + setMinFunctionAlignment(2); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + //////////////////////////////////// + /////////// Expansion ////////////// + //////////////////////////////////// + + // (any/zero/sign) extload => load + (any/zero/sign) extend - // Turn i16 (z)extload into load + (z)extend setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); - - // Turn f32 extload into load + fextend - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - // Turn f64 truncstore into trunc + store. - setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // f32 extload => load + fextend + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // f64 truncstore => trunc + store + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // sign_extend_inreg => sign_extend + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // br_cc => brcond - // Customize translation of memory addresses - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - - // Expand BR_CC into BRCOND setOperationAction(ISD::BR_CC, MVT::Other, Expand); - // Expand SELECT_CC into SETCC + // select_cc => setcc + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - // need to lower SETCC of Preds into bitwise logic + //////////////////////////////////// + //////////// Legal ///////////////// + //////////////////////////////////// + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + //////////////////////////////////// + //////////// Custom //////////////// + //////////////////////////////////// + + // customise setcc to use bitwise logic if possible + setOperationAction(ISD::SETCC, MVT::i1, Custom); - setMinFunctionAlignment(2); + // customize translation of memory addresses + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); // Compute derived properties from the register classes computeRegisterProperties(); @@ -93,8 +126,10 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { llvm_unreachable("Unknown opcode"); case PTXISD::COPY_ADDRESS: return "PTXISD::COPY_ADDRESS"; - case PTXISD::READ_PARAM: - return "PTXISD::READ_PARAM"; + case PTXISD::LOAD_PARAM: + return "PTXISD::LOAD_PARAM"; + case PTXISD::STORE_PARAM: + return "PTXISD::STORE_PARAM"; case PTXISD::EXIT: return "PTXISD::EXIT"; case PTXISD::RET: @@ -113,18 +148,18 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Op2 = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - + // Look for X == 0, X == 1, X != 0, or X != 1 // We can simplify these to bitwise logic - + if (Op1.getOpcode() == ISD::Constant && (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || cast<ConstantSDNode>(Op1)->isNullValue()) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); + return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); } - + return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); } @@ -149,27 +184,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // Calling Convention Implementation //===----------------------------------------------------------------------===// -namespace { -struct argmap_entry { - MVT::SimpleValueType VT; - TargetRegisterClass *RC; - TargetRegisterClass::iterator loc; - - argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC) - : VT(_VT), RC(_RC), loc(_RC->begin()) {} - - void reset() { loc = RC->begin(); } - bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; } -} argmap[] = { - argmap_entry(MVT::i1, PTX::PredsRegisterClass), - argmap_entry(MVT::i16, PTX::RRegu16RegisterClass), - argmap_entry(MVT::i32, PTX::RRegu32RegisterClass), - argmap_entry(MVT::i64, PTX::RRegu64RegisterClass), - argmap_entry(MVT::f32, PTX::RRegf32RegisterClass), - argmap_entry(MVT::f64, PTX::RRegf64RegisterClass) -}; -} // end anonymous namespace - SDValue PTXTargetLowering:: LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, @@ -181,6 +195,7 @@ SDValue PTXTargetLowering:: if (isVarArg) llvm_unreachable("PTX does not support varargs"); MachineFunction &MF = DAG.getMachineFunction(); + const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); switch (CallConv) { @@ -195,44 +210,76 @@ SDValue PTXTargetLowering:: break; } - // Make sure we don't add argument registers twice - if (MFI->isDoneAddArg()) - llvm_unreachable("cannot add argument registers twice"); - - // Reset argmap before allocation - for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap); - i != e; ++ i) - i->reset(); - - for (int i = 0, e = Ins.size(); i != e; ++ i) { - MVT::SimpleValueType VT = Ins[i].VT.SimpleTy; - - struct argmap_entry *entry = std::find(argmap, - argmap + array_lengthof(argmap), VT); - if (entry == argmap + array_lengthof(argmap)) - llvm_unreachable("Type of argument is not supported"); - - if (MFI->isKernel() && entry->RC == PTX::PredsRegisterClass) - llvm_unreachable("cannot pass preds to kernel"); - - MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); - - unsigned preg = *++(entry->loc); // allocate start from register 1 - unsigned vreg = RegInfo.createVirtualRegister(entry->RC); - RegInfo.addLiveIn(preg, vreg); - - MFI->addArgReg(preg); - - SDValue inval; - if (MFI->isKernel()) - inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain, - DAG.getTargetConstant(i, MVT::i32)); - else - inval = DAG.getCopyFromReg(Chain, dl, vreg, VT); - InVals.push_back(inval); + // We do one of two things here: + // IsKernel || SM >= 2.0 -> Use param space for arguments + // SM < 2.0 -> Use registers for arguments + if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { + // We just need to emit the proper LOAD_PARAM ISDs + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + + assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && + "Kernels cannot take pred operands"); + + SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, + DAG.getTargetConstant(i, MVT::i32)); + InVals.push_back(ArgValue); + + // Instead of storing a physical register in our argument list, we just + // store the total size of the parameter, in bits. The ASM printer + // knows how to process this. + MFI->addArgReg(Ins[i].VT.getStoreSizeInBits()); + } + } + else { + // For device functions, we use the PTX calling convention to do register + // assignments then create CopyFromReg ISDs for the allocated registers + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs, + *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_PTX); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + + CCValAssign& VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + TargetRegisterClass* TRC = 0; + + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + + // Determine which register class we need + if (RegVT == MVT::i1) { + TRC = PTX::RegPredRegisterClass; + } + else if (RegVT == MVT::i16) { + TRC = PTX::RegI16RegisterClass; + } + else if (RegVT == MVT::i32) { + TRC = PTX::RegI32RegisterClass; + } + else if (RegVT == MVT::i64) { + TRC = PTX::RegI64RegisterClass; + } + else if (RegVT == MVT::f32) { + TRC = PTX::RegF32RegisterClass; + } + else if (RegVT == MVT::f64) { + TRC = PTX::RegF64RegisterClass; + } + else { + llvm_unreachable("Unknown parameter type"); + } + + unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); + MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); + + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + InVals.push_back(ArgValue); + + MFI->addArgReg(VA.getLocReg()); + } } - - MFI->doneAddArg(); return Chain; } @@ -254,51 +301,47 @@ SDValue PTXTargetLowering:: assert(Outs.size() == 0 && "Kernel must return void."); return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); case CallingConv::PTX_Device: - assert(Outs.size() <= 1 && "Can at most return one value."); + //assert(Outs.size() <= 1 && "Can at most return one value."); break; } - // PTX_Device - - // return void - if (Outs.size() == 0) - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); + MachineFunction& MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); SDValue Flag; - unsigned reg; - if (Outs[0].VT == MVT::i16) { - reg = PTX::RH0; - } - else if (Outs[0].VT == MVT::i32) { - reg = PTX::R0; - } - else if (Outs[0].VT == MVT::i64) { - reg = PTX::RD0; - } - else if (Outs[0].VT == MVT::f32) { - reg = PTX::F0; - } - else { - assert(Outs[0].VT == MVT::f64 && "Can return only basic types"); - reg = PTX::FD0; - } + // Even though we could use the .param space for return arguments for + // device functions if SM >= 2.0 and the number of return arguments is + // only 1, we just always use registers since this makes the codegen + // easier. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); - MachineFunction &MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); - MFI->setRetReg(reg); + CCInfo.AnalyzeReturn(Outs, RetCC_PTX); + + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign& VA = RVLocs[i]; - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) - DAG.getMachineFunction().getRegInfo().addLiveOut(reg); + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); - // Copy the result values into the output registers - Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag); + unsigned Reg = VA.getLocReg(); - // Guarantee that all emitted copies are stuck together, - // avoiding something bad - Flag = Chain.getValue(1); + DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); - return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); + Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together, + // avoiding something bad + Flag = Chain.getValue(1); + + MFI->addRetReg(Reg); + } + + if (Flag.getNode() == 0) { + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); + } + else { + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); + } } diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h index 225c000..4318541 100644 --- a/lib/Target/PTX/PTXISelLowering.h +++ b/lib/Target/PTX/PTXISelLowering.h @@ -24,12 +24,13 @@ class PTXTargetMachine; namespace PTXISD { enum NodeType { FIRST_NUMBER = ISD::BUILTIN_OP_END, - READ_PARAM, + LOAD_PARAM, + STORE_PARAM, EXIT, RET, COPY_ADDRESS }; -} // namespace PTXISD +} // namespace PTXISD class PTXTargetLowering : public TargetLowering { public: @@ -40,7 +41,7 @@ class PTXTargetLowering : public TargetLowering { virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, @@ -58,9 +59,9 @@ class PTXTargetLowering : public TargetLowering { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - + virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; - + private: SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; }; // class PTXTargetLowering diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td index e4e0999..8cee351 100644 --- a/lib/Target/PTX/PTXInstrFormats.td +++ b/lib/Target/PTX/PTXInstrFormats.td @@ -9,7 +9,7 @@ // PTX Predicate operand, default to (0, 0) = (zero-reg, always). // Leave PrintMethod empty; predicate printing is defined elsewhere. -def pred : PredicateOperand<OtherVT, (ops Preds, i32imm), +def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm), (ops (i1 zero_reg), (i32 0))>; let Namespace = "PTX" in { diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index a12a6d0..425265a 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -18,27 +18,29 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; - +#define GET_INSTRINFO_CTOR #include "PTXGenInstrInfo.inc" +using namespace llvm; + PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM) - : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)), + : PTXGenInstrInfo(), RI(_TM, *this), TM(_TM) {} static const struct map_entry { const TargetRegisterClass *cls; const int opcode; } map[] = { - { &PTX::RRegu16RegClass, PTX::MOVU16rr }, - { &PTX::RRegu32RegClass, PTX::MOVU32rr }, - { &PTX::RRegu64RegClass, PTX::MOVU64rr }, - { &PTX::RRegf32RegClass, PTX::MOVF32rr }, - { &PTX::RRegf64RegClass, PTX::MOVF64rr }, - { &PTX::PredsRegClass, PTX::MOVPREDrr } + { &PTX::RegI16RegClass, PTX::MOVU16rr }, + { &PTX::RegI32RegClass, PTX::MOVU32rr }, + { &PTX::RegI64RegClass, PTX::MOVU64rr }, + { &PTX::RegF32RegClass, PTX::MOVF32rr }, + { &PTX::RegF64RegClass, PTX::MOVF64rr }, + { &PTX::RegPredRegClass, PTX::MOVPREDrr } }; void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, @@ -47,8 +49,8 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { if (map[i].cls->contains(DstReg, SrcReg)) { - const TargetInstrDesc &TID = get(map[i].opcode); - MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg). + const MCInstrDesc &MCID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg). addReg(SrcReg, getKillRegState(KillSrc)); AddDefaultPredicate(MI); return; @@ -69,8 +71,8 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) if (DstRC == map[i].cls) { - const TargetInstrDesc &TID = get(map[i].opcode); - MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg); + const MCInstrDesc &MCID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg); AddDefaultPredicate(MI); return true; } @@ -155,7 +157,7 @@ DefinesPredicate(MachineInstr *MI, const MachineOperand &MO = MI->getOperand(0); - if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass) + if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass) return false; Pred.push_back(MO); @@ -178,13 +180,13 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock::const_iterator iter = MBB.end(); const MachineInstr& instLast1 = *--iter; - const TargetInstrDesc &desc1 = instLast1.getDesc(); + const MCInstrDesc &desc1 = instLast1.getDesc(); // for special case that MBB has only 1 instruction const bool IsSizeOne = MBB.size() == 1; // if IsSizeOne is true, *--iter and instLast2 are invalid // we put a dummy value in instLast2 and desc2 since they are used const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; - const TargetInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc(); + const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc(); DEBUG(dbgs() << "\n"); DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); @@ -288,6 +290,77 @@ InsertBranch(MachineBasicBlock &MBB, } } +// Memory operand folding for spills +void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MII, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineInstr& MI = *MII; + DebugLoc DL = MI.getDebugLoc(); + + DEBUG(dbgs() << "storeRegToStackSlot: " << MI); + + int OpCode; + + // Select the appropriate opcode based on the register class + if (RC == PTX::RegI16RegisterClass) { + OpCode = PTX::STACKSTOREI16; + } else if (RC == PTX::RegI32RegisterClass) { + OpCode = PTX::STACKSTOREI32; + } else if (RC == PTX::RegI64RegisterClass) { + OpCode = PTX::STACKSTOREI32; + } else if (RC == PTX::RegF32RegisterClass) { + OpCode = PTX::STACKSTOREF32; + } else if (RC == PTX::RegF64RegisterClass) { + OpCode = PTX::STACKSTOREF64; + } else { + llvm_unreachable("Unknown PTX register class!"); + } + + // Build the store instruction (really a mov) + MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode)); + MIB.addFrameIndex(FrameIdx); + MIB.addReg(SrcReg); + + AddDefaultPredicate(MIB); +} + +void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MII, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineInstr& MI = *MII; + DebugLoc DL = MI.getDebugLoc(); + + DEBUG(dbgs() << "loadRegToStackSlot: " << MI); + + int OpCode; + + // Select the appropriate opcode based on the register class + if (RC == PTX::RegI16RegisterClass) { + OpCode = PTX::STACKLOADI16; + } else if (RC == PTX::RegI32RegisterClass) { + OpCode = PTX::STACKLOADI32; + } else if (RC == PTX::RegI64RegisterClass) { + OpCode = PTX::STACKLOADI32; + } else if (RC == PTX::RegF32RegisterClass) { + OpCode = PTX::STACKLOADF32; + } else if (RC == PTX::RegF64RegisterClass) { + OpCode = PTX::STACKLOADF64; + } else { + llvm_unreachable("Unknown PTX register class!"); + } + + // Build the load instruction (really a mov) + MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode)); + MIB.addReg(DestReg); + MIB.addFrameIndex(FrameIdx); + + AddDefaultPredicate(MIB); +} + // static helper routines MachineSDNode *PTXInstrInfo:: @@ -316,7 +389,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { } bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { - const TargetInstrDesc &desc = inst.getDesc(); + const MCInstrDesc &desc = inst.getDesc(); return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch(); } diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h index a04be77..871f1ac 100644 --- a/lib/Target/PTX/PTXInstrInfo.h +++ b/lib/Target/PTX/PTXInstrInfo.h @@ -17,6 +17,9 @@ #include "PTXRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#define GET_INSTRINFO_HEADER +#include "PTXGenInstrInfo.inc" + namespace llvm { class PTXTargetMachine; @@ -24,7 +27,7 @@ class MachineSDNode; class SDValue; class SelectionDAG; -class PTXInstrInfo : public TargetInstrInfoImpl { +class PTXInstrInfo : public PTXGenInstrInfo { private: const PTXRegisterInfo RI; PTXTargetMachine &TM; @@ -84,6 +87,29 @@ public: const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; + // Memory operand folding for spills + // TODO: Implement this eventually and get rid of storeRegToStackSlot and + // loadRegFromStackSlot. Doing so will get rid of the "stack" registers + // we currently use to spill, though I doubt the overall effect on ptxas + // output will be large. I have yet to see a case where ptxas is unable + // to see through the "stack" register usage and hence generates + // efficient code anyway. + // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + // MachineInstr* MI, + // const SmallVectorImpl<unsigned> &Ops, + // int FrameIndex) const; + + virtual void storeRegToStackSlot(MachineBasicBlock& MBB, + MachineBasicBlock::iterator MII, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass* RC, + const TargetRegisterInfo* TRI) const; + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MII, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + // static helper routines static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index d5d08be..6bfe906 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -26,10 +26,10 @@ def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; // Shader Model Support -def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">; -def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">; -def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">; -def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">; +def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; +def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; +def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">; +def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">; // PTX Version Support def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; @@ -143,11 +143,11 @@ def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; // Address operands def MEMri32 : Operand<i32> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RRegu32, i32imm); + let MIOperandInfo = (ops RegI32, i32imm); } def MEMri64 : Operand<i64> { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RRegu64, i64imm); + let MIOperandInfo = (ops RegI64, i64imm); } def MEMii32 : Operand<i32> { let PrintMethod = "printMemOperand"; @@ -163,6 +163,10 @@ def MEMpi : Operand<i32> { let PrintMethod = "printParamOperand"; let MIOperandInfo = (ops i32imm); } +def MEMret : Operand<i32> { + let PrintMethod = "printReturnOperand"; + let MIOperandInfo = (ops i32imm); +} // Branch & call targets have OtherVT type. def brtarget : Operand<OtherVT>; @@ -180,181 +184,190 @@ def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>; def PTXexit : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>; def PTXret - : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>; + : SDNode<"PTXISD::RET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def PTXcopyaddress : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; +// Load/store .param space +def PTXloadparam + : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXstoreparam + : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// //===- Floating-Point Instructions - 2 Operand Form -----------------------===// multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> { - def rr32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a), + def rr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), !strconcat(opcstr, ".f32\t$d, $a"), - [(set RRegf32:$d, (opnode RRegf32:$a))]>; - def ri32 : InstPTX<(outs RRegf32:$d), + [(set RegF32:$d, (opnode RegF32:$a))]>; + def ri32 : InstPTX<(outs RegF32:$d), (ins f32imm:$a), !strconcat(opcstr, ".f32\t$d, $a"), - [(set RRegf32:$d, (opnode fpimm:$a))]>; - def rr64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a), + [(set RegF32:$d, (opnode fpimm:$a))]>; + def rr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), !strconcat(opcstr, ".f64\t$d, $a"), - [(set RRegf64:$d, (opnode RRegf64:$a))]>; - def ri64 : InstPTX<(outs RRegf64:$d), + [(set RegF64:$d, (opnode RegF64:$a))]>; + def ri64 : InstPTX<(outs RegF64:$d), (ins f64imm:$a), !strconcat(opcstr, ".f64\t$d, $a"), - [(set RRegf64:$d, (opnode fpimm:$a))]>; + [(set RegF64:$d, (opnode fpimm:$a))]>; } //===- Floating-Point Instructions - 3 Operand Form -----------------------===// multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> { - def rr32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b), + def rr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>; - def ri32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, f32imm:$b), + [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>; + def ri32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>; - def rr64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b), + [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>; + def rr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>; - def ri64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, f64imm:$b), + [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>; + def ri64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>; + [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>; } //===- Floating-Point Instructions - 4 Operand Form -----------------------===// multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> { - def rrr32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b, RRegf32:$c), + def rrr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b, RegF32:$c), !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a, - RRegf32:$b), - RRegf32:$c))]>; - def rri32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b, f32imm:$c), + [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, + RegF32:$b), + RegF32:$c))]>; + def rri32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b, f32imm:$c), !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a, - RRegf32:$b), + [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, + RegF32:$b), fpimm:$c))]>; - def rrr64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b, RRegf64:$c), + def rrr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b, RegF64:$c), !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a, - RRegf64:$b), - RRegf64:$c))]>; - def rri64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b, f64imm:$c), + [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, + RegF64:$b), + RegF64:$c))]>; + def rri64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b, f64imm:$c), !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a, - RRegf64:$b), + [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, + RegF64:$b), fpimm:$c))]>; } multiclass INT3<string opcstr, SDNode opnode> { - def rr16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, RRegu16:$b), + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, ".u16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; - def ri16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, i16imm:$b), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), !strconcat(opcstr, ".u16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, RRegu32:$b), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), !strconcat(opcstr, ".u32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; - def ri32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, i32imm:$b), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), !strconcat(opcstr, ".u32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, RRegu64:$b), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), !strconcat(opcstr, ".u64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; - def ri64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, i64imm:$b), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), !strconcat(opcstr, ".u64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } multiclass PTX_LOGIC<string opcstr, SDNode opnode> { - def ripreds : InstPTX<(outs Preds:$d), - (ins Preds:$a, i1imm:$b), + def ripreds : InstPTX<(outs RegPred:$d), + (ins RegPred:$a, i1imm:$b), !strconcat(opcstr, ".pred\t$d, $a, $b"), - [(set Preds:$d, (opnode Preds:$a, imm:$b))]>; - def rrpreds : InstPTX<(outs Preds:$d), - (ins Preds:$a, Preds:$b), + [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>; + def rrpreds : InstPTX<(outs RegPred:$d), + (ins RegPred:$a, RegPred:$b), !strconcat(opcstr, ".pred\t$d, $a, $b"), - [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>; - def rr16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, RRegu16:$b), + [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>; + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, ".b16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; - def ri16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, i16imm:$b), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), !strconcat(opcstr, ".b16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; - def rr32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, RRegu32:$b), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), !strconcat(opcstr, ".b32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; - def ri32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, i32imm:$b), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), !strconcat(opcstr, ".b32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; - def rr64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, RRegu64:$b), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), !strconcat(opcstr, ".b64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; - def ri64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, i64imm:$b), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), !strconcat(opcstr, ".b64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } multiclass INT3ntnc<string opcstr, SDNode opnode> { - def rr16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, RRegu16:$b), + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; - def rr32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, RRegu32:$b), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; - def rr64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, RRegu64:$b), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; - def ri16 : InstPTX<(outs RRegu16:$d), - (ins RRegu16:$a, i16imm:$b), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; - def ri32 : InstPTX<(outs RRegu32:$d), - (ins RRegu32:$a, i32imm:$b), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; - def ri64 : InstPTX<(outs RRegu64:$d), - (ins RRegu64:$a, i64imm:$b), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; - def ir16 : InstPTX<(outs RRegu16:$d), - (ins i16imm:$a, RRegu16:$b), + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; + def ir16 : InstPTX<(outs RegI16:$d), + (ins i16imm:$a, RegI16:$b), !strconcat(opcstr, "16\t$d, $a, $b"), - [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>; - def ir32 : InstPTX<(outs RRegu32:$d), - (ins i32imm:$a, RRegu32:$b), + [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>; + def ir32 : InstPTX<(outs RegI32:$d), + (ins i32imm:$a, RegI32:$b), !strconcat(opcstr, "32\t$d, $a, $b"), - [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>; - def ir64 : InstPTX<(outs RRegu64:$d), - (ins i64imm:$a, RRegu64:$b), + [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>; + def ir64 : InstPTX<(outs RegI64:$d), + (ins i64imm:$a, RegI64:$b), !strconcat(opcstr, "64\t$d, $a, $b"), - [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>; + [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>; } multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, @@ -362,63 +375,63 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, // TODO support 5-operand format: p|q, a, b, c def rr - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>; + [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>; def ri - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>; + [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>; def rr_and_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_and_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_or_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_or_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_xor_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_xor_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_and_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; def ri_and_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; def rr_or_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; def ri_or_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; def rr_xor_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; def ri_xor_not_r - : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; } multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, @@ -426,74 +439,74 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, // TODO support 5-operand format: p|q, a, b, c def rr_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), - [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>; + [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>; def rr_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), - [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>; + [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>; def rr_and_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_and_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_or_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_or_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_xor_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_xor_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_and_not_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; def rr_and_not_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; def rr_or_not_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; def rr_or_not_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; def rr_xor_not_r_u - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; def rr_xor_not_r_o - : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; } multiclass PTX_SELP<RegisterClass RC, string regclsname> { def rr - : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c), + : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c), !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), - [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>; + [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>; } multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { @@ -524,11 +537,11 @@ multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_lo } multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { - defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>; - defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>; - defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>; - defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>; - defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>; + defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; } multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> { @@ -559,11 +572,11 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_st } multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { - defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>; - defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>; - defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>; - defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>; - defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>; + defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; } //===----------------------------------------------------------------------===// @@ -584,44 +597,59 @@ defm REM : INT3<"rem", urem>; defm FNEG : PTX_FLOAT_2OP<"neg", fneg>; // Standard Binary Operations -defm FADD : PTX_FLOAT_3OP<"add", fadd>; -defm FSUB : PTX_FLOAT_3OP<"sub", fsub>; -defm FMUL : PTX_FLOAT_3OP<"mul", fmul>; - -// TODO: Allow user selection of rounding modes for fdiv. -// For division, we need to have f32 and f64 differently. -// For f32, we just always use .approx since it is supported on all hardware -// for PTX 1.4+, which is our minimum target. -def FDIVrr32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, RRegf32:$b), - "div.approx.f32\t$d, $a, $b", - [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>; -def FDIVri32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a, f32imm:$b), - "div.approx.f32\t$d, $a, $b", - [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>; - -// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0. -def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b), +defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>; +defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>; +defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>; + +// For floating-point division: +// SM_13+ defaults to .rn for f32 and f64, +// SM10 must *not* provide a rounding + +// TODO: +// - Allow user selection of rounding modes for fdiv +// - Add support for -prec-div=false (.approx) + +def FDIVrr32SM13 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), + "div.rn.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVri32SM13 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), + "div.rn.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVrr32SM10 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), + "div.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, + Requires<[FDivNoRoundingMode]>; +def FDIVri32SM10 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), + "div.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, + Requires<[FDivNoRoundingMode]>; + +def FDIVrr64SM13 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), "div.rn.f64\t$d, $a, $b", - [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>, - Requires<[SupportsSM13]>; -def FDIVri64SM13 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, f64imm:$b), + [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVri64SM13 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), "div.rn.f64\t$d, $a, $b", - [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>, - Requires<[SupportsSM13]>; -def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, RRegf64:$b), + [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVrr64SM10 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), "div.f64\t$d, $a, $b", - [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>, - Requires<[DoesNotSupportSM13]>; -def FDIVri64SM10 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a, f64imm:$b), + [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, + Requires<[FDivNoRoundingMode]>; +def FDIVri64SM10 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), "div.f64\t$d, $a, $b", - [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>, - Requires<[DoesNotSupportSM13]>; + [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, + Requires<[FDivNoRoundingMode]>; @@ -633,40 +661,42 @@ def FDIVri64SM10 : InstPTX<(outs RRegf64:$d), // In the short term, mad is supported on all PTX versions and we use a // default rounding mode no matter what shader model or PTX version. // TODO: Allow the rounding mode to be selectable through llc. -defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>; -defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>; +defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, + Requires<[FMadNeedsRoundingMode, SupportsFMA]>; +defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, + Requires<[FMadNoRoundingMode, SupportsFMA]>; ///===- Floating-Point Intrinsic Instructions -----------------------------===// -def FSQRT32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a), +def FSQRT32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), "sqrt.rn.f32\t$d, $a", - [(set RRegf32:$d, (fsqrt RRegf32:$a))]>; + [(set RegF32:$d, (fsqrt RegF32:$a))]>; -def FSQRT64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a), +def FSQRT64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), "sqrt.rn.f64\t$d, $a", - [(set RRegf64:$d, (fsqrt RRegf64:$a))]>; + [(set RegF64:$d, (fsqrt RegF64:$a))]>; -def FSIN32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a), +def FSIN32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), "sin.approx.f32\t$d, $a", - [(set RRegf32:$d, (fsin RRegf32:$a))]>; + [(set RegF32:$d, (fsin RegF32:$a))]>; -def FSIN64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a), +def FSIN64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), "sin.approx.f64\t$d, $a", - [(set RRegf64:$d, (fsin RRegf64:$a))]>; + [(set RegF64:$d, (fsin RegF64:$a))]>; -def FCOS32 : InstPTX<(outs RRegf32:$d), - (ins RRegf32:$a), +def FCOS32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), "cos.approx.f32\t$d, $a", - [(set RRegf32:$d, (fcos RRegf32:$a))]>; + [(set RegF32:$d, (fcos RegF32:$a))]>; -def FCOS64 : InstPTX<(outs RRegf64:$d), - (ins RRegf64:$a), +def FCOS64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), "cos.approx.f64\t$d, $a", - [(set RRegf64:$d, (fcos RRegf64:$a))]>; + [(set RegF64:$d, (fcos RegF64:$a))]>; ///===- Comparison and Selection Instructions -----------------------------===// @@ -675,56 +705,68 @@ def FCOS64 : InstPTX<(outs RRegf64:$d), // Compare u16 -defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">; -defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">; -defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">; -defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">; -defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">; -defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">; +defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">; +defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">; +defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">; +defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">; +defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">; +defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">; +defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">; +defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">; +defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">; +defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">; // Compare u32 -defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">; -defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">; -defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">; -defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">; -defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">; -defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">; +defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">; +defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">; +defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">; +defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">; +defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">; +defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">; +defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">; +defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">; +defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">; +defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">; // Compare u64 -defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">; -defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">; -defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">; -defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">; -defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">; -defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">; +defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">; +defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">; +defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">; +defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">; +defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">; +defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">; +defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">; +defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">; +defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">; +defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">; // Compare f32 -defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">; -defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">; -defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">; -defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">; -defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">; -defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">; +defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">; +defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">; +defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">; +defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">; +defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">; +defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">; // Compare f64 -defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">; -defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">; -defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">; -defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">; -defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">; -defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">; +defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">; +defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">; +defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">; +defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">; +defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">; +defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">; // .selp -defm PTX_SELPu16 : PTX_SELP<RRegu16, "u16">; -defm PTX_SELPu32 : PTX_SELP<RRegu32, "u32">; -defm PTX_SELPu64 : PTX_SELP<RRegu64, "u64">; -defm PTX_SELPf32 : PTX_SELP<RRegf32, "f32">; -defm PTX_SELPf64 : PTX_SELP<RRegf64, "f64">; +defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">; +defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">; +defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">; +defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">; +defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">; ///===- Logic and Shift Instructions --------------------------------------===// @@ -740,47 +782,47 @@ defm XOR : PTX_LOGIC<"xor", xor>; let neverHasSideEffects = 1 in { def MOVPREDrr - : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>; + : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; def MOVU16rr - : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>; + : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>; def MOVU32rr - : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>; + : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>; def MOVU64rr - : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>; + : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>; def MOVF32rr - : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>; + : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>; def MOVF64rr - : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>; + : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOVPREDri - : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a", - [(set Preds:$d, imm:$a)]>; + : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a", + [(set RegPred:$d, imm:$a)]>; def MOVU16ri - : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", - [(set RRegu16:$d, imm:$a)]>; + : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", + [(set RegI16:$d, imm:$a)]>; def MOVU32ri - : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", - [(set RRegu32:$d, imm:$a)]>; + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RegI32:$d, imm:$a)]>; def MOVU64ri - : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", - [(set RRegu64:$d, imm:$a)]>; + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RegI64:$d, imm:$a)]>; def MOVF32ri - : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", - [(set RRegf32:$d, fpimm:$a)]>; + : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", + [(set RegF32:$d, fpimm:$a)]>; def MOVF64ri - : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", - [(set RRegf64:$d, fpimm:$a)]>; + : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", + [(set RegF64:$d, fpimm:$a)]>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOVaddr32 - : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", - [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>; + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>; def MOVaddr64 - : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", - [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>; + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; } // Loads @@ -789,17 +831,48 @@ defm LDc : PTX_LD_ALL<"ld.const", load_constant>; defm LDl : PTX_LD_ALL<"ld.local", load_local>; defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; -// This is a special instruction that is manually inserted for kernel parameters -def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a), - "ld.param.u16\t$d, [$a]", []>; -def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a), - "ld.param.u32\t$d, [$a]", []>; -def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a), - "ld.param.u64\t$d, [$a]", []>; -def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a), - "ld.param.f32\t$d, [$a]", []>; -def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a), - "ld.param.f64\t$d, [$a]", []>; +// These instructions are used to load/store from the .param space for +// device and kernel parameters + +let hasSideEffects = 1 in { + def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a), + "ld.param.pred\t$d, [$a]", + [(set RegPred:$d, (PTXloadparam timm:$a))]>; + def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), + "ld.param.u16\t$d, [$a]", + [(set RegI16:$d, (PTXloadparam timm:$a))]>; + def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), + "ld.param.u32\t$d, [$a]", + [(set RegI32:$d, (PTXloadparam timm:$a))]>; + def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), + "ld.param.u64\t$d, [$a]", + [(set RegI64:$d, (PTXloadparam timm:$a))]>; + def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), + "ld.param.f32\t$d, [$a]", + [(set RegF32:$d, (PTXloadparam timm:$a))]>; + def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), + "ld.param.f64\t$d, [$a]", + [(set RegF64:$d, (PTXloadparam timm:$a))]>; + + def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a), + "st.param.pred\t[$d], $a", + [(PTXstoreparam timm:$d, RegPred:$a)]>; + def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a), + "st.param.u16\t[$d], $a", + [(PTXstoreparam timm:$d, RegI16:$a)]>; + def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a), + "st.param.u32\t[$d], $a", + [(PTXstoreparam timm:$d, RegI32:$a)]>; + def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a), + "st.param.u64\t[$d], $a", + [(PTXstoreparam timm:$d, RegI64:$a)]>; + def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a), + "st.param.f32\t[$d], $a", + [(PTXstoreparam timm:$d, RegF32:$a)]>; + def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a), + "st.param.f64\t[$d], $a", + [(PTXstoreparam timm:$d, RegF64:$a)]>; +} // Stores defm STg : PTX_ST_ALL<"st.global", store_global>; @@ -811,136 +884,174 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>; // TODO: Do something with st.param if/when it is needed. // Conversion to pred - +// PTX does not directly support converting to a predicate type, so we fake it +// by performing a greater-than test between the value and zero. This follows +// the C convention that any non-zero value is equivalent to 'true'. def CVT_pred_u16 - : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a", - [(set Preds:$d, (trunc RRegu16:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0", + [(set RegPred:$d, (trunc RegI16:$a))]>; def CVT_pred_u32 - : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a", - [(set Preds:$d, (trunc RRegu32:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0", + [(set RegPred:$d, (trunc RegI32:$a))]>; def CVT_pred_u64 - : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a", - [(set Preds:$d, (trunc RRegu64:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0", + [(set RegPred:$d, (trunc RegI64:$a))]>; def CVT_pred_f32 - : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a", - [(set Preds:$d, (fp_to_uint RRegf32:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0", + [(set RegPred:$d, (fp_to_uint RegF32:$a))]>; def CVT_pred_f64 - : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a", - [(set Preds:$d, (fp_to_uint RRegf64:$a))]>; + : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0", + [(set RegPred:$d, (fp_to_uint RegF64:$a))]>; // Conversion to u16 +// PTX does not directly support converting a predicate to a value, so we +// use a select instruction to select either 0 or 1 (integer or fp) based +// on the truth value of the predicate. +def CVT_u16_preda + : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", + [(set RegI16:$d, (anyext RegPred:$a))]>; def CVT_u16_pred - : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a", - [(set RRegu16:$d, (zext Preds:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", + [(set RegI16:$d, (zext RegPred:$a))]>; + +def CVT_u16_preds + : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", + [(set RegI16:$d, (sext RegPred:$a))]>; def CVT_u16_u32 - : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a", - [(set RRegu16:$d, (trunc RRegu32:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", + [(set RegI16:$d, (trunc RegI32:$a))]>; def CVT_u16_u64 - : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a", - [(set RRegu16:$d, (trunc RRegu64:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", + [(set RegI16:$d, (trunc RegI64:$a))]>; def CVT_u16_f32 - : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a", - [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a", + [(set RegI16:$d, (fp_to_uint RegF32:$a))]>; def CVT_u16_f64 - : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a", - [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>; + : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a", + [(set RegI16:$d, (fp_to_uint RegF64:$a))]>; // Conversion to u32 def CVT_u32_pred - : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a", - [(set RRegu32:$d, (zext Preds:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", + [(set RegI32:$d, (zext RegPred:$a))]>; + +def CVT_u32_b16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", + [(set RegI32:$d, (anyext RegI16:$a))]>; def CVT_u32_u16 - : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a", - [(set RRegu32:$d, (zext RRegu16:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", + [(set RegI32:$d, (zext RegI16:$a))]>; + +def CVT_u32_preds + : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", + [(set RegI32:$d, (sext RegPred:$a))]>; + +def CVT_u32_s16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a", + [(set RegI32:$d, (sext RegI16:$a))]>; def CVT_u32_u64 - : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a", - [(set RRegu32:$d, (trunc RRegu64:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", + [(set RegI32:$d, (trunc RegI64:$a))]>; def CVT_u32_f32 - : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a", - [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a", + [(set RegI32:$d, (fp_to_uint RegF32:$a))]>; def CVT_u32_f64 - : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a", - [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>; + : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a", + [(set RegI32:$d, (fp_to_uint RegF64:$a))]>; // Conversion to u64 def CVT_u64_pred - : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a", - [(set RRegu64:$d, (zext Preds:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", + [(set RegI64:$d, (zext RegPred:$a))]>; + +def CVT_u64_preds + : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", + [(set RegI64:$d, (sext RegPred:$a))]>; def CVT_u64_u16 - : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a", - [(set RRegu64:$d, (zext RRegu16:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", + [(set RegI64:$d, (zext RegI16:$a))]>; + +def CVT_u64_s16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a", + [(set RegI64:$d, (sext RegI16:$a))]>; def CVT_u64_u32 - : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a", - [(set RRegu64:$d, (zext RRegu32:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", + [(set RegI64:$d, (zext RegI32:$a))]>; + +def CVT_u64_s32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a", + [(set RegI64:$d, (sext RegI32:$a))]>; def CVT_u64_f32 - : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a", - [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", + [(set RegI64:$d, (fp_to_uint RegF32:$a))]>; def CVT_u64_f64 - : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a", - [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>; + : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a", + [(set RegI64:$d, (fp_to_uint RegF64:$a))]>; // Conversion to f32 def CVT_f32_pred - : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a", - [(set RRegf32:$d, (uint_to_fp Preds:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegPred:$a), + "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0 + [(set RegF32:$d, (uint_to_fp RegPred:$a))]>; def CVT_f32_u16 - : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a", - [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a", + [(set RegF32:$d, (uint_to_fp RegI16:$a))]>; def CVT_f32_u32 - : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a", - [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a", + [(set RegF32:$d, (uint_to_fp RegI32:$a))]>; def CVT_f32_u64 - : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a", - [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a", + [(set RegF32:$d, (uint_to_fp RegI64:$a))]>; def CVT_f32_f64 - : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a", - [(set RRegf32:$d, (fround RRegf64:$a))]>; + : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a", + [(set RegF32:$d, (fround RegF64:$a))]>; // Conversion to f64 def CVT_f64_pred - : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a", - [(set RRegf64:$d, (uint_to_fp Preds:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegPred:$a), + "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0 + [(set RegF64:$d, (uint_to_fp RegPred:$a))]>; def CVT_f64_u16 - : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a", - [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a", + [(set RegF64:$d, (uint_to_fp RegI16:$a))]>; def CVT_f64_u32 - : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a", - [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a", + [(set RegF64:$d, (uint_to_fp RegI32:$a))]>; def CVT_f64_u64 - : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a", - [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a", + [(set RegF64:$d, (uint_to_fp RegI64:$a))]>; def CVT_f64_f32 - : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a", - [(set RRegf64:$d, (fextend RRegf32:$a))]>; + : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", + [(set RegF64:$d, (fextend RegF32:$a))]>; ///===- Control Flow Instructions -----------------------------------------===// @@ -951,7 +1062,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isBranch = 1, isTerminator = 1 in { // FIXME: The pattern part is blank because I cannot (or do not yet know - // how to) use the first operand of PredicateOperand (a Preds register) here + // how to) use the first operand of PredicateOperand (a RegPred register) here def BRAdp : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [/*(brcond pred:$_p, bb:$d)*/]>; @@ -962,6 +1073,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; } +///===- Spill Instructions ------------------------------------------------===// +// Special instructions used for stack spilling +def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a), + "mov.u16\ts$d, $a", []>; +def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a), + "mov.u32\ts$d, $a", []>; +def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a), + "mov.u64\ts$d, $a", []>; +def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a), + "mov.f32\ts$d, $a", []>; +def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a), + "mov.f64\ts$d, $a", []>; + +def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a), + "mov.u16\t$d, s$a", []>; +def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a), + "mov.u32\t$d, s$a", []>; +def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a), + "mov.u64\t$d, s$a", []>; +def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a), + "mov.f32\t$d, s$a", []>; +def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a), + "mov.f64\t$d, s$a", []>; + ///===- Intrinsic Instructions --------------------------------------------===// include "PTXIntrinsicInstrInfo.td" diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td index 320934a..8d97909 100644 --- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td +++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -14,14 +14,14 @@ // PTX Special Purpose Register Accessor Intrinsics class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop> - : InstPTX<(outs RRegu64:$d), (ins), + : InstPTX<(outs RegI64:$d), (ins), !strconcat("mov.u64\t$d, %", regname), - [(set RRegu64:$d, (intop))]>; + [(set RegI64:$d, (intop))]>; class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop> - : InstPTX<(outs RRegu32:$d), (ins), + : InstPTX<(outs RegI32:$d), (ins), !strconcat("mov.u32\t$d, %", regname), - [(set RRegu32:$d, (intop))]>; + [(set RegI32:$d, (intop))]>; // TODO Add read vector-version of special registers diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp index 1574670..b13a3da 100644 --- a/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -23,7 +23,6 @@ #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetAsmInfo.h" using namespace llvm; @@ -115,7 +114,8 @@ public: virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, - const MCSymbol *Label); + const MCSymbol *Label, + unsigned PointerSize); virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); @@ -260,7 +260,8 @@ void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias, void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, - const MCSymbol *Label) { + const MCSymbol *Label, + unsigned PointerSize) { report_fatal_error("Unimplemented."); } @@ -367,7 +368,7 @@ void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, int64_t IntValue; if (!Value->EvaluateAsAbsolute(IntValue)) report_fatal_error("Don't know how to emit this value."); - if (getContext().getTargetAsmInfo().isLittleEndian()) { + if (getContext().getAsmInfo().isLittleEndian()) { EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); } else { diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index c5e1910..6fe9e6c 100644 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -54,8 +54,6 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n"); - unsigned retreg = MFI->retReg(); - DEBUG(dbgs() << "PTX::NoRegister == " << PTX::NoRegister << "\n" << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n"); @@ -68,15 +66,13 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { // FIXME: This is a slow linear scanning for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg) if (MRI.isPhysRegUsed(reg) && - reg != retreg && + !MFI->isRetReg(reg) && (MFI->isKernel() || !MFI->isArgReg(reg))) MFI->addLocalVarReg(reg); // Notify MachineFunctionInfo that I've done adding local var reg MFI->doneAddLocalVar(); - DEBUG(dbgs() << "Return Reg: " << retreg << "\n"); - DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->argRegBegin(), e = MFI->argRegEnd(); i != e; ++i) diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 81df1c2..9d65f5b 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -15,6 +15,7 @@ #define PTX_MACHINE_FUNCTION_INFO_H #include "PTX.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { @@ -25,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo { private: bool is_kernel; std::vector<unsigned> reg_arg, reg_local_var; - unsigned reg_ret; + std::vector<unsigned> reg_ret; bool _isDoneAddArg; public: @@ -39,19 +40,22 @@ public: void addArgReg(unsigned reg) { reg_arg.push_back(reg); } void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } - void setRetReg(unsigned reg) { reg_ret = reg; } + void addRetReg(unsigned reg) { + if (!isRetReg(reg)) { + reg_ret.push_back(reg); + } + } void doneAddArg(void) { _isDoneAddArg = true; } void doneAddLocalVar(void) {} - bool isDoneAddArg(void) { return _isDoneAddArg; } - bool isKernel() const { return is_kernel; } typedef std::vector<unsigned>::const_iterator reg_iterator; typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator; + typedef std::vector<unsigned>::const_iterator ret_iterator; bool argRegEmpty() const { return reg_arg.empty(); } int getNumArg() const { return reg_arg.size(); } @@ -64,12 +68,19 @@ public: reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } reg_iterator localVarRegEnd() const { return reg_local_var.end(); } - unsigned retReg() const { return reg_ret; } + bool retRegEmpty() const { return reg_ret.empty(); } + int getNumRet() const { return reg_ret.size(); } + ret_iterator retRegBegin() const { return reg_ret.begin(); } + ret_iterator retRegEnd() const { return reg_ret.end(); } bool isArgReg(unsigned reg) const { return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); } + bool isRetReg(unsigned reg) const { + return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end(); + } + bool isLocalVarReg(unsigned reg) const { return std::find(reg_local_var.begin(), reg_local_var.end(), reg) != reg_local_var.end(); diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp index 0f3e7bc..cb56ea9 100644 --- a/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/lib/Target/PTX/PTXRegisterInfo.cpp @@ -13,7 +13,39 @@ #include "PTX.h" #include "PTXRegisterInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define GET_REGINFO_TARGET_DESC +#include "PTXGenRegisterInfo.inc" using namespace llvm; -#include "PTXGenRegisterInfo.inc" +PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, + const TargetInstrInfo &TII) + : PTXGenRegisterInfo() { +} + +void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + RegScavenger *RS) const { + unsigned Index; + MachineInstr& MI = *II; + + Index = 0; + while (!MI.getOperand(Index).isFI()) { + ++Index; + assert(Index < MI.getNumOperands() && + "Instr does not have a FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(Index).getIndex(); + + DEBUG(dbgs() << "eliminateFrameIndex: " << MI); + DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n"); + DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n"); + + // This frame index is post stack slot re-use assignments + MI.getOperand(Index).ChangeToImmediate(FrameIndex); +} diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h index dc56352..0b63cb6 100644 --- a/lib/Target/PTX/PTXRegisterInfo.h +++ b/lib/Target/PTX/PTXRegisterInfo.h @@ -17,7 +17,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/BitVector.h" -#include "PTXGenRegisterInfo.h.inc" +#define GET_REGINFO_HEADER +#include "PTXGenRegisterInfo.inc" namespace llvm { class PTXTargetMachine; @@ -25,7 +26,7 @@ class MachineFunction; struct PTXRegisterInfo : public PTXGenRegisterInfo { PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII) {} + const TargetInstrInfo &TII); virtual const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const { @@ -38,11 +39,9 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo { return Reserved; // reserve no regs } - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, + virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - RegScavenger *RS = NULL) const { - llvm_unreachable("PTX does not support general function call"); - } + RegScavenger *RS = NULL) const; virtual unsigned getFrameRegister(const MachineFunction &MF) const { llvm_unreachable("PTX does not have a frame register"); diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index f616141..1313d24 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -1,3 +1,4 @@ + //===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// // // The LLVM Compiler Infrastructure @@ -21,16 +22,16 @@ class PTXReg<string n> : Register<n> { ///===- Predicate Registers -----------------------------------------------===// -def P0 : PTXReg<"p0">; -def P1 : PTXReg<"p1">; -def P2 : PTXReg<"p2">; -def P3 : PTXReg<"p3">; -def P4 : PTXReg<"p4">; -def P5 : PTXReg<"p5">; -def P6 : PTXReg<"p6">; -def P7 : PTXReg<"p7">; -def P8 : PTXReg<"p8">; -def P9 : PTXReg<"p9">; +def P0 : PTXReg<"p0">; +def P1 : PTXReg<"p1">; +def P2 : PTXReg<"p2">; +def P3 : PTXReg<"p3">; +def P4 : PTXReg<"p4">; +def P5 : PTXReg<"p5">; +def P6 : PTXReg<"p6">; +def P7 : PTXReg<"p7">; +def P8 : PTXReg<"p8">; +def P9 : PTXReg<"p9">; def P10 : PTXReg<"p10">; def P11 : PTXReg<"p11">; def P12 : PTXReg<"p12">; @@ -85,19 +86,83 @@ def P60 : PTXReg<"p60">; def P61 : PTXReg<"p61">; def P62 : PTXReg<"p62">; def P63 : PTXReg<"p63">; +def P64 : PTXReg<"p64">; +def P65 : PTXReg<"p65">; +def P66 : PTXReg<"p66">; +def P67 : PTXReg<"p67">; +def P68 : PTXReg<"p68">; +def P69 : PTXReg<"p69">; +def P70 : PTXReg<"p70">; +def P71 : PTXReg<"p71">; +def P72 : PTXReg<"p72">; +def P73 : PTXReg<"p73">; +def P74 : PTXReg<"p74">; +def P75 : PTXReg<"p75">; +def P76 : PTXReg<"p76">; +def P77 : PTXReg<"p77">; +def P78 : PTXReg<"p78">; +def P79 : PTXReg<"p79">; +def P80 : PTXReg<"p80">; +def P81 : PTXReg<"p81">; +def P82 : PTXReg<"p82">; +def P83 : PTXReg<"p83">; +def P84 : PTXReg<"p84">; +def P85 : PTXReg<"p85">; +def P86 : PTXReg<"p86">; +def P87 : PTXReg<"p87">; +def P88 : PTXReg<"p88">; +def P89 : PTXReg<"p89">; +def P90 : PTXReg<"p90">; +def P91 : PTXReg<"p91">; +def P92 : PTXReg<"p92">; +def P93 : PTXReg<"p93">; +def P94 : PTXReg<"p94">; +def P95 : PTXReg<"p95">; +def P96 : PTXReg<"p96">; +def P97 : PTXReg<"p97">; +def P98 : PTXReg<"p98">; +def P99 : PTXReg<"p99">; +def P100 : PTXReg<"p100">; +def P101 : PTXReg<"p101">; +def P102 : PTXReg<"p102">; +def P103 : PTXReg<"p103">; +def P104 : PTXReg<"p104">; +def P105 : PTXReg<"p105">; +def P106 : PTXReg<"p106">; +def P107 : PTXReg<"p107">; +def P108 : PTXReg<"p108">; +def P109 : PTXReg<"p109">; +def P110 : PTXReg<"p110">; +def P111 : PTXReg<"p111">; +def P112 : PTXReg<"p112">; +def P113 : PTXReg<"p113">; +def P114 : PTXReg<"p114">; +def P115 : PTXReg<"p115">; +def P116 : PTXReg<"p116">; +def P117 : PTXReg<"p117">; +def P118 : PTXReg<"p118">; +def P119 : PTXReg<"p119">; +def P120 : PTXReg<"p120">; +def P121 : PTXReg<"p121">; +def P122 : PTXReg<"p122">; +def P123 : PTXReg<"p123">; +def P124 : PTXReg<"p124">; +def P125 : PTXReg<"p125">; +def P126 : PTXReg<"p126">; +def P127 : PTXReg<"p127">; -///===- 16-bit Integer Registers ------------------------------------------===// +///===- 16-Bit Registers --------------------------------------------------===// -def RH0 : PTXReg<"rh0">; -def RH1 : PTXReg<"rh1">; -def RH2 : PTXReg<"rh2">; -def RH3 : PTXReg<"rh3">; -def RH4 : PTXReg<"rh4">; -def RH5 : PTXReg<"rh5">; -def RH6 : PTXReg<"rh6">; -def RH7 : PTXReg<"rh7">; -def RH8 : PTXReg<"rh8">; -def RH9 : PTXReg<"rh9">; +def RH0 : PTXReg<"rh0">; +def RH1 : PTXReg<"rh1">; +def RH2 : PTXReg<"rh2">; +def RH3 : PTXReg<"rh3">; +def RH4 : PTXReg<"rh4">; +def RH5 : PTXReg<"rh5">; +def RH6 : PTXReg<"rh6">; +def RH7 : PTXReg<"rh7">; +def RH8 : PTXReg<"rh8">; +def RH9 : PTXReg<"rh9">; def RH10 : PTXReg<"rh10">; def RH11 : PTXReg<"rh11">; def RH12 : PTXReg<"rh12">; @@ -152,20 +217,83 @@ def RH60 : PTXReg<"rh60">; def RH61 : PTXReg<"rh61">; def RH62 : PTXReg<"rh62">; def RH63 : PTXReg<"rh63">; +def RH64 : PTXReg<"rh64">; +def RH65 : PTXReg<"rh65">; +def RH66 : PTXReg<"rh66">; +def RH67 : PTXReg<"rh67">; +def RH68 : PTXReg<"rh68">; +def RH69 : PTXReg<"rh69">; +def RH70 : PTXReg<"rh70">; +def RH71 : PTXReg<"rh71">; +def RH72 : PTXReg<"rh72">; +def RH73 : PTXReg<"rh73">; +def RH74 : PTXReg<"rh74">; +def RH75 : PTXReg<"rh75">; +def RH76 : PTXReg<"rh76">; +def RH77 : PTXReg<"rh77">; +def RH78 : PTXReg<"rh78">; +def RH79 : PTXReg<"rh79">; +def RH80 : PTXReg<"rh80">; +def RH81 : PTXReg<"rh81">; +def RH82 : PTXReg<"rh82">; +def RH83 : PTXReg<"rh83">; +def RH84 : PTXReg<"rh84">; +def RH85 : PTXReg<"rh85">; +def RH86 : PTXReg<"rh86">; +def RH87 : PTXReg<"rh87">; +def RH88 : PTXReg<"rh88">; +def RH89 : PTXReg<"rh89">; +def RH90 : PTXReg<"rh90">; +def RH91 : PTXReg<"rh91">; +def RH92 : PTXReg<"rh92">; +def RH93 : PTXReg<"rh93">; +def RH94 : PTXReg<"rh94">; +def RH95 : PTXReg<"rh95">; +def RH96 : PTXReg<"rh96">; +def RH97 : PTXReg<"rh97">; +def RH98 : PTXReg<"rh98">; +def RH99 : PTXReg<"rh99">; +def RH100 : PTXReg<"rh100">; +def RH101 : PTXReg<"rh101">; +def RH102 : PTXReg<"rh102">; +def RH103 : PTXReg<"rh103">; +def RH104 : PTXReg<"rh104">; +def RH105 : PTXReg<"rh105">; +def RH106 : PTXReg<"rh106">; +def RH107 : PTXReg<"rh107">; +def RH108 : PTXReg<"rh108">; +def RH109 : PTXReg<"rh109">; +def RH110 : PTXReg<"rh110">; +def RH111 : PTXReg<"rh111">; +def RH112 : PTXReg<"rh112">; +def RH113 : PTXReg<"rh113">; +def RH114 : PTXReg<"rh114">; +def RH115 : PTXReg<"rh115">; +def RH116 : PTXReg<"rh116">; +def RH117 : PTXReg<"rh117">; +def RH118 : PTXReg<"rh118">; +def RH119 : PTXReg<"rh119">; +def RH120 : PTXReg<"rh120">; +def RH121 : PTXReg<"rh121">; +def RH122 : PTXReg<"rh122">; +def RH123 : PTXReg<"rh123">; +def RH124 : PTXReg<"rh124">; +def RH125 : PTXReg<"rh125">; +def RH126 : PTXReg<"rh126">; +def RH127 : PTXReg<"rh127">; +///===- 32-Bit Registers --------------------------------------------------===// -///===- 32-bit Integer Registers ------------------------------------------===// - -def R0 : PTXReg<"r0">; -def R1 : PTXReg<"r1">; -def R2 : PTXReg<"r2">; -def R3 : PTXReg<"r3">; -def R4 : PTXReg<"r4">; -def R5 : PTXReg<"r5">; -def R6 : PTXReg<"r6">; -def R7 : PTXReg<"r7">; -def R8 : PTXReg<"r8">; -def R9 : PTXReg<"r9">; +def R0 : PTXReg<"r0">; +def R1 : PTXReg<"r1">; +def R2 : PTXReg<"r2">; +def R3 : PTXReg<"r3">; +def R4 : PTXReg<"r4">; +def R5 : PTXReg<"r5">; +def R6 : PTXReg<"r6">; +def R7 : PTXReg<"r7">; +def R8 : PTXReg<"r8">; +def R9 : PTXReg<"r9">; def R10 : PTXReg<"r10">; def R11 : PTXReg<"r11">; def R12 : PTXReg<"r12">; @@ -220,20 +348,83 @@ def R60 : PTXReg<"r60">; def R61 : PTXReg<"r61">; def R62 : PTXReg<"r62">; def R63 : PTXReg<"r63">; +def R64 : PTXReg<"r64">; +def R65 : PTXReg<"r65">; +def R66 : PTXReg<"r66">; +def R67 : PTXReg<"r67">; +def R68 : PTXReg<"r68">; +def R69 : PTXReg<"r69">; +def R70 : PTXReg<"r70">; +def R71 : PTXReg<"r71">; +def R72 : PTXReg<"r72">; +def R73 : PTXReg<"r73">; +def R74 : PTXReg<"r74">; +def R75 : PTXReg<"r75">; +def R76 : PTXReg<"r76">; +def R77 : PTXReg<"r77">; +def R78 : PTXReg<"r78">; +def R79 : PTXReg<"r79">; +def R80 : PTXReg<"r80">; +def R81 : PTXReg<"r81">; +def R82 : PTXReg<"r82">; +def R83 : PTXReg<"r83">; +def R84 : PTXReg<"r84">; +def R85 : PTXReg<"r85">; +def R86 : PTXReg<"r86">; +def R87 : PTXReg<"r87">; +def R88 : PTXReg<"r88">; +def R89 : PTXReg<"r89">; +def R90 : PTXReg<"r90">; +def R91 : PTXReg<"r91">; +def R92 : PTXReg<"r92">; +def R93 : PTXReg<"r93">; +def R94 : PTXReg<"r94">; +def R95 : PTXReg<"r95">; +def R96 : PTXReg<"r96">; +def R97 : PTXReg<"r97">; +def R98 : PTXReg<"r98">; +def R99 : PTXReg<"r99">; +def R100 : PTXReg<"r100">; +def R101 : PTXReg<"r101">; +def R102 : PTXReg<"r102">; +def R103 : PTXReg<"r103">; +def R104 : PTXReg<"r104">; +def R105 : PTXReg<"r105">; +def R106 : PTXReg<"r106">; +def R107 : PTXReg<"r107">; +def R108 : PTXReg<"r108">; +def R109 : PTXReg<"r109">; +def R110 : PTXReg<"r110">; +def R111 : PTXReg<"r111">; +def R112 : PTXReg<"r112">; +def R113 : PTXReg<"r113">; +def R114 : PTXReg<"r114">; +def R115 : PTXReg<"r115">; +def R116 : PTXReg<"r116">; +def R117 : PTXReg<"r117">; +def R118 : PTXReg<"r118">; +def R119 : PTXReg<"r119">; +def R120 : PTXReg<"r120">; +def R121 : PTXReg<"r121">; +def R122 : PTXReg<"r122">; +def R123 : PTXReg<"r123">; +def R124 : PTXReg<"r124">; +def R125 : PTXReg<"r125">; +def R126 : PTXReg<"r126">; +def R127 : PTXReg<"r127">; +///===- 64-Bit Registers --------------------------------------------------===// -///===- 64-bit Integer Registers ------------------------------------------===// - -def RD0 : PTXReg<"rd0">; -def RD1 : PTXReg<"rd1">; -def RD2 : PTXReg<"rd2">; -def RD3 : PTXReg<"rd3">; -def RD4 : PTXReg<"rd4">; -def RD5 : PTXReg<"rd5">; -def RD6 : PTXReg<"rd6">; -def RD7 : PTXReg<"rd7">; -def RD8 : PTXReg<"rd8">; -def RD9 : PTXReg<"rd9">; +def RD0 : PTXReg<"rd0">; +def RD1 : PTXReg<"rd1">; +def RD2 : PTXReg<"rd2">; +def RD3 : PTXReg<"rd3">; +def RD4 : PTXReg<"rd4">; +def RD5 : PTXReg<"rd5">; +def RD6 : PTXReg<"rd6">; +def RD7 : PTXReg<"rd7">; +def RD8 : PTXReg<"rd8">; +def RD9 : PTXReg<"rd9">; def RD10 : PTXReg<"rd10">; def RD11 : PTXReg<"rd11">; def RD12 : PTXReg<"rd12">; @@ -288,204 +479,77 @@ def RD60 : PTXReg<"rd60">; def RD61 : PTXReg<"rd61">; def RD62 : PTXReg<"rd62">; def RD63 : PTXReg<"rd63">; - - -///===- 32-bit Floating-Point Registers -----------------------------------===// - -def F0 : PTXReg<"f0">; -def F1 : PTXReg<"f1">; -def F2 : PTXReg<"f2">; -def F3 : PTXReg<"f3">; -def F4 : PTXReg<"f4">; -def F5 : PTXReg<"f5">; -def F6 : PTXReg<"f6">; -def F7 : PTXReg<"f7">; -def F8 : PTXReg<"f8">; -def F9 : PTXReg<"f9">; -def F10 : PTXReg<"f10">; -def F11 : PTXReg<"f11">; -def F12 : PTXReg<"f12">; -def F13 : PTXReg<"f13">; -def F14 : PTXReg<"f14">; -def F15 : PTXReg<"f15">; -def F16 : PTXReg<"f16">; -def F17 : PTXReg<"f17">; -def F18 : PTXReg<"f18">; -def F19 : PTXReg<"f19">; -def F20 : PTXReg<"f20">; -def F21 : PTXReg<"f21">; -def F22 : PTXReg<"f22">; -def F23 : PTXReg<"f23">; -def F24 : PTXReg<"f24">; -def F25 : PTXReg<"f25">; -def F26 : PTXReg<"f26">; -def F27 : PTXReg<"f27">; -def F28 : PTXReg<"f28">; -def F29 : PTXReg<"f29">; -def F30 : PTXReg<"f30">; -def F31 : PTXReg<"f31">; -def F32 : PTXReg<"f32">; -def F33 : PTXReg<"f33">; -def F34 : PTXReg<"f34">; -def F35 : PTXReg<"f35">; -def F36 : PTXReg<"f36">; -def F37 : PTXReg<"f37">; -def F38 : PTXReg<"f38">; -def F39 : PTXReg<"f39">; -def F40 : PTXReg<"f40">; -def F41 : PTXReg<"f41">; -def F42 : PTXReg<"f42">; -def F43 : PTXReg<"f43">; -def F44 : PTXReg<"f44">; -def F45 : PTXReg<"f45">; -def F46 : PTXReg<"f46">; -def F47 : PTXReg<"f47">; -def F48 : PTXReg<"f48">; -def F49 : PTXReg<"f49">; -def F50 : PTXReg<"f50">; -def F51 : PTXReg<"f51">; -def F52 : PTXReg<"f52">; -def F53 : PTXReg<"f53">; -def F54 : PTXReg<"f54">; -def F55 : PTXReg<"f55">; -def F56 : PTXReg<"f56">; -def F57 : PTXReg<"f57">; -def F58 : PTXReg<"f58">; -def F59 : PTXReg<"f59">; -def F60 : PTXReg<"f60">; -def F61 : PTXReg<"f61">; -def F62 : PTXReg<"f62">; -def F63 : PTXReg<"f63">; - - -///===- 64-bit Floating-Point Registers -----------------------------------===// - -def FD0 : PTXReg<"fd0">; -def FD1 : PTXReg<"fd1">; -def FD2 : PTXReg<"fd2">; -def FD3 : PTXReg<"fd3">; -def FD4 : PTXReg<"fd4">; -def FD5 : PTXReg<"fd5">; -def FD6 : PTXReg<"fd6">; -def FD7 : PTXReg<"fd7">; -def FD8 : PTXReg<"fd8">; -def FD9 : PTXReg<"fd9">; -def FD10 : PTXReg<"fd10">; -def FD11 : PTXReg<"fd11">; -def FD12 : PTXReg<"fd12">; -def FD13 : PTXReg<"fd13">; -def FD14 : PTXReg<"fd14">; -def FD15 : PTXReg<"fd15">; -def FD16 : PTXReg<"fd16">; -def FD17 : PTXReg<"fd17">; -def FD18 : PTXReg<"fd18">; -def FD19 : PTXReg<"fd19">; -def FD20 : PTXReg<"fd20">; -def FD21 : PTXReg<"fd21">; -def FD22 : PTXReg<"fd22">; -def FD23 : PTXReg<"fd23">; -def FD24 : PTXReg<"fd24">; -def FD25 : PTXReg<"fd25">; -def FD26 : PTXReg<"fd26">; -def FD27 : PTXReg<"fd27">; -def FD28 : PTXReg<"fd28">; -def FD29 : PTXReg<"fd29">; -def FD30 : PTXReg<"fd30">; -def FD31 : PTXReg<"fd31">; -def FD32 : PTXReg<"fd32">; -def FD33 : PTXReg<"fd33">; -def FD34 : PTXReg<"fd34">; -def FD35 : PTXReg<"fd35">; -def FD36 : PTXReg<"fd36">; -def FD37 : PTXReg<"fd37">; -def FD38 : PTXReg<"fd38">; -def FD39 : PTXReg<"fd39">; -def FD40 : PTXReg<"fd40">; -def FD41 : PTXReg<"fd41">; -def FD42 : PTXReg<"fd42">; -def FD43 : PTXReg<"fd43">; -def FD44 : PTXReg<"fd44">; -def FD45 : PTXReg<"fd45">; -def FD46 : PTXReg<"f4d6">; -def FD47 : PTXReg<"fd47">; -def FD48 : PTXReg<"fd48">; -def FD49 : PTXReg<"fd49">; -def FD50 : PTXReg<"fd50">; -def FD51 : PTXReg<"fd51">; -def FD52 : PTXReg<"fd52">; -def FD53 : PTXReg<"fd53">; -def FD54 : PTXReg<"fd54">; -def FD55 : PTXReg<"fd55">; -def FD56 : PTXReg<"fd56">; -def FD57 : PTXReg<"fd57">; -def FD58 : PTXReg<"fd58">; -def FD59 : PTXReg<"fd59">; -def FD60 : PTXReg<"fd60">; -def FD61 : PTXReg<"fd61">; -def FD62 : PTXReg<"fd62">; -def FD63 : PTXReg<"fd63">; - +def RD64 : PTXReg<"rd64">; +def RD65 : PTXReg<"rd65">; +def RD66 : PTXReg<"rd66">; +def RD67 : PTXReg<"rd67">; +def RD68 : PTXReg<"rd68">; +def RD69 : PTXReg<"rd69">; +def RD70 : PTXReg<"rd70">; +def RD71 : PTXReg<"rd71">; +def RD72 : PTXReg<"rd72">; +def RD73 : PTXReg<"rd73">; +def RD74 : PTXReg<"rd74">; +def RD75 : PTXReg<"rd75">; +def RD76 : PTXReg<"rd76">; +def RD77 : PTXReg<"rd77">; +def RD78 : PTXReg<"rd78">; +def RD79 : PTXReg<"rd79">; +def RD80 : PTXReg<"rd80">; +def RD81 : PTXReg<"rd81">; +def RD82 : PTXReg<"rd82">; +def RD83 : PTXReg<"rd83">; +def RD84 : PTXReg<"rd84">; +def RD85 : PTXReg<"rd85">; +def RD86 : PTXReg<"rd86">; +def RD87 : PTXReg<"rd87">; +def RD88 : PTXReg<"rd88">; +def RD89 : PTXReg<"rd89">; +def RD90 : PTXReg<"rd90">; +def RD91 : PTXReg<"rd91">; +def RD92 : PTXReg<"rd92">; +def RD93 : PTXReg<"rd93">; +def RD94 : PTXReg<"rd94">; +def RD95 : PTXReg<"rd95">; +def RD96 : PTXReg<"rd96">; +def RD97 : PTXReg<"rd97">; +def RD98 : PTXReg<"rd98">; +def RD99 : PTXReg<"rd99">; +def RD100 : PTXReg<"rd100">; +def RD101 : PTXReg<"rd101">; +def RD102 : PTXReg<"rd102">; +def RD103 : PTXReg<"rd103">; +def RD104 : PTXReg<"rd104">; +def RD105 : PTXReg<"rd105">; +def RD106 : PTXReg<"rd106">; +def RD107 : PTXReg<"rd107">; +def RD108 : PTXReg<"rd108">; +def RD109 : PTXReg<"rd109">; +def RD110 : PTXReg<"rd110">; +def RD111 : PTXReg<"rd111">; +def RD112 : PTXReg<"rd112">; +def RD113 : PTXReg<"rd113">; +def RD114 : PTXReg<"rd114">; +def RD115 : PTXReg<"rd115">; +def RD116 : PTXReg<"rd116">; +def RD117 : PTXReg<"rd117">; +def RD118 : PTXReg<"rd118">; +def RD119 : PTXReg<"rd119">; +def RD120 : PTXReg<"rd120">; +def RD121 : PTXReg<"rd121">; +def RD122 : PTXReg<"rd122">; +def RD123 : PTXReg<"rd123">; +def RD124 : PTXReg<"rd124">; +def RD125 : PTXReg<"rd125">; +def RD126 : PTXReg<"rd126">; +def RD127 : PTXReg<"rd127">; //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// - -def Preds : RegisterClass<"PTX", [i1], 8, - [P0, P1, P2, P3, P4, P5, P6, P7, - P8, P9, P10, P11, P12, P13, P14, P15, - P16, P17, P18, P19, P20, P21, P22, P23, - P24, P25, P26, P27, P28, P29, P30, P31, - P32, P33, P34, P35, P36, P37, P38, P39, - P40, P41, P42, P43, P44, P45, P46, P47, - P48, P49, P50, P51, P52, P53, P54, P55, - P56, P57, P58, P59, P60, P61, P62, P63]>; - -def RRegu16 : RegisterClass<"PTX", [i16], 16, - [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, - RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15, - RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, - RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, - RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, - RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, - RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, - RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63]>; - -def RRegu32 : RegisterClass<"PTX", [i32], 32, - [R0, R1, R2, R3, R4, R5, R6, R7, - R8, R9, R10, R11, R12, R13, R14, R15, - R16, R17, R18, R19, R20, R21, R22, R23, - R24, R25, R26, R27, R28, R29, R30, R31, - R32, R33, R34, R35, R36, R37, R38, R39, - R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, - R56, R57, R58, R59, R60, R61, R62, R63]>; - -def RRegu64 : RegisterClass<"PTX", [i64], 64, - [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, - RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15, - RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, - RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, - RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, - RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, - RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, - RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63]>; - -def RRegf32 : RegisterClass<"PTX", [f32], 32, - [F0, F1, F2, F3, F4, F5, F6, F7, - F8, F9, F10, F11, F12, F13, F14, F15, - F16, F17, F18, F19, F20, F21, F22, F23, - F24, F25, F26, F27, F28, F29, F30, F31, - F32, F33, F34, F35, F36, F37, F38, F39, - F40, F41, F42, F43, F44, F45, F46, F47, - F48, F49, F50, F51, F52, F53, F54, F55, - F56, F57, F58, F59, F60, F61, F62, F63]>; - -def RRegf64 : RegisterClass<"PTX", [f64], 64, - [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7, - FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15, - FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23, - FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31, - FD32, FD33, FD34, FD35, FD36, FD37, FD38, FD39, - FD40, FD41, FD42, FD43, FD44, FD45, FD46, FD47, - FD48, FD49, FD50, FD51, FD52, FD53, FD54, FD55, - FD56, FD57, FD58, FD59, FD60, FD61, FD62, FD63]>; +def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>; +def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>; +def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>; +def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>; +def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>; +def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>; diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp index e8a1dfe..8ec646e 100644 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -7,32 +7,51 @@ // //===----------------------------------------------------------------------===// // -// This file implements the PTX specific subclass of TargetSubtarget. +// This file implements the PTX specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #include "PTXSubtarget.h" +#include "PTX.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "PTXGenSubtargetInfo.inc" using namespace llvm; -PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS, - bool is64Bit) - : PTXShaderModel(PTX_SM_1_0), +PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) + : PTXGenSubtargetInfo(TT, CPU, FS), + PTXTarget(PTX_COMPUTE_1_0), PTXVersion(PTX_VERSION_2_0), SupportsDouble(false), SupportsFMA(true), - Is64Bit(is64Bit) { - std::string TARGET = "generic"; - ParseSubtargetFeatures(FS, TARGET); + Is64Bit(is64Bit) { + std::string TARGET = CPU; + if (TARGET.empty()) + TARGET = "generic"; + ParseSubtargetFeatures(TARGET, FS); } std::string PTXSubtarget::getTargetString() const { - switch(PTXShaderModel) { - default: llvm_unreachable("Unknown shader model"); + switch(PTXTarget) { + default: llvm_unreachable("Unknown PTX target"); case PTX_SM_1_0: return "sm_10"; + case PTX_SM_1_1: return "sm_11"; + case PTX_SM_1_2: return "sm_12"; case PTX_SM_1_3: return "sm_13"; case PTX_SM_2_0: return "sm_20"; + case PTX_SM_2_1: return "sm_21"; + case PTX_SM_2_2: return "sm_22"; + case PTX_SM_2_3: return "sm_23"; + case PTX_COMPUTE_1_0: return "compute_10"; + case PTX_COMPUTE_1_1: return "compute_11"; + case PTX_COMPUTE_1_2: return "compute_12"; + case PTX_COMPUTE_1_3: return "compute_13"; + case PTX_COMPUTE_2_0: return "compute_20"; } } @@ -45,5 +64,3 @@ std::string PTXSubtarget::getPTXVersionString() const { case PTX_VERSION_2_3: return "2.3"; } } - -#include "PTXGenSubtarget.inc" diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h index 59fa696..0921f1f 100644 --- a/lib/Target/PTX/PTXSubtarget.h +++ b/lib/Target/PTX/PTXSubtarget.h @@ -7,26 +7,44 @@ // //===----------------------------------------------------------------------===// // -// This file declares the PTX specific subclass of TargetSubtarget. +// This file declares the PTX specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef PTX_SUBTARGET_H #define PTX_SUBTARGET_H -#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "PTXGenSubtargetInfo.inc" namespace llvm { - class PTXSubtarget : public TargetSubtarget { - private: +class StringRef; + + class PTXSubtarget : public PTXGenSubtargetInfo { + public: /** * Enumeration of Shader Models supported by the back-end. */ - enum PTXShaderModelEnum { + enum PTXTargetEnum { + PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */ + PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */ + PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */ + PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */ + PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */ + PTX_LAST_COMPUTE, + PTX_SM_1_0, /*< Shader Model 1.0 */ + PTX_SM_1_1, /*< Shader Model 1.1 */ + PTX_SM_1_2, /*< Shader Model 1.2 */ PTX_SM_1_3, /*< Shader Model 1.3 */ - PTX_SM_2_0 /*< Shader Model 2.0 */ + PTX_SM_2_0, /*< Shader Model 2.0 */ + PTX_SM_2_1, /*< Shader Model 2.1 */ + PTX_SM_2_2, /*< Shader Model 2.2 */ + PTX_SM_2_3, /*< Shader Model 2.3 */ + PTX_LAST_SM }; /** @@ -41,24 +59,30 @@ namespace llvm { PTX_VERSION_2_3 /*< PTX Version 2.3 */ }; + private: + /// Shader Model supported on the target GPU. - PTXShaderModelEnum PTXShaderModel; + PTXTargetEnum PTXTarget; /// PTX Language Version. PTXVersionEnum PTXVersion; // The native .f64 type is supported on the hardware. bool SupportsDouble; - - // Support the fused-multiply add (FMA) and multiply-add (MAD) instructions + + // Support the fused-multiply add (FMA) and multiply-add (MAD) + // instructions bool SupportsFMA; - + // Use .u64 instead of .u32 for addresses. bool Is64Bit; public: - PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit); + PTXSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit); + + // Target architecture accessors std::string getTargetString() const; std::string getPTXVersionString() const; @@ -68,10 +92,6 @@ namespace llvm { bool is64Bit() const { return Is64Bit; } bool supportsFMA() const { return SupportsFMA; } - - bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; } - - bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; } bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } @@ -79,8 +99,22 @@ namespace llvm { bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; } - std::string ParseSubtargetFeatures(const std::string &FS, - const std::string &CPU); + bool fdivNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool fmadNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool useParamSpaceForDeviceArgs() const { + return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; // class PTXSubtarget } // namespace llvm diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index 1b737c9..ab926e0 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "PTX.h" -#include "PTXMCAsmInfo.h" #include "PTXTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetRegistry.h" @@ -35,9 +34,6 @@ extern "C" void LLVMInitializePTXTarget() { RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target); RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target); - RegisterAsmInfo<PTXMCAsmInfo> Z(ThePTX32Target); - RegisterAsmInfo<PTXMCAsmInfo> W(ThePTX64Target); - TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer); TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer); } @@ -52,11 +48,12 @@ namespace { // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, bool is64Bit) - : LLVMTargetMachine(T, TT), + : LLVMTargetMachine(T, TT, CPU, FS), DataLayout(is64Bit ? DataLayout64 : DataLayout32), - Subtarget(TT, FS, is64Bit), + Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), InstrInfo(*this), TLInfo(*this) { @@ -64,14 +61,16 @@ PTXTargetMachine::PTXTargetMachine(const Target &T, PTX32TargetMachine::PTX32TargetMachine(const Target &T, const std::string& TT, + const std::string& CPU, const std::string& FS) - : PTXTargetMachine(T, TT, FS, false) { + : PTXTargetMachine(T, TT, CPU, FS, false) { } PTX64TargetMachine::PTX64TargetMachine(const Target &T, const std::string& TT, + const std::string& CPU, const std::string& FS) - : PTXTargetMachine(T, TT, FS, true) { + : PTXTargetMachine(T, TT, CPU, FS, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 149be8e..ae42153 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -33,7 +33,8 @@ class PTXTargetMachine : public LLVMTargetMachine { public: PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &FS, bool is64Bit); + const std::string &CPU, const std::string &FS, + bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -61,14 +62,14 @@ class PTX32TargetMachine : public PTXTargetMachine { public: PTX32TargetMachine(const Target &T, const std::string &TT, - const std::string& FS); + const std::string& CPU, const std::string& FS); }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { public: PTX64TargetMachine(const Target &T, const std::string &TT, - const std::string& FS); + const std::string& CPU, const std::string& FS); }; // class PTX32TargetMachine } // namespace llvm diff --git a/lib/Target/PTX/generate-register-td.py b/lib/Target/PTX/generate-register-td.py new file mode 100755 index 0000000..1528690 --- /dev/null +++ b/lib/Target/PTX/generate-register-td.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +##===- generate-register-td.py --------------------------------*-python-*--===## +## +## The LLVM Compiler Infrastructure +## +## This file is distributed under the University of Illinois Open Source +## License. See LICENSE.TXT for details. +## +##===----------------------------------------------------------------------===## +## +## This file describes the PTX register file generator. +## +##===----------------------------------------------------------------------===## + +from sys import argv, exit, stdout + + +if len(argv) != 5: + print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>') + exit(1) + +try: + num_pred = int(argv[1]) + num_16bit = int(argv[2]) + num_32bit = int(argv[3]) + num_64bit = int(argv[4]) +except: + print('ERROR: Invalid integer parameter') + exit(1) + +## Print the register definition file +td_file = open('PTXRegisterInfo.td', 'w') + +td_file.write(''' +//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class PTXReg<string n> : Register<n> { + let Namespace = "PTX"; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +''') + + +# Print predicate registers +td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n') +for r in range(0, num_pred): + td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r)) + +# Print 16-bit registers +td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_16bit): + td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r)) + +# Print 32-bit registers +td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_32bit): + td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r)) + +# Print 64-bit registers +td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_64bit): + td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r)) + + +td_file.write(''' +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// +''') + + +# Print register classes + +td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1)) +td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1)) +td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) +td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) + + +td_file.close() + +## Now write the PTXCallingConv.td file +td_file = open('PTXCallingConv.td', 'w') + +# Reserve 10% of the available registers for return values, and the other 90% +# for parameters +num_ret_pred = int(0.1 * num_pred) +num_ret_16bit = int(0.1 * num_16bit) +num_ret_32bit = int(0.1 * num_32bit) +num_ret_64bit = int(0.1 * num_64bit) +num_param_pred = num_pred - num_ret_pred +num_param_16bit = num_16bit - num_ret_16bit +num_param_32bit = num_32bit - num_ret_32bit +num_param_64bit = num_64bit - num_ret_64bit + +param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)] +ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)] +param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)] +ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)] +param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)] +ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)] +param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)] +ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)] + +param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred) +ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred) +param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit) +ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit) +param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit) +ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit) +param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit) +ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit) + +td_file.write(''' +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. +// +//===----------------------------------------------------------------------===// + +// PTX Formal Parameter Calling Convention +def CC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; + +// PTX Return Value Calling Convention +def RetCC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; +''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit, + ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit)) + + +td_file.close() |