diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PTX')
35 files changed, 5400 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt new file mode 100644 index 0000000..331266d --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/CMakeLists.txt @@ -0,0 +1,26 @@ +set(LLVM_TARGET_DEFINITIONS PTX.td) + +tablegen(PTXGenAsmWriter.inc -gen-asm-writer) +tablegen(PTXGenDAGISel.inc -gen-dag-isel) +tablegen(PTXGenInstrInfo.inc -gen-instr-desc) +tablegen(PTXGenInstrNames.inc -gen-instr-enums) +tablegen(PTXGenRegisterInfo.inc -gen-register-desc) +tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(PTXGenRegisterNames.inc -gen-register-enums) +tablegen(PTXGenSubtarget.inc -gen-subtarget) + +add_llvm_target(PTXCodeGen + PTXAsmPrinter.cpp + PTXISelDAGToDAG.cpp + PTXISelLowering.cpp + PTXInstrInfo.cpp + PTXFrameLowering.cpp + PTXMCAsmInfo.cpp + PTXMCAsmStreamer.cpp + PTXMFInfoExtract.cpp + PTXRegisterInfo.cpp + PTXSubtarget.cpp + PTXTargetMachine.cpp + ) + +add_subdirectory(TargetInfo) diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt new file mode 100644 index 0000000..df0f63f --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMPTXDesc + PTXMCTargetDesc.cpp + PTXMCAsmInfo.cpp + ) diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile new file mode 100644 index 0000000..35f5a7b --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMPTXDesc + +# Hack: we need to include 'main' target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp new file mode 100644 index 0000000..efefead --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp @@ -0,0 +1,35 @@ +//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the PTXMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "PTXMCAsmInfo.h" +#include "llvm/ADT/Triple.h" + +using namespace llvm; + +PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) { + Triple TheTriple(TT); + if (TheTriple.getArch() == Triple::ptx64) + PointerSize = 8; + + CommentString = "//"; + + PrivateGlobalPrefix = "$L__"; + + AllowPeriodsInName = false; + + HasSetDirective = false; + + HasDotTypeDotSizeDirective = false; + + HasSingleParameterDotFile = false; +} diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h new file mode 100644 index 0000000..03f5d66 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h @@ -0,0 +1,28 @@ +//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the PTXMCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_MCASM_INFO_H +#define PTX_MCASM_INFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + class StringRef; + + struct PTXMCAsmInfo : public MCAsmInfo { + explicit PTXMCAsmInfo(const Target &T, const StringRef &TT); + }; +} // namespace llvm + +#endif // PTX_MCASM_INFO_H diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp new file mode 100644 index 0000000..23f70bd --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -0,0 +1,60 @@ +//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides PTX specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "PTXMCTargetDesc.h" +#include "PTXMCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "PTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "PTXGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "PTXGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createPTXMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitPTXMCInstrInfo(X); + return X; +} + +extern "C" void LLVMInitializePTXMCInstrInfo() { + TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); +} + +static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitPTXMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +extern "C" void LLVMInitializePTXMCSubtargetInfo() { + TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target, + createPTXMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target, + createPTXMCSubtargetInfo); +} + +extern "C" void LLVMInitializePTXMCAsmInfo() { + RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); + RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); +} diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h new file mode 100644 index 0000000..1003b0b --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h @@ -0,0 +1,38 @@ +//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file provides PTX specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXMCTARGETDESC_H +#define PTXMCTARGETDESC_H + +namespace llvm { +class MCSubtargetInfo; +class Target; +class StringRef; + +extern Target ThePTX32Target; +extern Target ThePTX64Target; + +} // End llvm namespace + +// Defines symbolic names for PTX registers. +#define GET_REGINFO_ENUM +#include "PTXGenRegisterInfo.inc" + +// Defines symbolic names for the PTX instructions. +#define GET_INSTRINFO_ENUM +#include "PTXGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "PTXGenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h new file mode 100644 index 0000000..28cab24 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTX.h @@ -0,0 +1,48 @@ +//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// PTX back-end. 
+// +//===----------------------------------------------------------------------===// + +#ifndef PTX_H +#define PTX_H + +#include "MCTargetDesc/PTXMCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class PTXTargetMachine; + class FunctionPass; + + namespace PTX { + enum StateSpace { + GLOBAL = 0, // default to global state space + CONSTANT = 1, + LOCAL = 2, + PARAMETER = 3, + SHARED = 4 + }; + + enum Predicate { + PRED_NORMAL = 0, + PRED_NEGATE = 1 + }; + } // namespace PTX + + FunctionPass *createPTXISelDag(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel); + + FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel); + +} // namespace llvm; + +#endif // PTX_H diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td new file mode 100644 index 0000000..f6fbe9f --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTX.td @@ -0,0 +1,135 @@ +//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This is the top level entry point for the PTX target. 
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Subtarget Features +//===----------------------------------------------------------------------===// + +//===- Architectural Features ---------------------------------------------===// + +def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true", + "Do not demote .f64 to .f32">; + +def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false", + "Disable Fused-Multiply Add">; + +//===- PTX Version --------------------------------------------------------===// + +def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", + "Use PTX Language Version 2.0">; + +def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", + "Use PTX Language Version 2.1">; + +def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", + "Use PTX Language Version 2.2">; + +def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3", + "Use PTX Language Version 2.3">; + +//===- PTX Target ---------------------------------------------------------===// + +def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0", + "Use Shader Model 1.0">; +def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1", + "Use Shader Model 1.1">; +def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", + "Use Shader Model 1.2">; +def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", + "Use Shader Model 1.3">; +def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", + "Use Shader Model 2.0">; +def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", 
+ "Use Shader Model 2.1">; +def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", + "Use Shader Model 2.2">; +def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", + "Use Shader Model 2.3">; + +def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", + "PTX_COMPUTE_1_0", + "Use Compute Compatibility 1.0">; +def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget", + "PTX_COMPUTE_1_1", + "Use Compute Compatibility 1.1">; +def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget", + "PTX_COMPUTE_1_2", + "Use Compute Compatibility 1.2">; +def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", + "PTX_COMPUTE_1_3", + "Use Compute Compatibility 1.3">; +def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", + "PTX_COMPUTE_2_0", + "Use Compute Compatibility 2.0">; + +//===----------------------------------------------------------------------===// +// PTX supported processors +//===----------------------------------------------------------------------===// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +def : Proc<"generic", []>; + +// Processor definitions for compute/shader models +def : Proc<"compute_10", [FeatureCOMPUTE10]>; +def : Proc<"compute_11", [FeatureCOMPUTE11]>; +def : Proc<"compute_12", [FeatureCOMPUTE12]>; +def : Proc<"compute_13", [FeatureCOMPUTE13]>; +def : Proc<"compute_20", [FeatureCOMPUTE20]>; +def : Proc<"sm_10", [FeatureSM10]>; +def : Proc<"sm_11", [FeatureSM11]>; +def : Proc<"sm_12", [FeatureSM12]>; +def : Proc<"sm_13", [FeatureSM13]>; +def : Proc<"sm_20", [FeatureSM20]>; +def : Proc<"sm_21", [FeatureSM21]>; +def : Proc<"sm_22", [FeatureSM22]>; +def : Proc<"sm_23", [FeatureSM23]>; + +// Processor definitions for common GPU architectures +def : Proc<"g80", [FeatureSM10]>; +def : Proc<"gt200", [FeatureSM13]>; +def : Proc<"gf100", [FeatureSM20, FeatureDouble]>; +def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; + 
+//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "PTXRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Calling Conventions +//===----------------------------------------------------------------------===// + +include "PTXCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "PTXInstrInfo.td" + +def PTXInstrInfo : InstrInfo; + +//===----------------------------------------------------------------------===// +// Target Declaration +//===----------------------------------------------------------------------===// + +def PTX : Target { + let InstructionSet = PTXInstrInfo; +} diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp new file mode 100644 index 0000000..2848d54 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp @@ -0,0 +1,605 @@ +//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to PTX assembly language. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-asm-printer" + +#include "PTX.h" +#include "PTXMachineFunctionInfo.h" +#include "PTXTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class PTXAsmPrinter : public AsmPrinter { +public: + explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) {} + + const char *getPassName() const { return "PTX Assembly Printer"; } + + bool doFinalization(Module &M); + + virtual void EmitStartOfAsmFile(Module &M); + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); } + + virtual void EmitInstruction(const MachineInstr *MI); + + void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); + void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); + void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, + const char *Modifier = 0); + 
void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
+
+  // Returns the numeric id associated with a source file, registering the
+  // file (and emitting its .file directive) the first time it is seen.
+  unsigned GetOrCreateSourceID(StringRef FileName,
+                               StringRef DirName);
+
+  // autogen'd.
+  void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+  static const char *getRegisterName(unsigned RegNo);
+
+private:
+  void EmitVariableDeclaration(const GlobalVariable *gv);
+  void EmitFunctionDeclaration();
+
+  // Source file name -> id handed out by GetOrCreateSourceID().
+  StringMap<unsigned> SourceIdMap;
+}; // class PTXAsmPrinter
+} // namespace
+
+// Name prefixes used when printing parameter and return-value operands.
+static const char PARAM_PREFIX[] = "__param_";
+static const char RETURN_PREFIX[] = "__ret_";
+
+/// Returns the PTX register type suffix ("pred", "b16", "b32", "b64") for the
+/// register class that contains \p RegNo.  Integer and floating-point classes
+/// of the same width share one suffix.
+static const char *getRegisterTypeName(unsigned RegNo) {
+#define TEST_REGCLS(cls, clsstr) \
+  if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+  TEST_REGCLS(RegPred, pred);
+  TEST_REGCLS(RegI16, b16);
+  TEST_REGCLS(RegI32, b32);
+  TEST_REGCLS(RegI64, b64);
+  TEST_REGCLS(RegF32, b32);
+  TEST_REGCLS(RegF64, b64);
+#undef TEST_REGCLS
+
+  llvm_unreachable("Not in any register class!");
+  return NULL;
+}
+
+/// Maps a PTX::StateSpace value (carried as the LLVM address space number) to
+/// the directive name that follows the '.' in a variable declaration.
+static const char *getStateSpaceName(unsigned addressSpace) {
+  switch (addressSpace) {
+  default: llvm_unreachable("Unknown state space");
+  case PTX::GLOBAL:    return "global";
+  case PTX::CONSTANT:  return "const";
+  case PTX::LOCAL:     return "local";
+  case PTX::PARAMETER: return "param";
+  case PTX::SHARED:    return "shared";
+  }
+  return NULL;
+}
+
+/// Returns the PTX fundamental type (".f32", ".u16", ...) of \p type.  Array
+/// and pointer types are peeled until a float, double or 16/32/64-bit integer
+/// element type is reached; anything else is reported as unreachable.
+static const char *getTypeName(const Type* type) {
+  while (true) {
+    switch (type->getTypeID()) {
+      default: llvm_unreachable("Unknown type");
+      case Type::FloatTyID: return ".f32";
+      case Type::DoubleTyID: return ".f64";
+      case Type::IntegerTyID:
+        switch (type->getPrimitiveSizeInBits()) {
+          default: llvm_unreachable("Unknown integer bit-width");
+          case 16: return ".u16";
+          case 32: return ".u32";
+          case 64: return ".u64";
+        }
+      case Type::ArrayTyID:
+      case Type::PointerTyID:
+        // The type id already proves this is a SequentialType, so use the
+        // asserting cast<> rather than dereferencing an unchecked dyn_cast<>.
+        type = cast<SequentialType>(type)->getElementType();
+        break;
+    }
+  }
+  return NULL;
+}
+
+bool PTXAsmPrinter::doFinalization(Module &M) {
+  // XXX Temporarily
remove global variables so that doFinalization() will not + // emit them again (global variables are emitted at beginning). + + Module::GlobalListType &global_list = M.getGlobalList(); + int i, n = global_list.size(); + GlobalVariable **gv_array = new GlobalVariable* [n]; + + // first, back-up GlobalVariable in gv_array + i = 0; + for (Module::global_iterator I = global_list.begin(), E = global_list.end(); + I != E; ++I) + gv_array[i++] = &*I; + + // second, empty global_list + while (!global_list.empty()) + global_list.remove(global_list.begin()); + + // call doFinalization + bool ret = AsmPrinter::doFinalization(M); + + // now we restore global variables + for (i = 0; i < n; i ++) + global_list.insert(global_list.end(), gv_array[i]); + + delete[] gv_array; + return ret; +} + +void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) +{ + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + + OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString())); + OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + + (ST.supportsDouble() ? "" + : ", map_f64_to_f32"))); + // .address_size directive is optional, but it must immediately follow + // the .target directive if present within a module + if (ST.supportsPTX23()) { + std::string addrSize = ST.is64Bit() ? 
"64" : "32"; + OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize)); + } + + OutStreamer.AddBlankLine(); + + // Define any .file directives + DebugInfoFinder DbgFinder; + DbgFinder.processModule(M); + + for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), + E = DbgFinder.compile_unit_end(); I != E; ++I) { + DICompileUnit DIUnit(*I); + StringRef FN = DIUnit.getFilename(); + StringRef Dir = DIUnit.getDirectory(); + GetOrCreateSourceID(FN, Dir); + } + + OutStreamer.AddBlankLine(); + + // declare global variables + for (Module::const_global_iterator i = M.global_begin(), e = M.global_end(); + i != e; ++i) + EmitVariableDeclaration(i); +} + +bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + SetupMachineFunction(MF); + EmitFunctionDeclaration(); + EmitFunctionBody(); + return false; +} + +void PTXAsmPrinter::EmitFunctionBodyStart() { + OutStreamer.EmitRawText(Twine("{")); + + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + + // Print local variable definition + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) { + unsigned reg = *i; + + std::string def = "\t.reg ."; + def += getRegisterTypeName(reg); + def += ' '; + def += getRegisterName(reg); + def += ';'; + OutStreamer.EmitRawText(Twine(def)); + } + + const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); + DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() + << " frame object(s)\n"); + for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { + DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); + if (FrameInfo->getObjectSize(i) > 0) { + std::string def = "\t.reg .b"; + def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits + def += " s"; + def += utostr(i); + def += ";"; + OutStreamer.EmitRawText(Twine(def)); + } + } +} + +void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { + std::string str; + str.reserve(64); + + 
raw_string_ostream OS(str);
+
+  // Instructions that carry a source location get a ".loc" line in front of
+  // them.
+  DebugLoc DL = MI->getDebugLoc();
+  if (!DL.isUnknown()) {
+
+    const MDNode *S = DL.getScope(MF->getFunction()->getContext());
+
+    // This is taken from DwarfDebug.cpp, which is conveniently not a public
+    // LLVM class.
+    StringRef Fn;
+    StringRef Dir;
+    unsigned Src = 1;
+    if (S) {
+      DIDescriptor Scope(S);
+      if (Scope.isCompileUnit()) {
+        DICompileUnit CU(S);
+        Fn = CU.getFilename();
+        Dir = CU.getDirectory();
+      } else if (Scope.isFile()) {
+        DIFile F(S);
+        Fn = F.getFilename();
+        Dir = F.getDirectory();
+      } else if (Scope.isSubprogram()) {
+        DISubprogram SP(S);
+        Fn = SP.getFilename();
+        Dir = SP.getDirectory();
+      } else if (Scope.isLexicalBlock()) {
+        DILexicalBlock DB(S);
+        Fn = DB.getFilename();
+        Dir = DB.getDirectory();
+      } else
+        assert(0 && "Unexpected scope info");
+
+      Src = GetOrCreateSourceID(Fn, Dir);
+    }
+    OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(),
+                                     0, 0, 0, Fn);
+
+    // Read the location back from the MC context and spell it out by hand,
+    // since the instruction text is emitted as raw text below.
+    const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc();
+
+    OS << "\t.loc ";
+    OS << utostr(MDL.getFileNum());
+    OS << " ";
+    OS << utostr(MDL.getLine());
+    OS << " ";
+    OS << utostr(MDL.getColumn());
+    OS << "\n";
+  }
+
+
+  // Emit predicate
+  printPredicateOperand(MI, OS);
+
+  // Write instruction to str
+  printInstruction(MI, OS);
+  OS << ';';
+  OS.flush();
+
+  StringRef strref = StringRef(str);
+  OutStreamer.EmitRawText(strref);
+}
+
+/// Prints operand \p opNum of \p MI to \p OS: a mangled symbol for globals, a
+/// decimal value for integer immediates, the block symbol for basic blocks,
+/// the register name for registers, and a hex literal for FP immediates.
+void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+                                 raw_ostream &OS) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+
+  switch (MO.getType()) {
+    default:
+      llvm_unreachable("<unknown operand type>");
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      OS << *Mang->getSymbol(MO.getGlobal());
+      break;
+    case MachineOperand::MO_Immediate:
+      // getImm() is 64 bits wide; print it directly.  Casting to 'long'
+      // truncates the value on hosts where long is only 32 bits.
+      OS << MO.getImm();
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      OS << *MO.getMBB()->getSymbol();
+      break;
+    case MachineOperand::MO_Register:
+      OS << getRegisterName(MO.getReg());
+      break;
+    case
MachineOperand::MO_FPImmediate: {
+      // PTX spells floating-point literals as 0F followed by exactly 8 hex
+      // digits (single precision) or 0D followed by exactly 16 hex digits
+      // (double precision).  APInt::toString() drops leading zeros, so the
+      // bit pattern is left-padded with '0' to the required width; this also
+      // covers the all-zero pattern.
+      APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
+      bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
+      const std::string hexBits = constFP.toString(16, false);
+      const std::string::size_type numDigits = isFloat ? 8 : 16;
+
+      OS << (isFloat ? "0F" : "0D");
+      if (hexBits.size() < numDigits)
+        OS << std::string(numDigits - hexBits.size(), '0');
+      OS << hexBits;
+      break;
+    }
+  }
+}
+
+/// Prints a memory operand as "base" or "base+offset".  The base is operand
+/// \p opNum and the offset is operand \p opNum + 1; an immediate offset of
+/// zero is omitted.  \p Modifier is unused.
+void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
+                                    raw_ostream &OS, const char *Modifier) {
+  printOperand(MI, opNum, OS);
+
+  if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
+    return; // don't print "+0"
+
+  OS << "+";
+  printOperand(MI, opNum+1, OS);
+}
+
+/// Prints the parameter name for the immediate index held in operand
+/// \p opNum: PARAM_PREFIX followed by the index plus one.  \p Modifier is
+/// unused.
+void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
+                                      raw_ostream &OS, const char *Modifier) {
+  OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
+/// Prints the return-value name for the immediate index held in operand
+/// \p opNum: RETURN_PREFIX followed by the index plus one.  \p Modifier is
+/// unused.
+void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
+                                       raw_ostream &OS, const char *Modifier) {
+  OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
+void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
+  // Check to see if this is a special global used by LLVM, if so, emit it.
+  if (EmitSpecialLLVMGlobal(gv))
+    return;
+
+  MCSymbol *gvsym = Mang->getSymbol(gv);
+
+  assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
+
+  // The declaration is assembled piece by piece into this string.
+  std::string decl;
+
+  // check if it is defined in some other translation unit
+  if (gv->isDeclaration())
+    decl += ".extern ";
+
+  // state space: e.g., .global
+  decl += ".";
+  decl += getStateSpaceName(gv->getType()->getAddressSpace());
+  decl += " ";
+
+  // alignment (optional).  PTX's .align takes the alignment as a byte count,
+  // which is what getAlignment() returns, so it is emitted as-is rather than
+  // as its base-2 logarithm.
+  unsigned alignment = gv->getAlignment();
+  if (alignment != 0) {
+    decl += ".align ";
+    decl += utostr(alignment);
+    decl += " ";
+  }
+
+
+  if (PointerType::classof(gv->getType())) {
+    const PointerType* pointerTy = cast<PointerType>(gv->getType());
+    const Type* elementTy = pointerTy->getElementType();
+
+    // The variable is declared as a byte array: ".b8 name[<size>]".
+    decl += ".b8 ";
+    decl += gvsym->getName();
+    decl += "[";
+
+    if (elementTy->isArrayTy())
+    {
+      const ArrayType* arrayTy = cast<ArrayType>(elementTy);
+      elementTy = arrayTy->getElementType();
+
+      // getNumElements() is 64 bits wide; keep the running product in
+      // 64 bits so large arrays are not truncated.
+      uint64_t numElements = arrayTy->getNumElements();
+
+      // Flatten nested arrays by multiplying out every dimension.
+      while (elementTy->isArrayTy()) {
+
+        arrayTy = cast<ArrayType>(elementTy);
+        elementTy = arrayTy->getElementType();
+
+        numElements *= arrayTy->getNumElements();
+      }
+
+      // FIXME: isPrimitiveType() == false for i16?
+      assert(elementTy->isSingleValueType() &&
+              "Non-primitive types are not handled");
+
+      // Compute the size of the array, in bytes.
+ uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3) + * numElements; + + decl += utostr(arraySize); + } + + decl += "]"; + + // handle string constants (assume ConstantArray means string) + + if (gv->hasInitializer()) + { + const Constant *C = gv->getInitializer(); + if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) + { + decl += " = {"; + + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) + { + if (i > 0) decl += ","; + + decl += "0x" + + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue()); + } + + decl += "}"; + } + } + } + else { + // Note: this is currently the fall-through case and most likely generates + // incorrect code. + decl += getTypeName(gv->getType()); + decl += " "; + + decl += gvsym->getName(); + + if (ArrayType::classof(gv->getType()) || + PointerType::classof(gv->getType())) + decl += "[]"; + } + + decl += ";"; + + OutStreamer.EmitRawText(Twine(decl)); + + OutStreamer.AddBlankLine(); +} + +void PTXAsmPrinter::EmitFunctionDeclaration() { + // The function label could have already been emitted if two symbols end up + // conflicting due to asm renaming. Detect this and emit an error. + if (!CurrentFnSym->isUndefined()) { + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); + return; + } + + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const bool isKernel = MFI->isKernel(); + const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + + std::string decl = isKernel ? 
".entry" : ".func"; + + unsigned cnt = 0; + + if (!isKernel) { + decl += " ("; + for (PTXMachineFunctionInfo::ret_iterator + i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); + } + decl += ")"; + } + + // Print function name + decl += " "; + decl += CurrentFnSym->getName().str(); + + decl += " ("; + + cnt = 0; + + // Print parameters + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + if (isKernel || ST.useParamSpaceForDeviceArgs()) { + decl += ".param .b"; + decl += utostr(*i); + decl += " "; + decl += PARAM_PREFIX; + decl += utostr(++cnt); + } else { + decl += ".reg ."; + decl += getRegisterTypeName(*i); + decl += " "; + decl += getRegisterName(*i); + } + } + decl += ")"; + + OutStreamer.EmitRawText(Twine(decl)); +} + +void PTXAsmPrinter:: +printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { + int i = MI->findFirstPredOperandIdx(); + if (i == -1) + llvm_unreachable("missing predicate operand"); + + unsigned reg = MI->getOperand(i).getReg(); + int predOp = MI->getOperand(i+1).getImm(); + + DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n"); + + if (reg != PTX::NoRegister) { + O << '@'; + if (predOp == PTX::PRED_NEGATE) + O << '!'; + O << getRegisterName(reg); + } +} + +unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, + StringRef DirName) { + // If FE did not provide a file name, then assume stdin. + if (FileName.empty()) + return GetOrCreateSourceID("<stdin>", StringRef()); + + // MCStream expects full path name as filename. + if (!DirName.empty() && !sys::path::is_absolute(FileName)) { + SmallString<128> FullPathName = DirName; + sys::path::append(FullPathName, FileName); + // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. 
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); + } + + StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName); + if (Entry.getValue()) + return Entry.getValue(); + + unsigned SrcId = SourceIdMap.size(); + Entry.setValue(SrcId); + + // Print out a .file directive to specify files for .loc directives. + OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); + + return SrcId; +} + +#include "PTXGenAsmWriter.inc" + +// Force static initialization. +extern "C" void LLVMInitializePTXAsmPrinter() { + RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target); + RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target); +} diff --git a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td new file mode 100644 index 0000000..3e3ff48 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td @@ -0,0 +1,29 @@ + +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. 
//
//===----------------------------------------------------------------------===//

// PTX Formal Parameter Calling Convention
//
// Formal arguments are always assigned to registers. The register list is
// chosen by value type, and every list covers register numbers 12 through 127
// of its class (numbers 0 through 11 are used by RetCC_PTX below).
def CC_PTX : CallingConv<[
  // i1 arguments: P12 - P127.
  CCIfType<[i1], CCAssignToReg<[
    P12, P13, P14, P15, P16, P17, P18, P19,
    P20, P21, P22, P23, P24, P25, P26, P27, P28, P29,
    P30, P31, P32, P33, P34, P35, P36, P37, P38, P39,
    P40, P41, P42, P43, P44, P45, P46, P47, P48, P49,
    P50, P51, P52, P53, P54, P55, P56, P57, P58, P59,
    P60, P61, P62, P63, P64, P65, P66, P67, P68, P69,
    P70, P71, P72, P73, P74, P75, P76, P77, P78, P79,
    P80, P81, P82, P83, P84, P85, P86, P87, P88, P89,
    P90, P91, P92, P93, P94, P95, P96, P97, P98, P99,
    P100, P101, P102, P103, P104, P105, P106, P107, P108, P109,
    P110, P111, P112, P113, P114, P115, P116, P117, P118, P119,
    P120, P121, P122, P123, P124, P125, P126, P127]>>,
  // i16 arguments: RH12 - RH127.
  CCIfType<[i16], CCAssignToReg<[
    RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19,
    RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29,
    RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39,
    RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49,
    RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59,
    RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69,
    RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79,
    RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89,
    RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99,
    RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109,
    RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119,
    RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
  // i32 and f32 arguments share the R list: R12 - R127.
  CCIfType<[i32,f32], CCAssignToReg<[
    R12, R13, R14, R15, R16, R17, R18, R19,
    R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
    R30, R31, R32, R33, R34, R35, R36, R37, R38, R39,
    R40, R41, R42, R43, R44, R45, R46, R47, R48, R49,
    R50, R51, R52, R53, R54, R55, R56, R57, R58, R59,
    R60, R61, R62, R63, R64, R65, R66, R67, R68, R69,
    R70, R71, R72, R73, R74, R75, R76, R77, R78, R79,
    R80, R81, R82, R83, R84, R85, R86, R87, R88, R89,
    R90, R91, R92, R93, R94, R95, R96, R97, R98, R99,
    R100, R101, R102, R103, R104, R105, R106, R107, R108, R109,
    R110, R111, R112, R113, R114, R115, R116, R117, R118, R119,
    R120, R121, R122, R123, R124, R125, R126, R127]>>,
  // i64 and f64 arguments share the RD list: RD12 - RD127.
  CCIfType<[i64,f64], CCAssignToReg<[
    RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19,
    RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29,
    RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39,
    RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49,
    RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59,
    RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69,
    RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79,
    RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89,
    RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99,
    RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109,
    RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119,
    RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
]>;

// PTX Return Value Calling Convention
//
// Return values are assigned to registers 0 through 11 of the class that
// matches their type, so at most twelve values of one class can be returned.
def RetCC_PTX : CallingConv<[
  CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
  CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
  CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
  CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp new file mode 100644 index 0000000..b621b9d --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp @@ -0,0 +1,24 @@
//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains the PTX implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "PTXFrameLowering.h" +#include "llvm/CodeGen/MachineFunction.h" + +using namespace llvm; + +void PTXFrameLowering::emitPrologue(MachineFunction &MF) const { +} + +void PTXFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { +} diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h new file mode 100644 index 0000000..9320676 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h @@ -0,0 +1,44 @@ +//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_FRAMEINFO_H +#define PTX_FRAMEINFO_H + +#include "PTX.h" +#include "PTXSubtarget.h" +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { + class PTXSubtarget; + +class PTXFrameLowering : public TargetFrameLowering { +protected: + const PTXSubtarget &STI; + +public: + explicit PTXFrameLowering(const PTXSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), + STI(sti) { + } + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. 
+ void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + bool hasFP(const MachineFunction &MF) const { return false; } +}; + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp new file mode 100644 index 0000000..9adfa62 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -0,0 +1,182 @@ +//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the PTX target. +// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +// PTXDAGToDAGISel - PTX specific code to select PTX machine +// instructions for SelectionDAG operations. +class PTXDAGToDAGISel : public SelectionDAGISel { + public: + PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); + + virtual const char *getPassName() const { + return "PTX DAG->DAG Pattern Instruction Selection"; + } + + SDNode *Select(SDNode *Node); + + // Complex Pattern Selectors. 
+ bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); + bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); + + // Include the pieces auto'gened from the target description +#include "PTXGenDAGISel.inc" + + private: + // We need this only because we can't match intruction BRAdp + // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td + SDNode *SelectBRCOND(SDNode *Node); + + bool isImm(const SDValue &operand); + bool SelectImm(const SDValue &operand, SDValue &imm); + + const PTXSubtarget& getSubtarget() const; +}; // class PTXDAGToDAGISel +} // namespace + +// createPTXISelDag - This pass converts a legalized DAG into a +// PTX-specific DAG, ready for instruction scheduling +FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXDAGToDAGISel(TM, OptLevel); +} + +PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(TM, OptLevel) {} + +SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { + switch (Node->getOpcode()) { + case ISD::BRCOND: + return SelectBRCOND(Node); + default: + return SelectCode(Node); + } +} + +SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { + assert(Node->getNumOperands() >= 3); + + SDValue Chain = Node->getOperand(0); + SDValue Pred = Node->getOperand(1); + SDValue Target = Node->getOperand(2); // branch target + SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + assert(Target.getOpcode() == ISD::BasicBlock); + assert(Pred.getValueType() == MVT::i1); + + // Emit BRAdp + SDValue Ops[] = { Target, Pred, PredOp, Chain }; + return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4); +} + +// Match memory operand of the form [reg+reg] +bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { + if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 || + 
isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1))) + return false; + + assert(Addr.getValueType().isSimple() && "Type must be simple"); + + R1 = Addr; + R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + return true; +} + +// Match memory operand of the form [reg], [imm+reg], and [reg+imm] +bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + if (Addr.getOpcode() != ISD::ADD) { + // let SelectADDRii handle the [imm] case + if (isImm(Addr)) + return false; + // it is [reg] + + assert(Addr.getValueType().isSimple() && "Type must be simple"); + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + return true; + } + + if (Addr.getNumOperands() < 2) + return false; + + // let SelectADDRii handle the [imm+imm] case + if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1))) + return false; + + // try [reg+imm] and [imm+reg] + for (int i = 0; i < 2; i ++) + if (SelectImm(Addr.getOperand(1-i), Offset)) { + Base = Addr.getOperand(i); + return true; + } + + // neither [reg+imm] nor [imm+reg] + return false; +} + +// Match memory operand of the form [imm+imm] and [imm] +bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + // is [imm+imm]? + if (Addr.getOpcode() == ISD::ADD) { + return SelectImm(Addr.getOperand(0), Base) && + SelectImm(Addr.getOperand(1), Offset); + } + + // is [imm]? 
+ if (SelectImm(Addr, Base)) { + assert(Addr.getValueType().isSimple() && "Type must be simple"); + + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + return true; + } + + return false; +} + +bool PTXDAGToDAGISel::isImm(const SDValue &operand) { + return ConstantSDNode::classof(operand.getNode()); +} + +bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) { + SDNode *node = operand.getNode(); + if (!ConstantSDNode::classof(node)) + return false; + + ConstantSDNode *CN = cast<ConstantSDNode>(node); + imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), + operand.getValueType()); + return true; +} + +const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const +{ + return TM.getSubtarget<PTXSubtarget>(); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp new file mode 100644 index 0000000..6fcf710 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp @@ -0,0 +1,347 @@ +//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXTargetLowering class. 
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXISelLowering.h" +#include "PTXMachineFunctionInfo.h" +#include "PTXRegisterInfo.h" +#include "PTXSubtarget.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "PTXGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// TargetLowering Implementation +//===----------------------------------------------------------------------===// + +PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) + : TargetLowering(TM, new TargetLoweringObjectFileELF()) { + // Set up the register classes. 
+ addRegisterClass(MVT::i1, PTX::RegPredRegisterClass); + addRegisterClass(MVT::i16, PTX::RegI16RegisterClass); + addRegisterClass(MVT::i32, PTX::RegI32RegisterClass); + addRegisterClass(MVT::i64, PTX::RegI64RegisterClass); + addRegisterClass(MVT::f32, PTX::RegF32RegisterClass); + addRegisterClass(MVT::f64, PTX::RegF64RegisterClass); + + setBooleanContents(ZeroOrOneBooleanContent); + setMinFunctionAlignment(2); + + //////////////////////////////////// + /////////// Expansion ////////////// + //////////////////////////////////// + + // (any/zero/sign) extload => load + (any/zero/sign) extend + + setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); + + // f32 extload => load + fextend + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // f64 truncstore => trunc + store + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // sign_extend_inreg => sign_extend + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // br_cc => brcond + + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + + // select_cc => setcc + + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + + //////////////////////////////////// + //////////// Legal ///////////////// + //////////////////////////////////// + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + //////////////////////////////////// + //////////// Custom //////////////// + //////////////////////////////////// + + // customise setcc to use bitwise logic if possible + + setOperationAction(ISD::SETCC, MVT::i1, Custom); + + // customize translation of memory addresses + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + + // Compute derived properties from the 
register classes + computeRegisterProperties(); +} + +MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const { + return MVT::i1; +} + +SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unimplemented operand"); + case ISD::SETCC: + return LowerSETCC(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + } +} + +const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: + llvm_unreachable("Unknown opcode"); + case PTXISD::COPY_ADDRESS: + return "PTXISD::COPY_ADDRESS"; + case PTXISD::LOAD_PARAM: + return "PTXISD::LOAD_PARAM"; + case PTXISD::STORE_PARAM: + return "PTXISD::STORE_PARAM"; + case PTXISD::EXIT: + return "PTXISD::EXIT"; + case PTXISD::RET: + return "PTXISD::RET"; + } +} + +//===----------------------------------------------------------------------===// +// Custom Lower Operation +//===----------------------------------------------------------------------===// + +SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer"); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + + // Look for X == 0, X == 1, X != 0, or X != 1 + // We can simplify these to bitwise logic + + if (Op1.getOpcode() == ISD::Constant && + (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 || + cast<ConstantSDNode>(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + + return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); + } + + return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); +} + +SDValue PTXTargetLowering:: +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { + EVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + const 
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + + assert(PtrVT.isSimple() && "Pointer must be to primitive type."); + + SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT); + SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS, + dl, + PtrVT.getSimpleVT(), + targetGlobal); + + return movInstr; +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +SDValue PTXTargetLowering:: + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, + SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + if (isVarArg) llvm_unreachable("PTX does not support varargs"); + + MachineFunction &MF = DAG.getMachineFunction(); + const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + + switch (CallConv) { + default: + llvm_unreachable("Unsupported calling convention"); + break; + case CallingConv::PTX_Kernel: + MFI->setKernel(true); + break; + case CallingConv::PTX_Device: + MFI->setKernel(false); + break; + } + + // We do one of two things here: + // IsKernel || SM >= 2.0 -> Use param space for arguments + // SM < 2.0 -> Use registers for arguments + if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { + // We just need to emit the proper LOAD_PARAM ISDs + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + + assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && + "Kernels cannot take pred operands"); + + SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, + DAG.getTargetConstant(i, MVT::i32)); + InVals.push_back(ArgValue); + + // Instead of storing a physical register in our argument list, we just + // store the total size of the parameter, in bits. The ASM printer + // knows how to process this. 
+ MFI->addArgReg(Ins[i].VT.getStoreSizeInBits()); + } + } + else { + // For device functions, we use the PTX calling convention to do register + // assignments then create CopyFromReg ISDs for the allocated registers + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs, + *DAG.getContext()); + + CCInfo.AnalyzeFormalArguments(Ins, CC_PTX); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + + CCValAssign& VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + TargetRegisterClass* TRC = 0; + + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + + // Determine which register class we need + if (RegVT == MVT::i1) { + TRC = PTX::RegPredRegisterClass; + } + else if (RegVT == MVT::i16) { + TRC = PTX::RegI16RegisterClass; + } + else if (RegVT == MVT::i32) { + TRC = PTX::RegI32RegisterClass; + } + else if (RegVT == MVT::i64) { + TRC = PTX::RegI64RegisterClass; + } + else if (RegVT == MVT::f32) { + TRC = PTX::RegF32RegisterClass; + } + else if (RegVT == MVT::f64) { + TRC = PTX::RegF64RegisterClass; + } + else { + llvm_unreachable("Unknown parameter type"); + } + + unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); + MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); + + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + InVals.push_back(ArgValue); + + MFI->addArgReg(VA.getLocReg()); + } + } + + return Chain; +} + +SDValue PTXTargetLowering:: + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, + SelectionDAG &DAG) const { + if (isVarArg) llvm_unreachable("PTX does not support varargs"); + + switch (CallConv) { + default: + llvm_unreachable("Unsupported calling convention."); + case CallingConv::PTX_Kernel: + assert(Outs.size() == 0 && "Kernel must return void."); + return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); + case CallingConv::PTX_Device: + 
//assert(Outs.size() <= 1 && "Can at most return one value."); + break; + } + + MachineFunction& MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + + SDValue Flag; + + // Even though we could use the .param space for return arguments for + // device functions if SM >= 2.0 and the number of return arguments is + // only 1, we just always use registers since this makes the codegen + // easier. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + + CCInfo.AnalyzeReturn(Outs, RetCC_PTX); + + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { + CCValAssign& VA = RVLocs[i]; + + assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + + unsigned Reg = VA.getLocReg(); + + DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); + + Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together, + // avoiding something bad + Flag = Chain.getValue(1); + + MFI->addRetReg(Reg); + } + + if (Flag.getNode() == 0) { + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); + } + else { + return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); + } +} diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h new file mode 100644 index 0000000..4318541 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h @@ -0,0 +1,70 @@ +//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that PTX uses to lower LLVM code into a +// selection DAG. 
//
//===----------------------------------------------------------------------===//

#ifndef PTX_ISEL_LOWERING_H
#define PTX_ISEL_LOWERING_H

#include "llvm/Target/TargetLowering.h"

namespace llvm {
class PTXSubtarget;
class PTXTargetMachine;

namespace PTXISD {
  // PTX-specific SelectionDAG opcodes. Numbering starts right after the
  // generic opcodes (ISD::BUILTIN_OP_END).
  enum NodeType {
    FIRST_NUMBER = ISD::BUILTIN_OP_END,
    LOAD_PARAM,
    STORE_PARAM,
    EXIT,
    RET,
    COPY_ADDRESS
  };
} // namespace PTXISD

// PTXTargetLowering - Describes how LLVM code is lowered to a PTX selection
// DAG: custom operation lowering, formal-argument lowering and return
// lowering.
class PTXTargetLowering : public TargetLowering {
  public:
    explicit PTXTargetLowering(TargetMachine &TM);

    // Printable name of a PTXISD opcode.
    virtual const char *getTargetNodeName(unsigned Opcode) const;

    // Entry point for operations registered as Custom.
    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;

    // Custom lowering of ISD::SETCC.
    virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;

    // Produces one value in InVals for every entry of Ins.
    virtual SDValue
      LowerFormalArguments(SDValue Chain,
                           CallingConv::ID CallConv,
                           bool isVarArg,
                           const SmallVectorImpl<ISD::InputArg> &Ins,
                           DebugLoc dl,
                           SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const;

    // Emits the node that terminates the function for the given convention.
    virtual SDValue
      LowerReturn(SDValue Chain,
                  CallingConv::ID CallConv,
                  bool isVarArg,
                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                  const SmallVectorImpl<SDValue> &OutVals,
                  DebugLoc dl,
                  SelectionDAG &DAG) const;

    // Result type of a comparison on operands of type VT.
    virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;

  private:
    // Custom lowering of ISD::GlobalAddress.
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
}; // class PTXTargetLowering
} // namespace llvm

#endif // PTX_ISEL_LOWERING_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td new file mode 100644 index 0000000..8cee351 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td @@ -0,0 +1,24 @@
//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

// PTX Predicate operand, default to (0, 0) = (zero-reg, always).
// Leave PrintMethod empty; predicate printing is defined elsewhere.
// The operand is a pair: a predicate register and an i32 immediate.
def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
                                     (ops (i1 zero_reg), (i32 0))>;

let Namespace = "PTX" in {
  // Base class of every PTX instruction. The predicate operand $_p is
  // appended after the instruction's own inputs, and every instruction is
  // marked predicable.
  class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
    : Instruction {
    dag OutOperandList = oops;
    dag InOperandList = !con(iops, (ins pred:$_p));
    let AsmString = asmstr; // Predicate printing is defined elsewhere.
    let Pattern = pattern;
    let isPredicable = 1;
  }
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp new file mode 100644 index 0000000..425265a --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp @@ -0,0 +1,410 @@
//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PTX implementation of the TargetInstrInfo class.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-instrinfo" + +#include "PTX.h" +#include "PTXInstrInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define GET_INSTRINFO_CTOR +#include "PTXGenInstrInfo.inc" + +using namespace llvm; + +PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM) + : PTXGenInstrInfo(), + RI(_TM, *this), TM(_TM) {} + +static const struct map_entry { + const TargetRegisterClass *cls; + const int opcode; +} map[] = { + { &PTX::RegI16RegClass, PTX::MOVU16rr }, + { &PTX::RegI32RegClass, PTX::MOVU32rr }, + { &PTX::RegI64RegClass, PTX::MOVU64rr }, + { &PTX::RegF32RegClass, PTX::MOVF32rr }, + { &PTX::RegF64RegClass, PTX::MOVF64rr }, + { &PTX::RegPredRegClass, PTX::MOVPREDrr } +}; + +void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, unsigned SrcReg, + bool KillSrc) const { + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { + if (map[i].cls->contains(DstReg, SrcReg)) { + const MCInstrDesc &MCID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg). 
+ addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPredicate(MI); + return; + } + } + + llvm_unreachable("Impossible reg-to-reg copy"); +} + +bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *SrcRC, + DebugLoc DL) const { + if (DstRC != SrcRC) + return false; + + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) + if (DstRC == map[i].cls) { + const MCInstrDesc &MCID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg); + AddDefaultPredicate(MI); + return true; + } + + return false; +} + +bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const { + switch (MI.getOpcode()) { + default: + return false; + case PTX::MOVU16rr: + case PTX::MOVU32rr: + case PTX::MOVU64rr: + case PTX::MOVF32rr: + case PTX::MOVF64rr: + case PTX::MOVPREDrr: + assert(MI.getNumOperands() >= 2 && + MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && + "Invalid register-register move instruction"); + SrcSubIdx = DstSubIdx = 0; // No sub-registers + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + return true; + } +} + +// predicate support + +bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const { + int i = MI->findFirstPredOperandIdx(); + return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister; +} + +bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + return !isPredicated(MI) && get(MI->getOpcode()).isTerminator(); +} + +bool PTXInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + if (Pred.size() < 2) + llvm_unreachable("lesser than 2 predicate operands are provided"); + + int i = MI->findFirstPredOperandIdx(); + if (i == -1) + llvm_unreachable("missing predicate operand"); + + 
MI->getOperand(i).setReg(Pred[0].getReg()); + MI->getOperand(i+1).setImm(Pred[1].getImm()); + + return true; +} + +bool PTXInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + const MachineOperand &PredReg1 = Pred1[0]; + const MachineOperand &PredReg2 = Pred2[0]; + if (PredReg1.getReg() != PredReg2.getReg()) + return false; + + const MachineOperand &PredOp1 = Pred1[1]; + const MachineOperand &PredOp2 = Pred2[1]; + if (PredOp1.getImm() != PredOp2.getImm()) + return false; + + return true; +} + +bool PTXInstrInfo:: +DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + // If an instruction sets a predicate register, it defines a predicate. + + // TODO supprot 5-operand format of setp instruction + + if (MI->getNumOperands() < 1) + return false; + + const MachineOperand &MO = MI->getOperand(0); + + if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass) + return false; + + Pred.push_back(MO); + Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + return true; +} + +// branch support + +bool PTXInstrInfo:: +AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // TODO implement cases when AllowModify is true + + if (MBB.empty()) + return true; + + MachineBasicBlock::const_iterator iter = MBB.end(); + const MachineInstr& instLast1 = *--iter; + const MCInstrDesc &desc1 = instLast1.getDesc(); + // for special case that MBB has only 1 instruction + const bool IsSizeOne = MBB.size() == 1; + // if IsSizeOne is true, *--iter and instLast2 are invalid + // we put a dummy value in instLast2 and desc2 since they are used + const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; + const MCInstrDesc &desc2 = IsSizeOne ? 
desc1 : instLast2.getDesc(); + + DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n"); + + // this block ends with no branches + if (!IsAnyKindOfBranch(instLast1)) { + DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n"); + return false; + } + + // this block ends with only an unconditional branch + if (desc1.isUnconditionalBranch() && + // when IsSizeOne is true, it "absorbs" the evaluation of instLast2 + (IsSizeOne || !IsAnyKindOfBranch(instLast2))) { + DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n"); + TBB = GetBranchTarget(instLast1); + return false; + } + + // this block ends with a conditional branch and + // it falls through to a successor block + if (desc1.isConditionalBranch() && + IsAnySuccessorAlsoLayoutSuccessor(MBB)) { + DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n"); + TBB = GetBranchTarget(instLast1); + int i = instLast1.findFirstPredOperandIdx(); + Cond.push_back(instLast1.getOperand(i)); + Cond.push_back(instLast1.getOperand(i+1)); + return false; + } + + // when IsSizeOne is true, we are done + if (IsSizeOne) + return true; + + // this block ends with a conditional branch + // followed by an unconditional branch + if (desc2.isConditionalBranch() && + desc1.isUnconditionalBranch()) { + DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n"); + TBB = GetBranchTarget(instLast2); + FBB = GetBranchTarget(instLast1); + int i = instLast2.findFirstPredOperandIdx(); + Cond.push_back(instLast2.getOperand(i)); + Cond.push_back(instLast2.getOperand(i+1)); + return false; + } + + // branch cannot be understood + DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n"); + return true; +} + +unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + unsigned 
count = 0; + while (!MBB.empty()) + if (IsAnyKindOfBranch(MBB.back())) { + MBB.pop_back(); + ++count; + } else + break; + DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n"); + return count; +} + +unsigned PTXInstrInfo:: +InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str() + << "\n"; + else dbgs() << "InsertBranch: TBB: (NULL)\n"); + DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str() + << "\n"; + else dbgs() << "InsertBranch: FBB: (NULL)\n"); + DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n"); + + assert(TBB && "TBB is NULL"); + + if (FBB) { + BuildMI(&MBB, DL, get(PTX::BRAdp)) + .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); + BuildMI(&MBB, DL, get(PTX::BRAd)) + .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + return 2; + } else if (Cond.size()) { + BuildMI(&MBB, DL, get(PTX::BRAdp)) + .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); + return 1; + } else { + BuildMI(&MBB, DL, get(PTX::BRAd)) + .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + return 1; + } +} + +// Memory operand folding for spills +void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MII, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + MachineInstr& MI = *MII; + DebugLoc DL = MI.getDebugLoc(); + + DEBUG(dbgs() << "storeRegToStackSlot: " << MI); + + int OpCode; + + // Select the appropriate opcode based on the register class + if (RC == PTX::RegI16RegisterClass) { + OpCode = PTX::STACKSTOREI16; + } else if (RC == PTX::RegI32RegisterClass) { + 
OpCode = PTX::STACKSTOREI32;
+  } else if (RC == PTX::RegI64RegisterClass) {
+    // 64-bit registers must use the 64-bit stack store; STACKSTOREI32 here
+    // would spill through a 32-bit move.
+    // NOTE(review): assumes PTX::STACKSTOREI64 is defined in PTXInstrInfo.td
+    // next to the other STACKSTORE* opcodes -- confirm.
+    OpCode = PTX::STACKSTOREI64;
+  } else if (RC == PTX::RegF32RegisterClass) {
+    OpCode = PTX::STACKSTOREF32;
+  } else if (RC == PTX::RegF64RegisterClass) {
+    OpCode = PTX::STACKSTOREF64;
+  } else {
+    llvm_unreachable("Unknown PTX register class!");
+  }
+
+  // Build the store instruction (really a mov)
+  MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+  MIB.addFrameIndex(FrameIdx);
+  MIB.addReg(SrcReg);
+
+  AddDefaultPredicate(MIB);
+}
+
+// loadRegFromStackSlot - Insert, before MII, a STACKLOAD* instruction chosen
+// from the register class RC, with DestReg and FrameIdx as operands, followed
+// by the default predicate operands.
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MII,
+                                        unsigned DestReg, int FrameIdx,
+                                        const TargetRegisterClass *RC,
+                                        const TargetRegisterInfo *TRI) const {
+  // MII is an insertion point and may be MBB.end(); only dereference it when
+  // it refers to a real instruction.
+  DebugLoc DL;
+  if (MII != MBB.end()) {
+    DL = MII->getDebugLoc();
+    DEBUG(dbgs() << "loadRegFromStackSlot: " << *MII);
+  }
+
+  int OpCode;
+
+  // Select the appropriate opcode based on the register class
+  if (RC == PTX::RegI16RegisterClass) {
+    OpCode = PTX::STACKLOADI16;
+  } else if (RC == PTX::RegI32RegisterClass) {
+    OpCode = PTX::STACKLOADI32;
+  } else if (RC == PTX::RegI64RegisterClass) {
+    // 64-bit registers must use the 64-bit stack load, mirroring the store.
+    // NOTE(review): assumes PTX::STACKLOADI64 is defined in PTXInstrInfo.td
+    // next to the other STACKLOAD* opcodes -- confirm.
+    OpCode = PTX::STACKLOADI64;
+  } else if (RC == PTX::RegF32RegisterClass) {
+    OpCode = PTX::STACKLOADF32;
+  } else if (RC == PTX::RegF64RegisterClass) {
+    OpCode = PTX::STACKLOADF64;
+  } else {
+    llvm_unreachable("Unknown PTX register class!");
+  }
+
+  // Build the load instruction (really a mov)
+  // NOTE(review): DestReg is added without RegState::Define; confirm this
+  // matches the operand layout of the STACKLOAD* definitions.
+  MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+  MIB.addReg(DestReg);
+  MIB.addFrameIndex(FrameIdx);
+
+  AddDefaultPredicate(MIB);
+}
+
+// static helper routines
+
+// GetPTXMachineNode - Create a machine node for Opcode with result type VT
+// whose operands are Op1 followed by the default predicate operands
+// (PTX::NoRegister, PTX::PRED_NORMAL).
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                  DebugLoc dl, EVT VT, SDValue Op1) {
+  SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+  SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+  SDValue ops[] = { Op1, predReg, predOp };
+  return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+MachineSDNode *PTXInstrInfo:: +GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { + SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue ops[] = { Op1, Op2, predReg, predOp }; + return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); +} + +void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { + if (MI->findFirstPredOperandIdx() == -1) { + MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); + MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + } +} + +bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { + const MCInstrDesc &desc = inst.getDesc(); + return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch(); +} + +bool PTXInstrInfo:: +IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) { + for (MachineBasicBlock::const_succ_iterator + i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i) + if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i)) + return true; + return false; +} + +MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) { + // FIXME So far all branch instructions put destination in 1st operand + const MachineOperand& target = inst.getOperand(0); + assert(target.isMBB() && "FIXME: detect branch target operand"); + return target.getMBB(); +} diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h new file mode 100644 index 0000000..871f1ac --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h @@ -0,0 +1,133 @@ +//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_INSTR_INFO_H
+#define PTX_INSTR_INFO_H
+
+#include "PTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PTXGenInstrInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+
+class MachineSDNode;
+class SDValue;
+class SelectionDAG;
+
+// PTXInstrInfo - Instruction information for the PTX target, layered on the
+// TableGen-generated PTXGenInstrInfo. Groups the predicate, branch and
+// spill/reload hooks plus a few static helpers for building predicated nodes.
+class PTXInstrInfo : public PTXGenInstrInfo {
+private:
+  // Register info object handed out by getRegisterInfo().
+  const PTXRegisterInfo RI;
+  PTXTargetMachine &TM;
+
+public:
+  explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+  // Returns a reference to the RI member.
+  virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+  // Register copy hooks; the definitions are not part of this header.
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I, DebugLoc DL,
+                           unsigned DstReg, unsigned SrcReg,
+                           bool KillSrc) const;
+
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DstReg, unsigned SrcReg,
+                            const TargetRegisterClass *DstRC,
+                            const TargetRegisterClass *SrcRC,
+                            DebugLoc DL) const;
+
+  virtual bool isMoveInstr(const MachineInstr& MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  // predicate support
+
+  virtual bool isPredicated(const MachineInstr *MI) const;
+
+  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+  virtual
+  bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  virtual
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  virtual bool DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const;
+
+  // PTX is fully-predicable
+  virtual bool isPredicable(MachineInstr *MI) const { return true; }
+
+  // branch support
+
+  // Returns false when the block's terminators were understood (TBB, FBB and
+  // Cond are filled in as applicable) and true when they were not. For a
+  // conditional branch, Cond receives the branch's two predicate operands.
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify = false) const;
+
+  // Removes trailing instructions that IsAnyKindOfBranch accepts and returns
+  // how many were removed.
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+  // Appends one or two branch instructions to MBB and returns the number of
+  // instructions inserted.
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const;
+
+  // Memory operand folding for spills
+  // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+  //       loadRegFromStackSlot.  Doing so will get rid of the "stack" registers
+  //       we currently use to spill, though I doubt the overall effect on ptxas
+  //       output will be large.  I have yet to see a case where ptxas is unable
+  //       to see through the "stack" register usage and hence generates
+  //       efficient code anyway.
+  // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+  //                                             MachineInstr* MI,
+  //                                       const SmallVectorImpl<unsigned> &Ops,
+  //                                             int FrameIndex) const;
+
+  // Spill and reload hooks: each selects a STACKSTORE*/STACKLOAD* opcode from
+  // the register class RC and inserts it before MII.
+  virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+                                   MachineBasicBlock::iterator MII,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass* RC,
+                                   const TargetRegisterInfo* TRI) const;
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MII,
+                                    unsigned DestReg, int FrameIdx,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const;
+
+  // static helper routines
+
+  // Build a machine node for Opcode whose operand list is the given value(s)
+  // followed by the default predicate operands.
+  static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                                          DebugLoc dl, EVT VT,
+                                          SDValue Op1);
+
+  static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+                                          DebugLoc dl, EVT VT,
+                                          SDValue Op1, SDValue Op2);
+
+  // Appends the default predicate operands to MI when it has none.
+  static void AddDefaultPredicate(MachineInstr *MI);
+
+  // True for instructions flagged as terminator, branch or indirect branch.
+  static bool IsAnyKindOfBranch(const MachineInstr& inst);
+
+  // Reports whether some successor of MBB is also its layout successor.
+  static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
+
+  // Returns the basic block held in operand 0 of a branch instruction.
+  static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
+}; // class PTXInstrInfo
+} // namespace llvm
+
+#endif // 
PTX_INSTR_INFO_H diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td new file mode 100644 index 0000000..6bfe906 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td @@ -0,0 +1,1102 @@ +//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PTX instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +include "PTXInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Code Generation Predicates +//===----------------------------------------------------------------------===// + +// Addressing +def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; +def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; + +// Shader Model Support +def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; +def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; +def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">; +def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">; + +// PTX Version Support +def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; +def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">; +def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">; +def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">; +def SupportsPTX23 : 
Predicate<"getSubtarget().supportsPTX23()">; +def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">; + +// Fused-Multiply Add +def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">; +def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">; + +//===----------------------------------------------------------------------===// +// Instruction Pattern Stuff +//===----------------------------------------------------------------------===// + +def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::GLOBAL; + return false; +}]>; + +def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::CONSTANT; + return false; +}]>; + +def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::LOCAL; + return false; +}]>; + +def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::PARAMETER; + return false; +}]>; + +def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::SHARED; + return false; +}]>; + +def store_global + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const 
PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::GLOBAL; + return false; +}]>; + +def store_local + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::LOCAL; + return false; +}]>; + +def store_parameter + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::PARAMETER; + return false; +}]>; + +def store_shared + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTX::SHARED; + return false; +}]>; + +// Addressing modes. 
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; +def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; +def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; +def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; +def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; +def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; + +// Address operands +def MEMri32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RegI32, i32imm); +} +def MEMri64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RegI64, i64imm); +} +def MEMii32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i32imm, i32imm); +} +def MEMii64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +// The operand here does not correspond to an actual address, so we +// can use i32 in 64-bit address modes. +def MEMpi : Operand<i32> { + let PrintMethod = "printParamOperand"; + let MIOperandInfo = (ops i32imm); +} +def MEMret : Operand<i32> { + let PrintMethod = "printReturnOperand"; + let MIOperandInfo = (ops i32imm); +} + +// Branch & call targets have OtherVT type. 
+def brtarget : Operand<OtherVT>; +def calltarget : Operand<i32>; + +//===----------------------------------------------------------------------===// +// PTX Specific Node Definitions +//===----------------------------------------------------------------------===// + +// PTX allow generic 3-reg shifts like shl r0, r1, r2 +def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>; +def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>; +def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>; + +def PTXexit + : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>; +def PTXret + : SDNode<"PTXISD::RET", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def PTXcopyaddress + : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; + +// Load/store .param space +def PTXloadparam + : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXstoreparam + : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + +//===----------------------------------------------------------------------===// +// Instruction Class Templates +//===----------------------------------------------------------------------===// + +//===- Floating-Point Instructions - 2 Operand Form -----------------------===// +multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> { + def rr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), + !strconcat(opcstr, ".f32\t$d, $a"), + [(set RegF32:$d, (opnode RegF32:$a))]>; + def ri32 : InstPTX<(outs RegF32:$d), + (ins f32imm:$a), + !strconcat(opcstr, ".f32\t$d, $a"), + [(set RegF32:$d, (opnode fpimm:$a))]>; + def rr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), + !strconcat(opcstr, ".f64\t$d, $a"), + [(set RegF64:$d, (opnode RegF64:$a))]>; + def ri64 : InstPTX<(outs RegF64:$d), + (ins f64imm:$a), + !strconcat(opcstr, ".f64\t$d, $a"), + [(set RegF64:$d, (opnode fpimm:$a))]>; +} + +//===- Floating-Point Instructions - 3 Operand Form -----------------------===// 
+multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> { + def rr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), + !strconcat(opcstr, ".f32\t$d, $a, $b"), + [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>; + def ri32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), + !strconcat(opcstr, ".f32\t$d, $a, $b"), + [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>; + def rr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), + !strconcat(opcstr, ".f64\t$d, $a, $b"), + [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>; + def ri64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), + !strconcat(opcstr, ".f64\t$d, $a, $b"), + [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>; +} + +//===- Floating-Point Instructions - 4 Operand Form -----------------------===// +multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> { + def rrr32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b, RegF32:$c), + !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), + [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, + RegF32:$b), + RegF32:$c))]>; + def rri32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b, f32imm:$c), + !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), + [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, + RegF32:$b), + fpimm:$c))]>; + def rrr64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b, RegF64:$c), + !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), + [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, + RegF64:$b), + RegF64:$c))]>; + def rri64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b, f64imm:$c), + !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), + [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, + RegF64:$b), + fpimm:$c))]>; +} + +multiclass INT3<string opcstr, SDNode opnode> { + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), + !strconcat(opcstr, ".u16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), + 
!strconcat(opcstr, ".u16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), + !strconcat(opcstr, ".u32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), + !strconcat(opcstr, ".u32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), + !strconcat(opcstr, ".u64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), + !strconcat(opcstr, ".u64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; +} + +multiclass PTX_LOGIC<string opcstr, SDNode opnode> { + def ripreds : InstPTX<(outs RegPred:$d), + (ins RegPred:$a, i1imm:$b), + !strconcat(opcstr, ".pred\t$d, $a, $b"), + [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>; + def rrpreds : InstPTX<(outs RegPred:$d), + (ins RegPred:$a, RegPred:$b), + !strconcat(opcstr, ".pred\t$d, $a, $b"), + [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>; + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), + !strconcat(opcstr, ".b16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), + !strconcat(opcstr, ".b16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), + !strconcat(opcstr, ".b32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), + !strconcat(opcstr, ".b32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), + !strconcat(opcstr, ".b64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + 
(ins RegI64:$a, i64imm:$b), + !strconcat(opcstr, ".b64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; +} + +multiclass INT3ntnc<string opcstr, SDNode opnode> { + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; + def ir16 : InstPTX<(outs RegI16:$d), + (ins i16imm:$a, RegI16:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>; + def ir32 : InstPTX<(outs RegI32:$d), + (ins i32imm:$a, RegI32:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>; + def ir64 : InstPTX<(outs RegI64:$d), + (ins i64imm:$a, RegI64:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>; +} + +multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, + CondCode cmp, string cmpstr> { + // TODO support 5-operand format: p|q, a, b, c + + def rr + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>; + def ri + : InstPTX<(outs RegPred:$p), 
(ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>; + + def rr_and_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; + def ri_and_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + def rr_or_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; + def ri_or_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + def rr_xor_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; + def ri_xor_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + + def rr_and_not_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + def ri_and_not_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + def rr_or_not_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, 
RegPred:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + def ri_or_not_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + def rr_xor_not_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + def ri_xor_not_r + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; +} + +multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, + CondCode ucmp, CondCode ocmp, string cmpstr> { + // TODO support 5-operand format: p|q, a, b, c + + def rr_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>; + def rr_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>; + + def rr_and_r_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + def rr_and_r_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + + def rr_or_r_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (or 
(setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + def rr_or_r_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + + def rr_xor_r_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + def rr_xor_r_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + + def rr_and_not_r_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + def rr_and_not_r_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + + def rr_or_not_r_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + def rr_or_not_r_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + + def rr_xor_not_r_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + def rr_xor_not_r_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), + !strconcat("setp.", cmpstr, ".xor.", 
regclsname, "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; +} + +multiclass PTX_SELP<RegisterClass RC, string regclsname> { + def rr + : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>; +} + +multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { + def rr32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs RC:$d), + (ins MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs RC:$d), + (ins MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>; +} + +multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { + defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; +} + +multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> { + def rr32 : InstPTX<(outs), 
+ (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs), + (ins RC:$d, MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs), + (ins RC:$d, MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>; +} + +multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { + defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +///===- Integer Arithmetic Instructions -----------------------------------===// + +defm ADD : INT3<"add", add>; +defm SUB : INT3<"sub", sub>; +defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies +defm DIV : INT3<"div", udiv>; +defm REM : INT3<"rem", urem>; + +///===- Floating-Point Arithmetic Instructions ----------------------------===// + +// Standard Unary Operations +defm FNEG : 
PTX_FLOAT_2OP<"neg", fneg>; + +// Standard Binary Operations +defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>; +defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>; +defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>; + +// For floating-point division: +// SM_13+ defaults to .rn for f32 and f64, +// SM10 must *not* provide a rounding + +// TODO: +// - Allow user selection of rounding modes for fdiv +// - Add support for -prec-div=false (.approx) + +def FDIVrr32SM13 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), + "div.rn.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVri32SM13 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), + "div.rn.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVrr32SM10 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, RegF32:$b), + "div.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, + Requires<[FDivNoRoundingMode]>; +def FDIVri32SM10 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a, f32imm:$b), + "div.f32\t$d, $a, $b", + [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, + Requires<[FDivNoRoundingMode]>; + +def FDIVrr64SM13 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), + "div.rn.f64\t$d, $a, $b", + [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVri64SM13 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), + "div.rn.f64\t$d, $a, $b", + [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, + Requires<[FDivNeedsRoundingMode]>; +def FDIVrr64SM10 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, RegF64:$b), + "div.f64\t$d, $a, $b", + [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, + Requires<[FDivNoRoundingMode]>; +def FDIVri64SM10 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a, f64imm:$b), + "div.f64\t$d, $a, $b", + [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, + Requires<[FDivNoRoundingMode]>; + + + +// Multi-operation hybrid instructions + +// The 
selection of mad/fma is tricky. In some cases, they are the *same* +// instruction, but in other cases we may prefer one or the other. Also, +// different PTX versions differ on whether rounding mode flags are required. +// In the short term, mad is supported on all PTX versions and we use a +// default rounding mode no matter what shader model or PTX version. +// TODO: Allow the rounding mode to be selectable through llc. +defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, + Requires<[FMadNeedsRoundingMode, SupportsFMA]>; +defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, + Requires<[FMadNoRoundingMode, SupportsFMA]>; + +///===- Floating-Point Intrinsic Instructions -----------------------------===// + +def FSQRT32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), + "sqrt.rn.f32\t$d, $a", + [(set RegF32:$d, (fsqrt RegF32:$a))]>; + +def FSQRT64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), + "sqrt.rn.f64\t$d, $a", + [(set RegF64:$d, (fsqrt RegF64:$a))]>; + +def FSIN32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), + "sin.approx.f32\t$d, $a", + [(set RegF32:$d, (fsin RegF32:$a))]>; + +def FSIN64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), + "sin.approx.f64\t$d, $a", + [(set RegF64:$d, (fsin RegF64:$a))]>; + +def FCOS32 : InstPTX<(outs RegF32:$d), + (ins RegF32:$a), + "cos.approx.f32\t$d, $a", + [(set RegF32:$d, (fcos RegF32:$a))]>; + +def FCOS64 : InstPTX<(outs RegF64:$d), + (ins RegF64:$a), + "cos.approx.f64\t$d, $a", + [(set RegF64:$d, (fcos RegF64:$a))]>; + + +///===- Comparison and Selection Instructions -----------------------------===// + +// .setp + +// Compare u16 + +defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">; +defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">; +defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">; +defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">; +defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">; +defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">; 
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">; +defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">; +defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">; +defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">; + +// Compare u32 + +defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">; +defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">; +defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">; +defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">; +defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">; +defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">; +defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">; +defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">; +defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">; +defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">; + +// Compare u64 + +defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">; +defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">; +defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">; +defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">; +defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">; +defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">; +defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">; +defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">; +defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">; +defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">; + +// Compare f32 + +defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">; +defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">; +defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">; +defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">; +defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">; +defm 
SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">; + +// Compare f64 + +defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">; +defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">; +defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">; +defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">; +defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">; +defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">; + +// .selp + +defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">; +defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">; +defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">; +defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">; +defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">; + +///===- Logic and Shift Instructions --------------------------------------===// + +defm SHL : INT3ntnc<"shl.b", PTXshl>; +defm SRL : INT3ntnc<"shr.u", PTXsrl>; +defm SRA : INT3ntnc<"shr.s", PTXsra>; + +defm AND : PTX_LOGIC<"and", and>; +defm OR : PTX_LOGIC<"or", or>; +defm XOR : PTX_LOGIC<"xor", xor>; + +///===- Data Movement and Conversion Instructions -------------------------===// + +let neverHasSideEffects = 1 in { + def MOVPREDrr + : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; + def MOVU16rr + : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>; + def MOVU32rr + : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>; + def MOVU64rr + : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>; + def MOVF32rr + : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>; + def MOVF64rr + : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def MOVPREDri + : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a", + [(set RegPred:$d, imm:$a)]>; + def MOVU16ri + : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", + [(set RegI16:$d, imm:$a)]>; + def MOVU32ri + : InstPTX<(outs 
RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RegI32:$d, imm:$a)]>; + def MOVU64ri + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RegI64:$d, imm:$a)]>; + def MOVF32ri + : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", + [(set RegF32:$d, fpimm:$a)]>; + def MOVF64ri + : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", + [(set RegF64:$d, fpimm:$a)]>; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def MOVaddr32 + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>; + def MOVaddr64 + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; +} + +// Loads +defm LDg : PTX_LD_ALL<"ld.global", load_global>; +defm LDc : PTX_LD_ALL<"ld.const", load_constant>; +defm LDl : PTX_LD_ALL<"ld.local", load_local>; +defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; + +// These instructions are used to load/store from the .param space for +// device and kernel parameters + +let hasSideEffects = 1 in { + def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a), + "ld.param.pred\t$d, [$a]", + [(set RegPred:$d, (PTXloadparam timm:$a))]>; + def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), + "ld.param.u16\t$d, [$a]", + [(set RegI16:$d, (PTXloadparam timm:$a))]>; + def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), + "ld.param.u32\t$d, [$a]", + [(set RegI32:$d, (PTXloadparam timm:$a))]>; + def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), + "ld.param.u64\t$d, [$a]", + [(set RegI64:$d, (PTXloadparam timm:$a))]>; + def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), + "ld.param.f32\t$d, [$a]", + [(set RegF32:$d, (PTXloadparam timm:$a))]>; + def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), + "ld.param.f64\t$d, [$a]", + [(set RegF64:$d, (PTXloadparam timm:$a))]>; + + def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a), + "st.param.pred\t[$d], $a", + 
[(PTXstoreparam timm:$d, RegPred:$a)]>; + def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a), + "st.param.u16\t[$d], $a", + [(PTXstoreparam timm:$d, RegI16:$a)]>; + def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a), + "st.param.u32\t[$d], $a", + [(PTXstoreparam timm:$d, RegI32:$a)]>; + def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a), + "st.param.u64\t[$d], $a", + [(PTXstoreparam timm:$d, RegI64:$a)]>; + def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a), + "st.param.f32\t[$d], $a", + [(PTXstoreparam timm:$d, RegF32:$a)]>; + def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a), + "st.param.f64\t[$d], $a", + [(PTXstoreparam timm:$d, RegF64:$a)]>; +} + +// Stores +defm STg : PTX_ST_ALL<"st.global", store_global>; +defm STl : PTX_ST_ALL<"st.local", store_local>; +defm STs : PTX_ST_ALL<"st.shared", store_shared>; + +// defm STp : PTX_ST_ALL<"st.param", store_parameter>; +// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; +// TODO: Do something with st.param if/when it is needed. + +// Conversion to pred +// PTX does not directly support converting to a predicate type, so we fake it +// by performing a greater-than test between the value and zero. This follows +// the C convention that any non-zero value is equivalent to 'true'. 
+// NOTE(review): the patterns below match trunc / fp_to_uint, but the emitted
+// test is "value > 0"; the two agree only for inputs that are already 0 or 1.
+// Confirm that the lowering guarantees this before relying on it.
+def CVT_pred_u16
+  : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
+            [(set RegPred:$d, (trunc RegI16:$a))]>;
+
+def CVT_pred_u32
+  : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
+            [(set RegPred:$d, (trunc RegI32:$a))]>;
+
+def CVT_pred_u64
+  : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
+            [(set RegPred:$d, (trunc RegI64:$a))]>;
+
+def CVT_pred_f32
+  : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
+            [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_pred_f64
+  : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
+            [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u16
+// PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to pick the integer (or fp) encoding of the
+// predicate's truth value: 1 / 0 for any- and zero-extension, all-ones / 0
+// for sign-extension.
+def CVT_u16_preda
+  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+            [(set RegI16:$d, (anyext RegPred:$a))]>;
+
+def CVT_u16_pred
+  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+            [(set RegI16:$d, (zext RegPred:$a))]>;
+
+// Sign-extending a true predicate replicates the set bit into every result
+// bit (sext i1 1 == -1), so 'true' must select 0xffff rather than 1.
+def CVT_u16_preds
+  : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 0xffff, 0, $a",
+            [(set RegI16:$d, (sext RegPred:$a))]>;
+
+def CVT_u16_u32
+  : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
+            [(set RegI16:$d, (trunc RegI32:$a))]>;
+
+def CVT_u16_u64
+  : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a",
+            [(set RegI16:$d, (trunc RegI64:$a))]>;
+
+def CVT_u16_f32
+  : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
+            [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u16_f64
+  : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
+            [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u32
+
+def CVT_u32_pred
+  : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
+            [(set RegI32:$d, (zext
RegPred:$a))]>;
+
+def CVT_u32_b16
+  : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+            [(set RegI32:$d, (anyext RegI16:$a))]>;
+
+def CVT_u32_u16
+  : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+            [(set RegI32:$d, (zext RegI16:$a))]>;
+
+// Sign-extending a true predicate yields all-ones (sext i1 1 == -1), so
+// 'true' must select 0xffffffff rather than 1.
+def CVT_u32_preds
+  : InstPTX<(outs RegI32:$d), (ins RegPred:$a),
+            "selp.u32\t$d, 0xffffffff, 0, $a",
+            [(set RegI32:$d, (sext RegPred:$a))]>;
+
+def CVT_u32_s16
+  : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
+            [(set RegI32:$d, (sext RegI16:$a))]>;
+
+def CVT_u32_u64
+  : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
+            [(set RegI32:$d, (trunc RegI64:$a))]>;
+
+def CVT_u32_f32
+  : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
+            [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u32_f64
+  : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
+            [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to u64
+
+def CVT_u64_pred
+  : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
+            [(set RegI64:$d, (zext RegPred:$a))]>;
+
+// Same reasoning as the narrower sext-from-predicate forms: 'true' must
+// become all 64 bits set, not the value 1.
+def CVT_u64_preds
+  : InstPTX<(outs RegI64:$d), (ins RegPred:$a),
+            "selp.u64\t$d, 0xffffffffffffffff, 0, $a",
+            [(set RegI64:$d, (sext RegPred:$a))]>;
+
+def CVT_u64_u16
+  : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
+            [(set RegI64:$d, (zext RegI16:$a))]>;
+
+def CVT_u64_s16
+  : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
+            [(set RegI64:$d, (sext RegI16:$a))]>;
+
+def CVT_u64_u32
+  : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
+            [(set RegI64:$d, (zext RegI32:$a))]>;
+
+def CVT_u64_s32
+  : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
+            [(set RegI64:$d, (sext RegI32:$a))]>;
+
+def CVT_u64_f32
+  : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
+            [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
+
+def CVT_u64_f64
+  : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
+            [(set
RegI64:$d, (fp_to_uint RegF64:$a))]>;
+
+// Conversion to f32
+
+// Select the f32 bit pattern of 1.0 (0x3F800000) or 0.0 from the predicate.
+def CVT_f32_pred
+  : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
+            "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0 : 0.0
+            [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
+
+def CVT_f32_u16
+  : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
+            [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
+
+def CVT_f32_u32
+  : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a",
+            [(set RegF32:$d, (uint_to_fp RegI32:$a))]>;
+
+def CVT_f32_u64
+  : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
+            [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
+
+def CVT_f32_f64
+  : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
+            [(set RegF32:$d, (fround RegF64:$a))]>;
+
+// Conversion to f64
+
+// Select the f64 bit pattern of 1.0 or 0.0 from the predicate.  As an
+// IEEE-754 double, 1.0 is 0x3FF0000000000000; the f32 pattern 0x3F800000
+// padded with zeros would instead encode 2^-7.
+def CVT_f64_pred
+  : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
+            "selp.f64\t$d, 0D3FF0000000000000, 0D0000000000000000, $a", // 1.0 : 0.0
+            [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
+
+def CVT_f64_u16
+  : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
+            [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
+
+def CVT_f64_u32
+  : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
+            [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
+
+def CVT_f64_u64
+  : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
+            [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
+
+def CVT_f64_f32
+  : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
+            [(set RegF64:$d, (fextend RegF32:$a))]>;
+
+///===- Control Flow Instructions -----------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+  def BRAd
+    : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+  // FIXME: The pattern part is blank because I cannot (or do not yet know
+  // how to) use the first operand of PredicateOperand (a RegPred register) here
+  def BRAdp
+    : InstPTX<(outs), (ins
brtarget:$d), "bra\t$d", + [/*(brcond pred:$_p, bb:$d)*/]>; +} + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; + def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; +} + +///===- Spill Instructions ------------------------------------------------===// +// Special instructions used for stack spilling +def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a), + "mov.u16\ts$d, $a", []>; +def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a), + "mov.u32\ts$d, $a", []>; +def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a), + "mov.u64\ts$d, $a", []>; +def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a), + "mov.f32\ts$d, $a", []>; +def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a), + "mov.f64\ts$d, $a", []>; + +def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a), + "mov.u16\t$d, s$a", []>; +def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a), + "mov.u32\t$d, s$a", []>; +def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a), + "mov.u64\t$d, s$a", []>; +def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a), + "mov.f32\t$d, s$a", []>; +def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a), + "mov.f64\t$d, s$a", []>; + +///===- Intrinsic Instructions --------------------------------------------===// + +include "PTXIntrinsicInstrInfo.td" diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td new file mode 100644 index 0000000..8d97909 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -0,0 +1,84 @@ +//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines all of the PTX-specific intrinsic instructions. +// +//===----------------------------------------------------------------------===// + +// PTX Special Purpose Register Accessor Intrinsics + +class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop> + : InstPTX<(outs RegI64:$d), (ins), + !strconcat("mov.u64\t$d, %", regname), + [(set RegI64:$d, (intop))]>; + +class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop> + : InstPTX<(outs RegI32:$d), (ins), + !strconcat("mov.u32\t$d, %", regname), + [(set RegI32:$d, (intop))]>; + +// TODO Add read vector-version of special registers + +//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>; +def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>; +def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>; +def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>; +def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>; + +//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>; +def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>; +def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>; +def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>; +def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>; + +def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>; +def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>; +def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>; + +//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; +def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", 
int_ptx_read_ctaid_x>; +def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>; +def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>; +def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>; + +//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; +def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>; +def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>; +def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>; +def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>; + +def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>; +def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>; +def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>; + +def PTX_READ_LANEMASK_EQ + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>; +def PTX_READ_LANEMASK_LE + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>; +def PTX_READ_LANEMASK_LT + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>; +def PTX_READ_LANEMASK_GE + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>; +def PTX_READ_LANEMASK_GT + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>; + +def PTX_READ_CLOCK + : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>; +def PTX_READ_CLOCK64 + : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>; + +def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>; +def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>; +def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>; +def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>; + +// PTX 
Parallel Synchronization and Communication Intrinsics + +def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i", + [(int_ptx_bar_sync imm:$i)]>; diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp new file mode 100644 index 0000000..b13a3da --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -0,0 +1,541 @@ +//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class PTXMCAsmStreamer : public MCStreamer { + formatted_raw_ostream &OS; + const MCAsmInfo &MAI; + OwningPtr<MCInstPrinter> InstPrinter; + OwningPtr<MCCodeEmitter> Emitter; + + SmallString<128> CommentToEmit; + raw_svector_ostream CommentStream; + + unsigned IsVerboseAsm : 1; + unsigned ShowInst : 1; + +public: + PTXMCAsmStreamer(MCContext &Context, + formatted_raw_ostream &os, + bool isVerboseAsm, bool useLoc, + MCInstPrinter *printer, + MCCodeEmitter *emitter, + bool showInst) + : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), + InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit), + IsVerboseAsm(isVerboseAsm), + ShowInst(showInst) { + if 
(InstPrinter && IsVerboseAsm) + InstPrinter->setCommentStream(CommentStream); + } + + ~PTXMCAsmStreamer() {} + + inline void EmitEOL() { + // If we don't have any comments, just emit a \n. + if (!IsVerboseAsm) { + OS << '\n'; + return; + } + EmitCommentsAndEOL(); + } + void EmitCommentsAndEOL(); + + /// isVerboseAsm - Return true if this streamer supports verbose assembly at + /// all. + virtual bool isVerboseAsm() const { return IsVerboseAsm; } + + /// hasRawTextSupport - We support EmitRawText. + virtual bool hasRawTextSupport() const { return true; } + + /// AddComment - Add a comment that can be emitted to the generated .s + /// file if applicable as a QoI issue to make the output of the compiler + /// more readable. This only affects the MCAsmStreamer, and only when + /// verbose assembly output is enabled. + virtual void AddComment(const Twine &T); + + /// AddEncodingComment - Add a comment showing the encoding of an instruction. + virtual void AddEncodingComment(const MCInst &Inst); + + /// GetCommentOS - Return a raw_ostream that comments can be written to. + /// Unlike AddComment, you are required to terminate comments with \n if you + /// use this method. + virtual raw_ostream &GetCommentOS() { + if (!IsVerboseAsm) + return nulls(); // Discard comments unless in verbose asm mode. + return CommentStream; + } + + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. 
+ virtual void AddBlankLine() { + EmitEOL(); + } + + /// @name MCStreamer Interface + /// @{ + + virtual void ChangeSection(const MCSection *Section); + virtual void InitSections() {} + + virtual void EmitLabel(MCSymbol *Symbol); + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + + virtual void EmitThumbFunc(MCSymbol *Func); + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + + virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label, + unsigned PointerSize); + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + + /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. + /// + /// @param Symbol - The common symbol to emit. + /// @param Size - The size of the common symbol. 
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace); + virtual void EmitULEB128Value(const MCExpr *Value); + virtual void EmitSLEB128Value(const MCExpr *Value); + virtual void EmitGPRel32Value(const MCExpr *Value); + + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace); + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename); + virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); + + virtual void EmitInstruction(const MCInst &Inst); + + /// EmitRawText - If this file is backed by an assembly streamer, this dumps + /// the specified string in the output .s file. This capability is + /// indicated by the hasRawTextSupport() predicate. + virtual void EmitRawText(StringRef String); + + virtual void Finish(); + + /// @} + +}; // class PTXMCAsmStreamer + +} + +/// TODO: Add appropriate implementation of Emit*() methods when needed + +void PTXMCAsmStreamer::AddComment(const Twine &T) { + if (!IsVerboseAsm) return; + + // Make sure that CommentStream is flushed. + CommentStream.flush(); + + T.toVector(CommentToEmit); + // Each comment goes on its own line. + CommentToEmit.push_back('\n'); + + // Tell the comment stream that the vector changed underneath it. 
+  CommentStream.resync();
+}
+
+/// EmitCommentsAndEOL - Write out any pending comments and end the current
+/// line.  With nothing pending a bare newline is emitted; otherwise each
+/// buffered comment line is padded to the comment column, prefixed with the
+/// target's comment string, and the buffer is cleared afterwards.
+void PTXMCAsmStreamer::EmitCommentsAndEOL() {
+  if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+    OS << '\n';
+    return;
+  }
+
+  CommentStream.flush();
+  StringRef Comments = CommentToEmit.str();
+
+  assert(Comments.back() == '\n' &&
+         "Comment array not newline terminated");
+  do {
+    // Emit a line of comments.
+    OS.PadToColumn(MAI.getCommentColumn());
+    size_t Position = Comments.find('\n');
+    OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+    Comments = Comments.substr(Position+1);
+  } while (!Comments.empty());
+
+  CommentToEmit.clear();
+  // Tell the comment stream that the vector changed underneath it.
+  CommentStream.resync();
+}
+
+/// truncateToSize - Keep only the low \p Bytes bytes of \p Value.
+/// \p Bytes must be in the range [1, 8]: the shift amount below is computed
+/// in unsigned arithmetic, so a larger size would wrap around and make the
+/// shift undefined.
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+  assert(Bytes > 0 && Bytes <= 8 && "Invalid size!");
+  return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+/// ChangeSection - Only checks that \p Section is non-null; nothing is
+/// written to the output stream here.
+void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
+  assert(Section && "Cannot switch to a null section!");
+}
+
+/// EmitLabel - Print \p Symbol followed by the target's label suffix, end the
+/// line, and record the current section on the symbol.
+void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+  // NOTE(review): the section check below is disabled, yet the current
+  // section is still dereferenced at the end of this function.  Confirm that
+  // getCurrentSection() can never be null when a label is emitted.
+  //assert(getCurrentSection() && "Cannot emit before setting section!");
+
+  OS << *Symbol << MAI.getLabelSuffix();
+  EmitEOL();
+  Symbol->setSection(*getCurrentSection());
+}
+
+// Intentionally empty: no output is produced for assembler flags.
+void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+
+// Intentionally empty: no output is produced for Thumb function markers.
+void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
+
+/// EmitAssignment - Print "Symbol = Value", end the line, and then bind
+/// \p Value to \p Symbol.
+void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+  OS << *Symbol << " = " << *Value;
+  EmitEOL();
+
+  // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value); +} + +void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias, + const MCSymbol *Symbol) { + OS << ".weakref " << *Alias << ", " << *Symbol; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label, + unsigned PointerSize) { + report_fatal_error("Unimplemented."); +} + +void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) {} + +void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + +void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + +void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {} + +void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {} + +void PTXMCAsmStreamer::EndCOFFSymbolDef() {} + +void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} + +void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} + +void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} + +void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) {} + +void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, + MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} + +static inline char toOctal(int X) { return (X&7)+'0'; } + +static void PrintQuotedString(StringRef Data, raw_ostream &OS) { + OS << '"'; + + for (unsigned i = 0, e = Data.size(); i != e; ++i) { + unsigned char C = Data[i]; + if (C == '"' || C == '\\') { + OS << '\\' << (char)C; + continue; + } + + if (isprint((unsigned char)C)) { + OS << (char)C; + continue; + } + + switch (C) { + case '\b': OS << "\\b"; break; + case '\f': OS << "\\f"; break; + case '\n': OS << "\\n"; break; + case '\r': OS << "\\r"; break; + case '\t': OS << "\\t"; break; + default: + OS << '\\'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + 
break; + } + } + + OS << '"'; +} + +void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + if (Data.empty()) return; + + if (Data.size() == 1) { + OS << MAI.getData8bitsDirective(AddrSpace); + OS << (unsigned)(unsigned char)Data[0]; + EmitEOL(); + return; + } + + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii + if (MAI.getAscizDirective() && Data.back() == 0) { + OS << MAI.getAscizDirective(); + Data = Data.substr(0, Data.size()-1); + } else { + OS << MAI.getAsciiDirective(); + } + + OS << ' '; + PrintQuotedString(Data, OS); + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + const char *Directive = 0; + switch (Size) { + default: break; + case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; + case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; + case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; + case 8: + Directive = MAI.getData64bitsDirective(AddrSpace); + // If the target doesn't support 64-bit data, emit as two 32-bit halves. 
+ if (Directive) break; + int64_t IntValue; + if (!Value->EvaluateAsAbsolute(IntValue)) + report_fatal_error("Don't know how to emit this value."); + if (getContext().getAsmInfo().isLittleEndian()) { + EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); + EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); + } else { + EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); + EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); + } + return; + } + + assert(Directive && "Invalid size for machine code value!"); + OS << Directive << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { + assert(MAI.hasLEB128() && "Cannot print a .uleb"); + OS << ".uleb128 " << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { + assert(MAI.hasLEB128() && "Cannot print a .sleb"); + OS << ".sleb128 " << *Value; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { + assert(MAI.getGPRel32Directive() != 0); + OS << MAI.getGPRel32Directive() << *Value; + EmitEOL(); +} + + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + if (NumBytes == 0) return; + + if (AddrSpace == 0) + if (const char *ZeroDirective = MAI.getZeroDirective()) { + OS << ZeroDirective << NumBytes; + if (FillValue != 0) + OS << ',' << (int)FillValue; + EmitEOL(); + return; + } + + // Emit a byte at a time. + MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); +} + +void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { + // Some assemblers don't support non-power of two alignments, so we always + // emit alignments as a power of two if possible. 
+ if (isPowerOf2_32(ByteAlignment)) { + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << MAI.getAlignDirective(); break; + // FIXME: use MAI for this! + case 2: OS << ".p2alignw "; break; + case 4: OS << ".p2alignl "; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + if (MAI.getAlignmentIsInBytes()) + OS << ByteAlignment; + else + OS << Log2_32(ByteAlignment); + + if (Value || MaxBytesToEmit) { + OS << ", 0x"; + OS.write_hex(truncateToSize(Value, ValueSize)); + + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + } + EmitEOL(); + return; + } + + // Non-power of two alignment. This is not widely supported by assemblers. + // FIXME: Parameterize this based on MAI. + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << ".balign"; break; + case 2: OS << ".balignw"; break; + case 4: OS << ".balignl"; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + OS << ' ' << ByteAlignment; + OS << ", " << truncateToSize(Value, ValueSize); + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + EmitEOL(); +} + +void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) {} + +void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) {} + + +void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) { + assert(MAI.hasSingleParameterDotFile()); + OS << "\t.file\t"; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +// FIXME: should we inherit from MCAsmStreamer? 
+bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, + StringRef Filename){ + OS << "\t.file\t" << FileNo << ' '; + PrintQuotedString(Filename, OS); + EmitEOL(); + return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename); +} + +void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {} + +void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + + // Show the encoding in a comment if we have a code emitter. + if (Emitter) + AddEncodingComment(Inst); + + // Show the MCInst if enabled. + if (ShowInst) { + Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); + GetCommentOS() << "\n"; + } + + // If we have an AsmPrinter, use that to print, otherwise print the MCInst. + if (InstPrinter) + InstPrinter->printInst(&Inst, OS); + else + Inst.print(OS, &MAI); + EmitEOL(); +} + +/// EmitRawText - If this file is backed by an assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. 
+void PTXMCAsmStreamer::EmitRawText(StringRef String) { + if (!String.empty() && String.back() == '\n') + String = String.substr(0, String.size()-1); + OS << String; + EmitEOL(); +} + +void PTXMCAsmStreamer::Finish() {} + +namespace llvm { + MCStreamer *createPTXAsmStreamer(MCContext &Context, + formatted_raw_ostream &OS, + bool isVerboseAsm, bool useLoc, bool useCFI, + MCInstPrinter *IP, + MCCodeEmitter *CE, TargetAsmBackend *TAB, + bool ShowInst) { + return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc, + IP, CE, ShowInst); + } +} diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp new file mode 100644 index 0000000..6fe9e6c --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -0,0 +1,92 @@ +//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an information extractor for PTX machine functions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-mf-info-extract" + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "PTXMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +// NOTE: PTXMFInfoExtract must after register allocation! 
+ +namespace llvm { + /// PTXMFInfoExtract - PTX specific code to extract of PTX machine + /// function information for PTXAsmPrinter + /// + class PTXMFInfoExtract : public MachineFunctionPass { + private: + static char ID; + + public: + PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) + : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "PTX Machine Function Info Extractor"; + } + }; // class PTXMFInfoExtract +} // namespace llvm + +using namespace llvm; + +char PTXMFInfoExtract::ID = 0; + +bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n"); + + DEBUG(dbgs() + << "PTX::NoRegister == " << PTX::NoRegister << "\n" + << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n"); + + DEBUG(for (unsigned reg = PTX::NoRegister + 1; + reg < PTX::NUM_TARGET_REGS; ++reg) + if (MRI.isPhysRegUsed(reg)) + dbgs() << "Used Reg: " << reg << "\n";); + + // FIXME: This is a slow linear scanning + for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg) + if (MRI.isPhysRegUsed(reg) && + !MFI->isRetReg(reg) && + (MFI->isKernel() || !MFI->isArgReg(reg))) + MFI->addLocalVarReg(reg); + + // Notify MachineFunctionInfo that I've done adding local var reg + MFI->doneAddLocalVar(); + + DEBUG(for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(); + i != e; ++i) + dbgs() << "Arg Reg: " << *i << "\n";); + + DEBUG(for (PTXMachineFunctionInfo::reg_iterator + i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); + i != e; ++i) + dbgs() << "Local Var Reg: " << *i << "\n";); + + return false; +} + +FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXMFInfoExtract(TM, 
OptLevel); +} diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h new file mode 100644 index 0000000..9d65f5b --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -0,0 +1,91 @@ +//===- PTXMachineFuctionInfo.h - PTX machine function info -------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares PTX-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_MACHINE_FUNCTION_INFO_H +#define PTX_MACHINE_FUNCTION_INFO_H + +#include "PTX.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { +/// PTXMachineFunctionInfo - This class is derived from MachineFunction and +/// contains private PTX target-specific information for each MachineFunction. 
+/// +class PTXMachineFunctionInfo : public MachineFunctionInfo { +private: + bool is_kernel; + std::vector<unsigned> reg_arg, reg_local_var; + std::vector<unsigned> reg_ret; + bool _isDoneAddArg; + +public: + PTXMachineFunctionInfo(MachineFunction &MF) + : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) { + reg_arg.reserve(8); + reg_local_var.reserve(32); + } + + void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; } + + void addArgReg(unsigned reg) { reg_arg.push_back(reg); } + void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } + void addRetReg(unsigned reg) { + if (!isRetReg(reg)) { + reg_ret.push_back(reg); + } + } + + void doneAddArg(void) { + _isDoneAddArg = true; + } + void doneAddLocalVar(void) {} + + bool isKernel() const { return is_kernel; } + + typedef std::vector<unsigned>::const_iterator reg_iterator; + typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator; + typedef std::vector<unsigned>::const_iterator ret_iterator; + + bool argRegEmpty() const { return reg_arg.empty(); } + int getNumArg() const { return reg_arg.size(); } + reg_iterator argRegBegin() const { return reg_arg.begin(); } + reg_iterator argRegEnd() const { return reg_arg.end(); } + reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); } + reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); } + + bool localVarRegEmpty() const { return reg_local_var.empty(); } + reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } + reg_iterator localVarRegEnd() const { return reg_local_var.end(); } + + bool retRegEmpty() const { return reg_ret.empty(); } + int getNumRet() const { return reg_ret.size(); } + ret_iterator retRegBegin() const { return reg_ret.begin(); } + ret_iterator retRegEnd() const { return reg_ret.end(); } + + bool isArgReg(unsigned reg) const { + return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); + } + + bool isRetReg(unsigned reg) const { + 
return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end(); + } + + bool isLocalVarReg(unsigned reg) const { + return std::find(reg_local_var.begin(), reg_local_var.end(), reg) + != reg_local_var.end(); + } +}; // class PTXMachineFunctionInfo +} // namespace llvm + +#endif // PTX_MACHINE_FUNCTION_INFO_H diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp new file mode 100644 index 0000000..cb56ea9 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp @@ -0,0 +1,51 @@ +//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PTX implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXRegisterInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define GET_REGINFO_TARGET_DESC +#include "PTXGenRegisterInfo.inc" + +using namespace llvm; + +PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, + const TargetInstrInfo &TII) + : PTXGenRegisterInfo() { +} + +void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + RegScavenger *RS) const { + unsigned Index; + MachineInstr& MI = *II; + + Index = 0; + while (!MI.getOperand(Index).isFI()) { + ++Index; + assert(Index < MI.getNumOperands() && + "Instr does not have a FrameIndex operand!"); + } + + int FrameIndex = MI.getOperand(Index).getIndex(); + + DEBUG(dbgs() << "eliminateFrameIndex: " << MI); + DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n"); + DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n"); + + // This frame index is post 
stack slot re-use assignments + MI.getOperand(Index).ChangeToImmediate(FrameIndex); +} diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h new file mode 100644 index 0000000..0b63cb6 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h @@ -0,0 +1,65 @@ +//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the PTX implementation of the MRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_REGISTER_INFO_H +#define PTX_REGISTER_INFO_H + +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/BitVector.h" + +#define GET_REGINFO_HEADER +#include "PTXGenRegisterInfo.inc" + +namespace llvm { +class PTXTargetMachine; +class MachineFunction; + +struct PTXRegisterInfo : public PTXGenRegisterInfo { + PTXRegisterInfo(PTXTargetMachine &TM, + const TargetInstrInfo &TII); + + virtual const unsigned + *getCalleeSavedRegs(const MachineFunction *MF = 0) const { + static const unsigned CalleeSavedRegs[] = { 0 }; + return CalleeSavedRegs; // save nothing + } + + virtual BitVector getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + return Reserved; // reserve no regs + } + + virtual void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, + RegScavenger *RS = NULL) const; + + virtual unsigned getFrameRegister(const MachineFunction &MF) const { + llvm_unreachable("PTX does not have a frame register"); + return 0; + } + + virtual unsigned getRARegister() const { + llvm_unreachable("PTX does not have a return address register"); + return 0; + } + + virtual int getDwarfRegNum(unsigned RegNum, bool isEH) 
const { + return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); + } + virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const { + return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0); + } +}; // struct PTXRegisterInfo +} // namespace llvm + +#endif // PTX_REGISTER_INFO_H diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td new file mode 100644 index 0000000..1313d24 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td @@ -0,0 +1,555 @@ + +//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class PTXReg<string n> : Register<n> { + let Namespace = "PTX"; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +///===- Predicate Registers -----------------------------------------------===// + +def P0 : PTXReg<"p0">; +def P1 : PTXReg<"p1">; +def P2 : PTXReg<"p2">; +def P3 : PTXReg<"p3">; +def P4 : PTXReg<"p4">; +def P5 : PTXReg<"p5">; +def P6 : PTXReg<"p6">; +def P7 : PTXReg<"p7">; +def P8 : PTXReg<"p8">; +def P9 : PTXReg<"p9">; +def P10 : PTXReg<"p10">; +def P11 : PTXReg<"p11">; +def P12 : PTXReg<"p12">; +def P13 : PTXReg<"p13">; +def P14 : PTXReg<"p14">; +def P15 : PTXReg<"p15">; +def P16 : PTXReg<"p16">; +def P17 : PTXReg<"p17">; +def P18 : PTXReg<"p18">; +def P19 : PTXReg<"p19">; +def P20 : PTXReg<"p20">; +def P21 : PTXReg<"p21">; +def P22 : PTXReg<"p22">; +def P23 : 
PTXReg<"p23">; +def P24 : PTXReg<"p24">; +def P25 : PTXReg<"p25">; +def P26 : PTXReg<"p26">; +def P27 : PTXReg<"p27">; +def P28 : PTXReg<"p28">; +def P29 : PTXReg<"p29">; +def P30 : PTXReg<"p30">; +def P31 : PTXReg<"p31">; +def P32 : PTXReg<"p32">; +def P33 : PTXReg<"p33">; +def P34 : PTXReg<"p34">; +def P35 : PTXReg<"p35">; +def P36 : PTXReg<"p36">; +def P37 : PTXReg<"p37">; +def P38 : PTXReg<"p38">; +def P39 : PTXReg<"p39">; +def P40 : PTXReg<"p40">; +def P41 : PTXReg<"p41">; +def P42 : PTXReg<"p42">; +def P43 : PTXReg<"p43">; +def P44 : PTXReg<"p44">; +def P45 : PTXReg<"p45">; +def P46 : PTXReg<"p46">; +def P47 : PTXReg<"p47">; +def P48 : PTXReg<"p48">; +def P49 : PTXReg<"p49">; +def P50 : PTXReg<"p50">; +def P51 : PTXReg<"p51">; +def P52 : PTXReg<"p52">; +def P53 : PTXReg<"p53">; +def P54 : PTXReg<"p54">; +def P55 : PTXReg<"p55">; +def P56 : PTXReg<"p56">; +def P57 : PTXReg<"p57">; +def P58 : PTXReg<"p58">; +def P59 : PTXReg<"p59">; +def P60 : PTXReg<"p60">; +def P61 : PTXReg<"p61">; +def P62 : PTXReg<"p62">; +def P63 : PTXReg<"p63">; +def P64 : PTXReg<"p64">; +def P65 : PTXReg<"p65">; +def P66 : PTXReg<"p66">; +def P67 : PTXReg<"p67">; +def P68 : PTXReg<"p68">; +def P69 : PTXReg<"p69">; +def P70 : PTXReg<"p70">; +def P71 : PTXReg<"p71">; +def P72 : PTXReg<"p72">; +def P73 : PTXReg<"p73">; +def P74 : PTXReg<"p74">; +def P75 : PTXReg<"p75">; +def P76 : PTXReg<"p76">; +def P77 : PTXReg<"p77">; +def P78 : PTXReg<"p78">; +def P79 : PTXReg<"p79">; +def P80 : PTXReg<"p80">; +def P81 : PTXReg<"p81">; +def P82 : PTXReg<"p82">; +def P83 : PTXReg<"p83">; +def P84 : PTXReg<"p84">; +def P85 : PTXReg<"p85">; +def P86 : PTXReg<"p86">; +def P87 : PTXReg<"p87">; +def P88 : PTXReg<"p88">; +def P89 : PTXReg<"p89">; +def P90 : PTXReg<"p90">; +def P91 : PTXReg<"p91">; +def P92 : PTXReg<"p92">; +def P93 : PTXReg<"p93">; +def P94 : PTXReg<"p94">; +def P95 : PTXReg<"p95">; +def P96 : PTXReg<"p96">; +def P97 : PTXReg<"p97">; +def P98 : PTXReg<"p98">; +def P99 : PTXReg<"p99">; +def 
P100 : PTXReg<"p100">; +def P101 : PTXReg<"p101">; +def P102 : PTXReg<"p102">; +def P103 : PTXReg<"p103">; +def P104 : PTXReg<"p104">; +def P105 : PTXReg<"p105">; +def P106 : PTXReg<"p106">; +def P107 : PTXReg<"p107">; +def P108 : PTXReg<"p108">; +def P109 : PTXReg<"p109">; +def P110 : PTXReg<"p110">; +def P111 : PTXReg<"p111">; +def P112 : PTXReg<"p112">; +def P113 : PTXReg<"p113">; +def P114 : PTXReg<"p114">; +def P115 : PTXReg<"p115">; +def P116 : PTXReg<"p116">; +def P117 : PTXReg<"p117">; +def P118 : PTXReg<"p118">; +def P119 : PTXReg<"p119">; +def P120 : PTXReg<"p120">; +def P121 : PTXReg<"p121">; +def P122 : PTXReg<"p122">; +def P123 : PTXReg<"p123">; +def P124 : PTXReg<"p124">; +def P125 : PTXReg<"p125">; +def P126 : PTXReg<"p126">; +def P127 : PTXReg<"p127">; + +///===- 16-Bit Registers --------------------------------------------------===// + +def RH0 : PTXReg<"rh0">; +def RH1 : PTXReg<"rh1">; +def RH2 : PTXReg<"rh2">; +def RH3 : PTXReg<"rh3">; +def RH4 : PTXReg<"rh4">; +def RH5 : PTXReg<"rh5">; +def RH6 : PTXReg<"rh6">; +def RH7 : PTXReg<"rh7">; +def RH8 : PTXReg<"rh8">; +def RH9 : PTXReg<"rh9">; +def RH10 : PTXReg<"rh10">; +def RH11 : PTXReg<"rh11">; +def RH12 : PTXReg<"rh12">; +def RH13 : PTXReg<"rh13">; +def RH14 : PTXReg<"rh14">; +def RH15 : PTXReg<"rh15">; +def RH16 : PTXReg<"rh16">; +def RH17 : PTXReg<"rh17">; +def RH18 : PTXReg<"rh18">; +def RH19 : PTXReg<"rh19">; +def RH20 : PTXReg<"rh20">; +def RH21 : PTXReg<"rh21">; +def RH22 : PTXReg<"rh22">; +def RH23 : PTXReg<"rh23">; +def RH24 : PTXReg<"rh24">; +def RH25 : PTXReg<"rh25">; +def RH26 : PTXReg<"rh26">; +def RH27 : PTXReg<"rh27">; +def RH28 : PTXReg<"rh28">; +def RH29 : PTXReg<"rh29">; +def RH30 : PTXReg<"rh30">; +def RH31 : PTXReg<"rh31">; +def RH32 : PTXReg<"rh32">; +def RH33 : PTXReg<"rh33">; +def RH34 : PTXReg<"rh34">; +def RH35 : PTXReg<"rh35">; +def RH36 : PTXReg<"rh36">; +def RH37 : PTXReg<"rh37">; +def RH38 : PTXReg<"rh38">; +def RH39 : PTXReg<"rh39">; +def RH40 : PTXReg<"rh40">; +def 
RH41 : PTXReg<"rh41">; +def RH42 : PTXReg<"rh42">; +def RH43 : PTXReg<"rh43">; +def RH44 : PTXReg<"rh44">; +def RH45 : PTXReg<"rh45">; +def RH46 : PTXReg<"rh46">; +def RH47 : PTXReg<"rh47">; +def RH48 : PTXReg<"rh48">; +def RH49 : PTXReg<"rh49">; +def RH50 : PTXReg<"rh50">; +def RH51 : PTXReg<"rh51">; +def RH52 : PTXReg<"rh52">; +def RH53 : PTXReg<"rh53">; +def RH54 : PTXReg<"rh54">; +def RH55 : PTXReg<"rh55">; +def RH56 : PTXReg<"rh56">; +def RH57 : PTXReg<"rh57">; +def RH58 : PTXReg<"rh58">; +def RH59 : PTXReg<"rh59">; +def RH60 : PTXReg<"rh60">; +def RH61 : PTXReg<"rh61">; +def RH62 : PTXReg<"rh62">; +def RH63 : PTXReg<"rh63">; +def RH64 : PTXReg<"rh64">; +def RH65 : PTXReg<"rh65">; +def RH66 : PTXReg<"rh66">; +def RH67 : PTXReg<"rh67">; +def RH68 : PTXReg<"rh68">; +def RH69 : PTXReg<"rh69">; +def RH70 : PTXReg<"rh70">; +def RH71 : PTXReg<"rh71">; +def RH72 : PTXReg<"rh72">; +def RH73 : PTXReg<"rh73">; +def RH74 : PTXReg<"rh74">; +def RH75 : PTXReg<"rh75">; +def RH76 : PTXReg<"rh76">; +def RH77 : PTXReg<"rh77">; +def RH78 : PTXReg<"rh78">; +def RH79 : PTXReg<"rh79">; +def RH80 : PTXReg<"rh80">; +def RH81 : PTXReg<"rh81">; +def RH82 : PTXReg<"rh82">; +def RH83 : PTXReg<"rh83">; +def RH84 : PTXReg<"rh84">; +def RH85 : PTXReg<"rh85">; +def RH86 : PTXReg<"rh86">; +def RH87 : PTXReg<"rh87">; +def RH88 : PTXReg<"rh88">; +def RH89 : PTXReg<"rh89">; +def RH90 : PTXReg<"rh90">; +def RH91 : PTXReg<"rh91">; +def RH92 : PTXReg<"rh92">; +def RH93 : PTXReg<"rh93">; +def RH94 : PTXReg<"rh94">; +def RH95 : PTXReg<"rh95">; +def RH96 : PTXReg<"rh96">; +def RH97 : PTXReg<"rh97">; +def RH98 : PTXReg<"rh98">; +def RH99 : PTXReg<"rh99">; +def RH100 : PTXReg<"rh100">; +def RH101 : PTXReg<"rh101">; +def RH102 : PTXReg<"rh102">; +def RH103 : PTXReg<"rh103">; +def RH104 : PTXReg<"rh104">; +def RH105 : PTXReg<"rh105">; +def RH106 : PTXReg<"rh106">; +def RH107 : PTXReg<"rh107">; +def RH108 : PTXReg<"rh108">; +def RH109 : PTXReg<"rh109">; +def RH110 : PTXReg<"rh110">; +def RH111 : 
PTXReg<"rh111">; +def RH112 : PTXReg<"rh112">; +def RH113 : PTXReg<"rh113">; +def RH114 : PTXReg<"rh114">; +def RH115 : PTXReg<"rh115">; +def RH116 : PTXReg<"rh116">; +def RH117 : PTXReg<"rh117">; +def RH118 : PTXReg<"rh118">; +def RH119 : PTXReg<"rh119">; +def RH120 : PTXReg<"rh120">; +def RH121 : PTXReg<"rh121">; +def RH122 : PTXReg<"rh122">; +def RH123 : PTXReg<"rh123">; +def RH124 : PTXReg<"rh124">; +def RH125 : PTXReg<"rh125">; +def RH126 : PTXReg<"rh126">; +def RH127 : PTXReg<"rh127">; + +///===- 32-Bit Registers --------------------------------------------------===// + +def R0 : PTXReg<"r0">; +def R1 : PTXReg<"r1">; +def R2 : PTXReg<"r2">; +def R3 : PTXReg<"r3">; +def R4 : PTXReg<"r4">; +def R5 : PTXReg<"r5">; +def R6 : PTXReg<"r6">; +def R7 : PTXReg<"r7">; +def R8 : PTXReg<"r8">; +def R9 : PTXReg<"r9">; +def R10 : PTXReg<"r10">; +def R11 : PTXReg<"r11">; +def R12 : PTXReg<"r12">; +def R13 : PTXReg<"r13">; +def R14 : PTXReg<"r14">; +def R15 : PTXReg<"r15">; +def R16 : PTXReg<"r16">; +def R17 : PTXReg<"r17">; +def R18 : PTXReg<"r18">; +def R19 : PTXReg<"r19">; +def R20 : PTXReg<"r20">; +def R21 : PTXReg<"r21">; +def R22 : PTXReg<"r22">; +def R23 : PTXReg<"r23">; +def R24 : PTXReg<"r24">; +def R25 : PTXReg<"r25">; +def R26 : PTXReg<"r26">; +def R27 : PTXReg<"r27">; +def R28 : PTXReg<"r28">; +def R29 : PTXReg<"r29">; +def R30 : PTXReg<"r30">; +def R31 : PTXReg<"r31">; +def R32 : PTXReg<"r32">; +def R33 : PTXReg<"r33">; +def R34 : PTXReg<"r34">; +def R35 : PTXReg<"r35">; +def R36 : PTXReg<"r36">; +def R37 : PTXReg<"r37">; +def R38 : PTXReg<"r38">; +def R39 : PTXReg<"r39">; +def R40 : PTXReg<"r40">; +def R41 : PTXReg<"r41">; +def R42 : PTXReg<"r42">; +def R43 : PTXReg<"r43">; +def R44 : PTXReg<"r44">; +def R45 : PTXReg<"r45">; +def R46 : PTXReg<"r46">; +def R47 : PTXReg<"r47">; +def R48 : PTXReg<"r48">; +def R49 : PTXReg<"r49">; +def R50 : PTXReg<"r50">; +def R51 : PTXReg<"r51">; +def R52 : PTXReg<"r52">; +def R53 : PTXReg<"r53">; +def R54 : PTXReg<"r54">; +def 
R55 : PTXReg<"r55">; +def R56 : PTXReg<"r56">; +def R57 : PTXReg<"r57">; +def R58 : PTXReg<"r58">; +def R59 : PTXReg<"r59">; +def R60 : PTXReg<"r60">; +def R61 : PTXReg<"r61">; +def R62 : PTXReg<"r62">; +def R63 : PTXReg<"r63">; +def R64 : PTXReg<"r64">; +def R65 : PTXReg<"r65">; +def R66 : PTXReg<"r66">; +def R67 : PTXReg<"r67">; +def R68 : PTXReg<"r68">; +def R69 : PTXReg<"r69">; +def R70 : PTXReg<"r70">; +def R71 : PTXReg<"r71">; +def R72 : PTXReg<"r72">; +def R73 : PTXReg<"r73">; +def R74 : PTXReg<"r74">; +def R75 : PTXReg<"r75">; +def R76 : PTXReg<"r76">; +def R77 : PTXReg<"r77">; +def R78 : PTXReg<"r78">; +def R79 : PTXReg<"r79">; +def R80 : PTXReg<"r80">; +def R81 : PTXReg<"r81">; +def R82 : PTXReg<"r82">; +def R83 : PTXReg<"r83">; +def R84 : PTXReg<"r84">; +def R85 : PTXReg<"r85">; +def R86 : PTXReg<"r86">; +def R87 : PTXReg<"r87">; +def R88 : PTXReg<"r88">; +def R89 : PTXReg<"r89">; +def R90 : PTXReg<"r90">; +def R91 : PTXReg<"r91">; +def R92 : PTXReg<"r92">; +def R93 : PTXReg<"r93">; +def R94 : PTXReg<"r94">; +def R95 : PTXReg<"r95">; +def R96 : PTXReg<"r96">; +def R97 : PTXReg<"r97">; +def R98 : PTXReg<"r98">; +def R99 : PTXReg<"r99">; +def R100 : PTXReg<"r100">; +def R101 : PTXReg<"r101">; +def R102 : PTXReg<"r102">; +def R103 : PTXReg<"r103">; +def R104 : PTXReg<"r104">; +def R105 : PTXReg<"r105">; +def R106 : PTXReg<"r106">; +def R107 : PTXReg<"r107">; +def R108 : PTXReg<"r108">; +def R109 : PTXReg<"r109">; +def R110 : PTXReg<"r110">; +def R111 : PTXReg<"r111">; +def R112 : PTXReg<"r112">; +def R113 : PTXReg<"r113">; +def R114 : PTXReg<"r114">; +def R115 : PTXReg<"r115">; +def R116 : PTXReg<"r116">; +def R117 : PTXReg<"r117">; +def R118 : PTXReg<"r118">; +def R119 : PTXReg<"r119">; +def R120 : PTXReg<"r120">; +def R121 : PTXReg<"r121">; +def R122 : PTXReg<"r122">; +def R123 : PTXReg<"r123">; +def R124 : PTXReg<"r124">; +def R125 : PTXReg<"r125">; +def R126 : PTXReg<"r126">; +def R127 : PTXReg<"r127">; + +///===- 64-Bit Registers 
--------------------------------------------------===// + +def RD0 : PTXReg<"rd0">; +def RD1 : PTXReg<"rd1">; +def RD2 : PTXReg<"rd2">; +def RD3 : PTXReg<"rd3">; +def RD4 : PTXReg<"rd4">; +def RD5 : PTXReg<"rd5">; +def RD6 : PTXReg<"rd6">; +def RD7 : PTXReg<"rd7">; +def RD8 : PTXReg<"rd8">; +def RD9 : PTXReg<"rd9">; +def RD10 : PTXReg<"rd10">; +def RD11 : PTXReg<"rd11">; +def RD12 : PTXReg<"rd12">; +def RD13 : PTXReg<"rd13">; +def RD14 : PTXReg<"rd14">; +def RD15 : PTXReg<"rd15">; +def RD16 : PTXReg<"rd16">; +def RD17 : PTXReg<"rd17">; +def RD18 : PTXReg<"rd18">; +def RD19 : PTXReg<"rd19">; +def RD20 : PTXReg<"rd20">; +def RD21 : PTXReg<"rd21">; +def RD22 : PTXReg<"rd22">; +def RD23 : PTXReg<"rd23">; +def RD24 : PTXReg<"rd24">; +def RD25 : PTXReg<"rd25">; +def RD26 : PTXReg<"rd26">; +def RD27 : PTXReg<"rd27">; +def RD28 : PTXReg<"rd28">; +def RD29 : PTXReg<"rd29">; +def RD30 : PTXReg<"rd30">; +def RD31 : PTXReg<"rd31">; +def RD32 : PTXReg<"rd32">; +def RD33 : PTXReg<"rd33">; +def RD34 : PTXReg<"rd34">; +def RD35 : PTXReg<"rd35">; +def RD36 : PTXReg<"rd36">; +def RD37 : PTXReg<"rd37">; +def RD38 : PTXReg<"rd38">; +def RD39 : PTXReg<"rd39">; +def RD40 : PTXReg<"rd40">; +def RD41 : PTXReg<"rd41">; +def RD42 : PTXReg<"rd42">; +def RD43 : PTXReg<"rd43">; +def RD44 : PTXReg<"rd44">; +def RD45 : PTXReg<"rd45">; +def RD46 : PTXReg<"rd46">; +def RD47 : PTXReg<"rd47">; +def RD48 : PTXReg<"rd48">; +def RD49 : PTXReg<"rd49">; +def RD50 : PTXReg<"rd50">; +def RD51 : PTXReg<"rd51">; +def RD52 : PTXReg<"rd52">; +def RD53 : PTXReg<"rd53">; +def RD54 : PTXReg<"rd54">; +def RD55 : PTXReg<"rd55">; +def RD56 : PTXReg<"rd56">; +def RD57 : PTXReg<"rd57">; +def RD58 : PTXReg<"rd58">; +def RD59 : PTXReg<"rd59">; +def RD60 : PTXReg<"rd60">; +def RD61 : PTXReg<"rd61">; +def RD62 : PTXReg<"rd62">; +def RD63 : PTXReg<"rd63">; +def RD64 : PTXReg<"rd64">; +def RD65 : PTXReg<"rd65">; +def RD66 : PTXReg<"rd66">; +def RD67 : PTXReg<"rd67">; +def RD68 : PTXReg<"rd68">; +def RD69 : PTXReg<"rd69">; 
+def RD70 : PTXReg<"rd70">; +def RD71 : PTXReg<"rd71">; +def RD72 : PTXReg<"rd72">; +def RD73 : PTXReg<"rd73">; +def RD74 : PTXReg<"rd74">; +def RD75 : PTXReg<"rd75">; +def RD76 : PTXReg<"rd76">; +def RD77 : PTXReg<"rd77">; +def RD78 : PTXReg<"rd78">; +def RD79 : PTXReg<"rd79">; +def RD80 : PTXReg<"rd80">; +def RD81 : PTXReg<"rd81">; +def RD82 : PTXReg<"rd82">; +def RD83 : PTXReg<"rd83">; +def RD84 : PTXReg<"rd84">; +def RD85 : PTXReg<"rd85">; +def RD86 : PTXReg<"rd86">; +def RD87 : PTXReg<"rd87">; +def RD88 : PTXReg<"rd88">; +def RD89 : PTXReg<"rd89">; +def RD90 : PTXReg<"rd90">; +def RD91 : PTXReg<"rd91">; +def RD92 : PTXReg<"rd92">; +def RD93 : PTXReg<"rd93">; +def RD94 : PTXReg<"rd94">; +def RD95 : PTXReg<"rd95">; +def RD96 : PTXReg<"rd96">; +def RD97 : PTXReg<"rd97">; +def RD98 : PTXReg<"rd98">; +def RD99 : PTXReg<"rd99">; +def RD100 : PTXReg<"rd100">; +def RD101 : PTXReg<"rd101">; +def RD102 : PTXReg<"rd102">; +def RD103 : PTXReg<"rd103">; +def RD104 : PTXReg<"rd104">; +def RD105 : PTXReg<"rd105">; +def RD106 : PTXReg<"rd106">; +def RD107 : PTXReg<"rd107">; +def RD108 : PTXReg<"rd108">; +def RD109 : PTXReg<"rd109">; +def RD110 : PTXReg<"rd110">; +def RD111 : PTXReg<"rd111">; +def RD112 : PTXReg<"rd112">; +def RD113 : PTXReg<"rd113">; +def RD114 : PTXReg<"rd114">; +def RD115 : PTXReg<"rd115">; +def RD116 : PTXReg<"rd116">; +def RD117 : PTXReg<"rd117">; +def RD118 : PTXReg<"rd118">; +def RD119 : PTXReg<"rd119">; +def RD120 : PTXReg<"rd120">; +def RD121 : PTXReg<"rd121">; +def RD122 : PTXReg<"rd122">; +def RD123 : PTXReg<"rd123">; +def RD124 : PTXReg<"rd124">; +def RD125 : PTXReg<"rd125">; +def RD126 : PTXReg<"rd126">; +def RD127 : PTXReg<"rd127">; + +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// +def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>; +def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence 
"RH%u", 0, 127)>; +def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>; +def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>; +def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>; +def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>; diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp new file mode 100644 index 0000000..8ec646e --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp @@ -0,0 +1,66 @@ +//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTX specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#include "PTXSubtarget.h" +#include "PTX.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetRegistry.h" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "PTXGenSubtargetInfo.inc" + +using namespace llvm; + +PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit) + : PTXGenSubtargetInfo(TT, CPU, FS), + PTXTarget(PTX_COMPUTE_1_0), + PTXVersion(PTX_VERSION_2_0), + SupportsDouble(false), + SupportsFMA(true), + Is64Bit(is64Bit) { + std::string TARGET = CPU; + if (TARGET.empty()) + TARGET = "generic"; + ParseSubtargetFeatures(TARGET, FS); +} + +std::string PTXSubtarget::getTargetString() const { + switch(PTXTarget) { + default: llvm_unreachable("Unknown PTX target"); + case PTX_SM_1_0: return "sm_10"; + case PTX_SM_1_1: return "sm_11"; + case PTX_SM_1_2: return "sm_12"; + case PTX_SM_1_3: return "sm_13"; + case PTX_SM_2_0: return "sm_20"; + 
case PTX_SM_2_1: return "sm_21"; + case PTX_SM_2_2: return "sm_22"; + case PTX_SM_2_3: return "sm_23"; + case PTX_COMPUTE_1_0: return "compute_10"; + case PTX_COMPUTE_1_1: return "compute_11"; + case PTX_COMPUTE_1_2: return "compute_12"; + case PTX_COMPUTE_1_3: return "compute_13"; + case PTX_COMPUTE_2_0: return "compute_20"; + } +} + +std::string PTXSubtarget::getPTXVersionString() const { + switch(PTXVersion) { + default: llvm_unreachable("Unknown PTX version"); + case PTX_VERSION_2_0: return "2.0"; + case PTX_VERSION_2_1: return "2.1"; + case PTX_VERSION_2_2: return "2.2"; + case PTX_VERSION_2_3: return "2.3"; + } +} diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h new file mode 100644 index 0000000..0921f1f --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h @@ -0,0 +1,121 @@ +//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PTX specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_SUBTARGET_H +#define PTX_SUBTARGET_H + +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "PTXGenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + + class PTXSubtarget : public PTXGenSubtargetInfo { + public: + + /** + * Enumeration of Shader Models supported by the back-end. 
+ */ + enum PTXTargetEnum { + PTX_COMPUTE_1_0, /**< Compute Compatibility 1.0 */ + PTX_COMPUTE_1_1, /**< Compute Compatibility 1.1 */ + PTX_COMPUTE_1_2, /**< Compute Compatibility 1.2 */ + PTX_COMPUTE_1_3, /**< Compute Compatibility 1.3 */ + PTX_COMPUTE_2_0, /**< Compute Compatibility 2.0 */ + PTX_LAST_COMPUTE, /**< Sentinel marking the end of the compute targets */ + + PTX_SM_1_0, /**< Shader Model 1.0 */ + PTX_SM_1_1, /**< Shader Model 1.1 */ + PTX_SM_1_2, /**< Shader Model 1.2 */ + PTX_SM_1_3, /**< Shader Model 1.3 */ + PTX_SM_2_0, /**< Shader Model 2.0 */ + PTX_SM_2_1, /**< Shader Model 2.1 */ + PTX_SM_2_2, /**< Shader Model 2.2 */ + PTX_SM_2_3, /**< Shader Model 2.3 */ + PTX_LAST_SM /**< Sentinel marking the end of the shader-model targets */ + }; + + /** + * Enumeration of PTX versions supported by the back-end. + * + * Currently, PTX 2.0 is the minimum supported version. + */ + enum PTXVersionEnum { + PTX_VERSION_2_0, /**< PTX Version 2.0 */ + PTX_VERSION_2_1, /**< PTX Version 2.1 */ + PTX_VERSION_2_2, /**< PTX Version 2.2 */ + PTX_VERSION_2_3 /**< PTX Version 2.3 */ + }; + + private: + + /// Shader Model supported on the target GPU. + PTXTargetEnum PTXTarget; + + /// PTX Language Version. + PTXVersionEnum PTXVersion; + + // The native .f64 type is supported on the hardware. + bool SupportsDouble; + + // Support the fused-multiply add (FMA) and multiply-add (MAD) + // instructions + bool SupportsFMA; + + // Use .u64 instead of .u32 for addresses.
+ bool Is64Bit; + + public: + + PTXSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, bool is64Bit); + + // Target architecture accessors + std::string getTargetString() const; + + std::string getPTXVersionString() const; + + bool supportsDouble() const { return SupportsDouble; } + + bool is64Bit() const { return Is64Bit; } + + bool supportsFMA() const { return SupportsFMA; } + + bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } + + bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; } + + bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; } + + bool fdivNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool fmadNeedsRoundingMode() const { + return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool useParamSpaceForDeviceArgs() const { + return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + }; // class PTXSubtarget +} // namespace llvm + +#endif // PTX_SUBTARGET_H diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp new file mode 100644 index 0000000..ab926e0 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp @@ -0,0 +1,87 @@ +//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Top-level implementation for the PTX target. 
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/PassManager.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace llvm { + MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, + bool isVerboseAsm, bool useLoc, + bool useCFI, + MCInstPrinter *InstPrint, + MCCodeEmitter *CE, + TargetAsmBackend *TAB, + bool ShowInst); +} + +extern "C" void LLVMInitializePTXTarget() { + + RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target); + RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target); + + TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer); + TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer); +} + +namespace { + const char* DataLayout32 = + "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; + const char* DataLayout64 = + "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; +} + +// DataLayout and FrameLowering are filled with dummy data +PTXTargetMachine::PTXTargetMachine(const Target &T, + const std::string &TT, + const std::string &CPU, + const std::string &FS, + bool is64Bit) + : LLVMTargetMachine(T, TT, CPU, FS), + DataLayout(is64Bit ? 
DataLayout64 : DataLayout32), + Subtarget(TT, CPU, FS, is64Bit), + FrameLowering(Subtarget), + InstrInfo(*this), + TLInfo(*this) { +} + +PTX32TargetMachine::PTX32TargetMachine(const Target &T, + const std::string& TT, + const std::string& CPU, + const std::string& FS) + : PTXTargetMachine(T, TT, CPU, FS, false) { +} + +PTX64TargetMachine::PTX64TargetMachine(const Target &T, + const std::string& TT, + const std::string& CPU, + const std::string& FS) + : PTXTargetMachine(T, TT, CPU, FS, true) { +} + +bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + PM.add(createPTXISelDag(*this, OptLevel)); + return false; +} + +bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // PTXMFInfoExtract must run after register allocation! + PM.add(createPTXMFInfoExtract(*this, OptLevel)); + return false; +} diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h new file mode 100644 index 0000000..ae42153 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h @@ -0,0 +1,77 @@ +//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PTX specific subclass of TargetMachine.
+// +//===----------------------------------------------------------------------===// + +#ifndef PTX_TARGET_MACHINE_H +#define PTX_TARGET_MACHINE_H + +#include "PTXISelLowering.h" +#include "PTXInstrInfo.h" +#include "PTXFrameLowering.h" +#include "PTXSubtarget.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class PTXTargetMachine : public LLVMTargetMachine { + private: + const TargetData DataLayout; + PTXSubtarget Subtarget; // has to be initialized before FrameLowering + PTXFrameLowering FrameLowering; + PTXInstrInfo InstrInfo; + PTXTargetLowering TLInfo; + + public: + PTXTargetMachine(const Target &T, const std::string &TT, + const std::string &CPU, const std::string &FS, + bool is64Bit); + + virtual const TargetData *getTargetData() const { return &DataLayout; } + + virtual const TargetFrameLowering *getFrameLowering() const { + return &FrameLowering; + } + + virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); } + + virtual const PTXTargetLowering *getTargetLowering() const { + return &TLInfo; } + + virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } + + virtual bool addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); + virtual bool addPostRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel); +}; // class PTXTargetMachine + + +class PTX32TargetMachine : public PTXTargetMachine { +public: + + PTX32TargetMachine(const Target &T, const std::string &TT, + const std::string& CPU, const std::string& FS); +}; // class PTX32TargetMachine + +class PTX64TargetMachine : public PTXTargetMachine { +public: + + PTX64TargetMachine(const Target &T, const std::string &TT, + const std::string& CPU, const std::string& FS); +}; // class PTX64TargetMachine + +} // namespace llvm + +#endif // PTX_TARGET_MACHINE_H diff
--git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt new file mode 100644 index 0000000..4b09cf5 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMPTXInfo + PTXTargetInfo.cpp + ) + +add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile new file mode 100644 index 0000000..8619785 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMPTXInfo + +# Hack: we need to include 'main' target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp new file mode 100644 index 0000000..9df6c75 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp @@ -0,0 +1,25 @@ +//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetRegistry.h" + +using namespace llvm; + +Target llvm::ThePTX32Target; +Target llvm::ThePTX64Target; + +extern "C" void LLVMInitializePTXTargetInfo() { + // see llvm/ADT/Triple.h + RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32", + "PTX (32-bit) [Experimental]"); + RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64", + "PTX (64-bit) [Experimental]"); +} diff --git a/contrib/llvm/lib/Target/PTX/generate-register-td.py b/contrib/llvm/lib/Target/PTX/generate-register-td.py new file mode 100755 index 0000000..1528690 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/generate-register-td.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +##===- generate-register-td.py --------------------------------*-python-*--===## +## +## The LLVM Compiler Infrastructure +## +## This file is distributed under the University of Illinois Open Source +## License. See LICENSE.TXT for details. +## +##===----------------------------------------------------------------------===## +## +## This file describes the PTX register file generator. +## +##===----------------------------------------------------------------------===## + +from sys import argv, exit, stdout + + +if len(argv) != 5: + print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>') + exit(1) + +try: + num_pred = int(argv[1]) + num_16bit = int(argv[2]) + num_32bit = int(argv[3]) + num_64bit = int(argv[4]) +except: + print('ERROR: Invalid integer parameter') + exit(1) + +## Print the register definition file +td_file = open('PTXRegisterInfo.td', 'w') + +td_file.write(''' +//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the PTX register file +//===----------------------------------------------------------------------===// + +class PTXReg<string n> : Register<n> { + let Namespace = "PTX"; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// +''') + + +# Print predicate registers +td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n') +for r in range(0, num_pred): + td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r)) + +# Print 16-bit registers +td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_16bit): + td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r)) + +# Print 32-bit registers +td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_32bit): + td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r)) + +# Print 64-bit registers +td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n') +for r in range(0, num_64bit): + td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r)) + + +td_file.write(''' +//===----------------------------------------------------------------------===// +// Register classes +//===----------------------------------------------------------------------===// +''') + + +# Print register classes + +td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1)) +td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1)) +td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % 
(num_32bit-1)) +td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) +td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) +td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) + + +td_file.close() + +## Now write the PTXCallingConv.td file +td_file = open('PTXCallingConv.td', 'w') + +# Reserve 10% of the available registers for return values, and the other 90% +# for parameters +num_ret_pred = int(0.1 * num_pred) +num_ret_16bit = int(0.1 * num_16bit) +num_ret_32bit = int(0.1 * num_32bit) +num_ret_64bit = int(0.1 * num_64bit) +num_param_pred = num_pred - num_ret_pred +num_param_16bit = num_16bit - num_ret_16bit +num_param_32bit = num_32bit - num_ret_32bit +num_param_64bit = num_64bit - num_ret_64bit + +param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)] +ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)] +param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)] +ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)] +param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)] +ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)] +param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)] +ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)] + +param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred) +ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred) +param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit) +ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit) +param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit) +ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit) +param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit) 
+ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit) + +td_file.write(''' +//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the PTX architecture. +// +//===----------------------------------------------------------------------===// + +// PTX Formal Parameter Calling Convention +def CC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; + +// PTX Return Value Calling Convention +def RetCC_PTX : CallingConv<[ + CCIfType<[i1], CCAssignToReg<[%s]>>, + CCIfType<[i16], CCAssignToReg<[%s]>>, + CCIfType<[i32,f32], CCAssignToReg<[%s]>>, + CCIfType<[i64,f64], CCAssignToReg<[%s]>> +]>; +''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit, + ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit)) + + +td_file.close() |