diff options
Diffstat (limited to 'contrib/llvm/lib/Target/PTX')
41 files changed, 3167 insertions, 2069 deletions
diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt deleted file mode 100644 index 331266d..0000000 --- a/contrib/llvm/lib/Target/PTX/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS PTX.td) - -tablegen(PTXGenAsmWriter.inc -gen-asm-writer) -tablegen(PTXGenDAGISel.inc -gen-dag-isel) -tablegen(PTXGenInstrInfo.inc -gen-instr-desc) -tablegen(PTXGenInstrNames.inc -gen-instr-enums) -tablegen(PTXGenRegisterInfo.inc -gen-register-desc) -tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header) -tablegen(PTXGenRegisterNames.inc -gen-register-enums) -tablegen(PTXGenSubtarget.inc -gen-subtarget) - -add_llvm_target(PTXCodeGen - PTXAsmPrinter.cpp - PTXISelDAGToDAG.cpp - PTXISelLowering.cpp - PTXInstrInfo.cpp - PTXFrameLowering.cpp - PTXMCAsmInfo.cpp - PTXMCAsmStreamer.cpp - PTXMFInfoExtract.cpp - PTXRegisterInfo.cpp - PTXSubtarget.cpp - PTXTargetMachine.cpp - ) - -add_subdirectory(TargetInfo) diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp new file mode 100644 index 0000000..aabb404 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -0,0 +1,192 @@ +//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints a PTX MCInst to a .ptx file. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "PTXInstPrinter.h" +#include "MCTargetDesc/PTXBaseInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define GET_INSTRUCTION_NAME +#include "PTXGenAsmWriter.inc" + +PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) : + MCInstPrinter(MAI) { + // Initialize the set of available features. + setAvailableFeatures(STI.getFeatureBits()); +} + +StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const { + return getInstructionName(Opcode); +} + +void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { + OS << getRegisterName(RegNo); +} + +void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + printPredicate(MI, O); + switch (MI->getOpcode()) { + default: + printInstruction(MI, O); + break; + case PTX::CALL: + printCall(MI, O); + } + O << ";"; + printAnnotation(O, Annot); +} + +void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) { + // The last two operands are the predicate operands + int RegIndex; + int OpIndex; + + if (MI->getOpcode() == PTX::CALL) { + RegIndex = 0; + OpIndex = 1; + } else { + RegIndex = MI->getNumOperands()-2; + OpIndex = MI->getNumOperands()-1; + } + + int PredOp = MI->getOperand(OpIndex).getImm(); + if (PredOp == PTXPredicate::None) + return; + + if (PredOp == PTXPredicate::Negate) + O << '!'; + else + O << '@'; + + printOperand(MI, RegIndex, O); +} + +void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) { + O << "\tcall.uni\t"; + // The first two operands are the predicate slot + unsigned Index = 2; + unsigned NumRets = MI->getOperand(Index++).getImm(); + + if (NumRets > 0) { + O << "("; 
+ printOperand(MI, Index++, O); + for (unsigned i = 1; i < NumRets; ++i) { + O << ", "; + printOperand(MI, Index++, O); + } + O << "), "; + } + + O << *(MI->getOperand(Index++).getExpr()) << ", ("; + + unsigned NumArgs = MI->getOperand(Index++).getImm(); + if (NumArgs > 0) { + printOperand(MI, Index++, O); + for (unsigned i = 1; i < NumArgs; ++i) { + O << ", "; + printOperand(MI, Index++, O); + } + } + O << ")"; +} + +void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << Op.getImm(); + } else if (Op.isFPImm()) { + double Imm = Op.getFPImm(); + APFloat FPImm(Imm); + APInt FPIntImm = FPImm.bitcastToAPInt(); + O << "0D"; + // PTX requires us to output the full 64 bits, even if the number is zero + if (FPIntImm.getZExtValue() > 0) { + O << FPIntImm.toString(16, false); + } else { + O << "0000000000000000"; + } + } else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + const MCExpr *Expr = Op.getExpr(); + if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) { + const MCSymbol &Sym = SymRefExpr->getSymbol(); + O << Sym.getName(); + } else { + O << *Op.getExpr(); + } + } +} + +void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + // By definition, operand OpNo+1 is an i32imm + const MCOperand &Op2 = MI->getOperand(OpNo+1); + printOperand(MI, OpNo, O); + if (Op2.getImm() == 0) + return; // don't print "+0" + O << "+" << Op2.getImm(); +} + +void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert (Op.isImm() && "Rounding modes must be immediate values"); + switch (Op.getImm()) { + default: + llvm_unreachable("Unknown rounding mode!"); + case PTXRoundingMode::RndDefault: + llvm_unreachable("FP rounding-mode pass did not handle instruction!"); + break; + case PTXRoundingMode::RndNone: + // Do not print 
anything. + break; + case PTXRoundingMode::RndNearestEven: + O << ".rn"; + break; + case PTXRoundingMode::RndTowardsZero: + O << ".rz"; + break; + case PTXRoundingMode::RndNegInf: + O << ".rm"; + break; + case PTXRoundingMode::RndPosInf: + O << ".rp"; + break; + case PTXRoundingMode::RndApprox: + O << ".approx"; + break; + case PTXRoundingMode::RndNearestEvenInt: + O << ".rni"; + break; + case PTXRoundingMode::RndTowardsZeroInt: + O << ".rzi"; + break; + case PTXRoundingMode::RndNegInfInt: + O << ".rmi"; + break; + case PTXRoundingMode::RndPosInfInt: + O << ".rpi"; + break; + } +} + diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h new file mode 100644 index 0000000..86dfd48 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h @@ -0,0 +1,47 @@ +//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints n PTX MCInst to a .ptx file. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXINSTPRINTER_H +#define PTXINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSubtargetInfo.h" + +namespace llvm { + +class MCOperand; + +class PTXInstPrinter : public MCInstPrinter { +public: + PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI); + + virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + virtual StringRef getOpcodeName(unsigned Opcode) const; + virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; + + static const char *getInstructionName(unsigned Opcode); + + // Autogenerated by tblgen. 
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printPredicate(const MCInst *MI, raw_ostream &O); + void printCall(const MCInst *MI, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index df0f63f..0000000 --- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_llvm_library(LLVMPTXDesc - PTXMCTargetDesc.cpp - PTXMCAsmInfo.cpp - ) diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile deleted file mode 100644 index 35f5a7b..0000000 --- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h new file mode 100644 index 0000000..c6094be --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h @@ -0,0 +1,63 @@ +//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the PTX target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXBASEINFO_H +#define PTXBASEINFO_H + +#include "PTXMCTargetDesc.h" + +namespace llvm { + namespace PTXStateSpace { + enum { + Global = 0, // default to global state space + Constant = 1, + Local = 2, + Parameter = 3, + Shared = 4 + }; + } // namespace PTXStateSpace + + namespace PTXPredicate { + enum { + Normal = 0, + Negate = 1, + None = 2 + }; + } // namespace PTXPredicate + + /// Namespace to hold all target-specific flags. 
+ namespace PTXRoundingMode { + // Instruction Flags + enum { + // Rounding Mode Flags + RndMask = 15, + RndDefault = 0, // --- + RndNone = 1, // <NONE> + RndNearestEven = 2, // .rn + RndTowardsZero = 3, // .rz + RndNegInf = 4, // .rm + RndPosInf = 5, // .rp + RndApprox = 6, // .approx + RndNearestEvenInt = 7, // .rni + RndTowardsZeroInt = 8, // .rzi + RndNegInfInt = 9, // .rmi + RndPosInfInt = 10 // .rpi + }; + } // namespace PTXII +} // namespace llvm + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp index 23f70bd..a5af3b8 100644 --- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp +++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp @@ -13,10 +13,12 @@ #include "PTXMCTargetDesc.h" #include "PTXMCAsmInfo.h" +#include "InstPrinter/PTXInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_INSTRINFO_MC_DESC #include "PTXGenInstrInfo.inc" @@ -35,9 +37,11 @@ static MCInstrInfo *createPTXMCInstrInfo() { return X; } -extern "C" void LLVMInitializePTXMCInstrInfo() { - TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); - TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); +static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + // PTX does not have a return address register. 
+ InitPTXMCRegisterInfo(X, 0); + return X; } static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, @@ -47,14 +51,45 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU, return X; } -extern "C" void LLVMInitializePTXMCSubtargetInfo() { +static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM); + return X; +} + +static MCInstPrinter *createPTXMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCSubtargetInfo &STI) { + assert(SyntaxVariant == 0 && "We only have one syntax variant"); + return new PTXInstPrinter(MAI, STI); +} + +extern "C" void LLVMInitializePTXTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); + RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo); + + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target, createPTXMCSubtargetInfo); TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target, createPTXMCSubtargetInfo); -} -extern "C" void LLVMInitializePTXMCAsmInfo() { - RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target); - RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target); + // Register the MCInstPrinter. 
+ TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter); } diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h index 28cab24..7d46cce 100644 --- a/contrib/llvm/lib/Target/PTX/PTX.h +++ b/contrib/llvm/lib/Target/PTX/PTX.h @@ -15,34 +15,30 @@ #ifndef PTX_H #define PTX_H -#include "MCTargetDesc/PTXMCTargetDesc.h" +#include "MCTargetDesc/PTXBaseInfo.h" #include "llvm/Target/TargetMachine.h" namespace llvm { + class MachineInstr; + class MCInst; + class PTXAsmPrinter; class PTXTargetMachine; class FunctionPass; - namespace PTX { - enum StateSpace { - GLOBAL = 0, // default to global state space - CONSTANT = 1, - LOCAL = 2, - PARAMETER = 3, - SHARED = 4 - }; - - enum Predicate { - PRED_NORMAL = 0, - PRED_NEGATE = 1 - }; - } // namespace PTX - FunctionPass *createPTXISelDag(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); + FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel); + + FunctionPass *createPTXRegisterAllocator(); + + void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + PTXAsmPrinter &AP); + } // namespace llvm; #endif // PTX_H diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td index f6fbe9f..693bb9c 100644 --- a/contrib/llvm/lib/Target/PTX/PTX.td +++ b/contrib/llvm/lib/Target/PTX/PTX.td @@ -52,13 +52,13 @@ def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2", def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3", "Use Shader Model 1.3">; def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0", - "Use Shader Model 2.0">; + "Use Shader Model 2.0", [FeatureDouble]>; def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1", - "Use Shader Model 2.1">; + "Use Shader Model 2.1", [FeatureDouble]>; 
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2", - "Use Shader Model 2.2">; + "Use Shader Model 2.2", [FeatureDouble]>; def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3", - "Use Shader Model 2.3">; + "Use Shader Model 2.3", [FeatureDouble]>; def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget", "PTX_COMPUTE_1_0", @@ -74,7 +74,8 @@ def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget", "Use Compute Compatibility 1.3">; def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget", "PTX_COMPUTE_2_0", - "Use Compute Compatibility 2.0">; + "Use Compute Compatibility 2.0", + [FeatureDouble]>; //===----------------------------------------------------------------------===// // PTX supported processors @@ -113,12 +114,6 @@ def : Proc<"fermi", [FeatureSM20, FeatureDouble]>; include "PTXRegisterInfo.td" //===----------------------------------------------------------------------===// -// Calling Conventions -//===----------------------------------------------------------------------===// - -include "PTXCallingConv.td" - -//===----------------------------------------------------------------------===// // Instruction Descriptions //===----------------------------------------------------------------------===// @@ -127,9 +122,20 @@ include "PTXInstrInfo.td" def PTXInstrInfo : InstrInfo; //===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// PTX uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. 
+def PTXAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// // Target Declaration //===----------------------------------------------------------------------===// def PTX : Target { let InstructionSet = PTXInstrInfo; + let AssemblyWriters = [PTXAsmWriter]; } diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp index 2848d54..733744b 100644 --- a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp @@ -15,9 +15,14 @@ #define DEBUG_TYPE "ptx-asm-printer" #include "PTX.h" +#include "PTXAsmPrinter.h" #include "PTXMachineFunctionInfo.h" +#include "PTXParamManager.h" +#include "PTXRegisterInfo.h" #include "PTXTargetMachine.h" +#include "llvm/Argument.h" #include "llvm/DerivedTypes.h" +#include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -28,69 +33,32 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace { -class PTXAsmPrinter : public AsmPrinter { -public: - explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - const char *getPassName() const { return "PTX Assembly Printer"; } - - bool doFinalization(Module &M); - - virtual void 
EmitStartOfAsmFile(Module &M); - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); } - - virtual void EmitInstruction(const MachineInstr *MI); - - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); - void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); - - unsigned GetOrCreateSourceID(StringRef FileName, - StringRef DirName); - - // autogen'd. - void printInstruction(const MachineInstr *MI, raw_ostream &OS); - static const char *getRegisterName(unsigned RegNo); - -private: - void EmitVariableDeclaration(const GlobalVariable *gv); - void EmitFunctionDeclaration(); - - StringMap<unsigned> SourceIdMap; -}; // class PTXAsmPrinter -} // namespace - static const char PARAM_PREFIX[] = "__param_"; static const char RETURN_PREFIX[] = "__ret_"; -static const char *getRegisterTypeName(unsigned RegNo) { -#define TEST_REGCLS(cls, clsstr) \ - if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; +static const char *getRegisterTypeName(unsigned RegNo, + const MachineRegisterInfo& MRI) { + const TargetRegisterClass *TRC = MRI.getRegClass(RegNo); + +#define TEST_REGCLS(cls, clsstr) \ + if (PTX::cls ## RegisterClass == TRC) return # clsstr; + TEST_REGCLS(RegPred, pred); TEST_REGCLS(RegI16, b16); TEST_REGCLS(RegI32, b32); @@ -106,16 +74,16 @@ static const char *getRegisterTypeName(unsigned RegNo) { static const char *getStateSpaceName(unsigned addressSpace) { switch (addressSpace) { default: llvm_unreachable("Unknown state space"); - case PTX::GLOBAL: return "global"; - case PTX::CONSTANT: return 
"const"; - case PTX::LOCAL: return "local"; - case PTX::PARAMETER: return "param"; - case PTX::SHARED: return "shared"; + case PTXStateSpace::Global: return "global"; + case PTXStateSpace::Constant: return "const"; + case PTXStateSpace::Local: return "local"; + case PTXStateSpace::Parameter: return "param"; + case PTXStateSpace::Shared: return "shared"; } return NULL; } -static const char *getTypeName(const Type* type) { +static const char *getTypeName(Type* type) { while (true) { switch (type->getTypeID()) { default: llvm_unreachable("Unknown type"); @@ -130,7 +98,7 @@ static const char *getTypeName(const Type* type) { } case Type::ArrayTyID: case Type::PointerTyID: - type = dyn_cast<const SequentialType>(type)->getElementType(); + type = dyn_cast<SequentialType>(type)->getElementType(); break; } } @@ -170,6 +138,7 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) { const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + // Emit the PTX .version and .target attributes OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString())); OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + (ST.supportsDouble() ? 
"" @@ -203,177 +172,118 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) EmitVariableDeclaration(i); } -bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) { - SetupMachineFunction(MF); - EmitFunctionDeclaration(); - EmitFunctionBody(); - return false; -} - void PTXAsmPrinter::EmitFunctionBodyStart() { OutStreamer.EmitRawText(Twine("{")); const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const PTXParamManager &PM = MFI->getParamManager(); + + // Print register definitions + std::string regDefs; + unsigned numRegs; + + // pred + numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .pred %p<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i16 + numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b16 %rh<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i32 + numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b32 %r<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // i64 + numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .b64 %rd<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // f32 + numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .f32 %f<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } + + // f64 + numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass); + if(numRegs > 0) { + regDefs += "\t.reg .f64 %fd<"; + regDefs += utostr(numRegs); + regDefs += ">;\n"; + } - // Print local variable definition - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) { - unsigned reg = *i; - - std::string def = "\t.reg ."; - def += getRegisterTypeName(reg); - def += ' '; - def += getRegisterName(reg); - def += ';'; - 
OutStreamer.EmitRawText(Twine(def)); + // Local params + for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end(); + i != e; ++i) { + regDefs += "\t.param .b"; + regDefs += utostr(PM.getParamSize(*i)); + regDefs += " "; + regDefs += PM.getParamName(*i); + regDefs += ";\n"; } + OutStreamer.EmitRawText(Twine(regDefs)); + + const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects() << " frame object(s)\n"); for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); if (FrameInfo->getObjectSize(i) > 0) { - std::string def = "\t.reg .b"; - def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits - def += " s"; + std::string def = "\t.local .align "; + def += utostr(FrameInfo->getObjectAlignment(i)); + def += " .b8"; + def += " __local"; def += utostr(i); + def += "["; + def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits + def += "]"; def += ";"; OutStreamer.EmitRawText(Twine(def)); } } -} - -void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { - std::string str; - str.reserve(64); - - raw_string_ostream OS(str); - - DebugLoc DL = MI->getDebugLoc(); - if (!DL.isUnknown()) { - - const MDNode *S = DL.getScope(MF->getFunction()->getContext()); - - // This is taken from DwarfDebug.cpp, which is conveniently not a public - // LLVM class. 
- StringRef Fn; - StringRef Dir; - unsigned Src = 1; - if (S) { - DIDescriptor Scope(S); - if (Scope.isCompileUnit()) { - DICompileUnit CU(S); - Fn = CU.getFilename(); - Dir = CU.getDirectory(); - } else if (Scope.isFile()) { - DIFile F(S); - Fn = F.getFilename(); - Dir = F.getDirectory(); - } else if (Scope.isSubprogram()) { - DISubprogram SP(S); - Fn = SP.getFilename(); - Dir = SP.getDirectory(); - } else if (Scope.isLexicalBlock()) { - DILexicalBlock DB(S); - Fn = DB.getFilename(); - Dir = DB.getDirectory(); - } else - assert(0 && "Unexpected scope info"); - - Src = GetOrCreateSourceID(Fn, Dir); - } - OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(), - 0, 0, 0, Fn); - - const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc(); - - OS << "\t.loc "; - OS << utostr(MDL.getFileNum()); - OS << " "; - OS << utostr(MDL.getLine()); - OS << " "; - OS << utostr(MDL.getColumn()); - OS << "\n"; - } - - - // Emit predicate - printPredicateOperand(MI, OS); - - // Write instruction to str - printInstruction(MI, OS); - OS << ';'; - OS.flush(); - - StringRef strref = StringRef(str); - OutStreamer.EmitRawText(strref); -} - -void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS) { - const MachineOperand &MO = MI->getOperand(opNum); - - switch (MO.getType()) { - default: - llvm_unreachable("<unknown operand type>"); - break; - case MachineOperand::MO_GlobalAddress: - OS << *Mang->getSymbol(MO.getGlobal()); - break; - case MachineOperand::MO_Immediate: - OS << (long) MO.getImm(); - break; - case MachineOperand::MO_MachineBasicBlock: - OS << *MO.getMBB()->getSymbol(); - break; - case MachineOperand::MO_Register: - OS << getRegisterName(MO.getReg()); - break; - case MachineOperand::MO_FPImmediate: - APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt(); - bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID; - // Emit 0F for 32-bit floats and 0D for 64-bit doubles. 
- if (isFloat) { - OS << "0F"; - } - else { - OS << "0D"; - } - // Emit the encoded floating-point value. - if (constFP.getZExtValue() > 0) { - OS << constFP.toString(16, false); - } - else { - OS << "00000000"; - // If We have a double-precision zero, pad to 8-bytes. - if (!isFloat) { - OS << "00000000"; - } - } - break; - } -} - -void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - printOperand(MI, opNum, OS); - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; // don't print "+0" - - OS << "+"; - printOperand(MI, opNum+1, OS); + //unsigned Index = 1; + // Print parameter passing params + //for (PTXMachineFunctionInfo::param_iterator + // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) { + // std::string def = "\t.param .b"; + // def += utostr(*i); + // def += " __ret_"; + // def += utostr(Index); + // Index++; + // def += ";"; + // OutStreamer.EmitRawText(Twine(def)); + //} } -void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +void PTXAsmPrinter::EmitFunctionBodyEnd() { + OutStreamer.EmitRawText(Twine("}")); } -void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum, - raw_ostream &OS, const char *Modifier) { - OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1; +void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { + MCInst TmpInst; + LowerPTXMachineInstrToMCInst(MI, TmpInst, *this); + OutStreamer.EmitInstruction(TmpInst); } void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { @@ -400,14 +310,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { unsigned alignment = gv->getAlignment(); if (alignment != 0) { decl += ".align "; - decl += utostr(Log2_32(gv->getAlignment())); + decl += utostr(gv->getAlignment()); decl += " "; } if 
(PointerType::classof(gv->getType())) { - const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType()); - const Type* elementTy = pointerTy->getElementType(); + PointerType* pointerTy = dyn_cast<PointerType>(gv->getType()); + Type* elementTy = pointerTy->getElementType(); decl += ".b8 "; decl += gvsym->getName(); @@ -417,14 +327,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { { assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); - const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy); + ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy); elementTy = arrayTy->getElementType(); unsigned numElements = arrayTy->getNumElements(); while (elementTy->isArrayTy()) { - arrayTy = dyn_cast<const ArrayType>(elementTy); + arrayTy = dyn_cast<ArrayType>(elementTy); elementTy = arrayTy->getElementType(); numElements *= arrayTy->getNumElements(); @@ -447,7 +357,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { if (gv->hasInitializer()) { - const Constant *C = gv->getInitializer(); + const Constant *C = gv->getInitializer(); if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) { decl += " = {"; @@ -484,7 +394,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { OutStreamer.AddBlankLine(); } -void PTXAsmPrinter::EmitFunctionDeclaration() { +void PTXAsmPrinter::EmitFunctionEntryLabel() { // The function label could have already been emitted if two symbols end up // conflicting due to asm renaming. Detect this and emit an error. if (!CurrentFnSym->isUndefined()) { @@ -494,25 +404,39 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { } const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const PTXParamManager &PM = MFI->getParamManager(); const bool isKernel = MFI->isKernel(); const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>(); + const MachineRegisterInfo& MRI = MF->getRegInfo(); std::string decl = isKernel ? 
".entry" : ".func"; - unsigned cnt = 0; - if (!isKernel) { decl += " ("; - for (PTXMachineFunctionInfo::ret_iterator - i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i; - i != e; ++i) { - if (i != b) { - decl += ", "; + if (ST.useParamSpaceForDeviceArgs()) { + for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(), + b = i; i != e; ++i) { + if (i != b) { + decl += ", "; + } + + decl += ".param .b"; + decl += utostr(PM.getParamSize(*i)); + decl += " "; + decl += PM.getParamName(*i); + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; + decl += getRegisterTypeName(*i, MRI); + decl += " "; + decl += MFI->getRegisterName(*i); } - decl += ".reg ."; - decl += getRegisterTypeName(*i); - decl += " "; - decl += getRegisterName(*i); } decl += ")"; } @@ -523,26 +447,65 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { decl += " ("; - cnt = 0; + const Function *F = MF->getFunction(); // Print parameters - for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; - i != e; ++i) { - if (i != b) { - decl += ", "; - } - if (isKernel || ST.useParamSpaceForDeviceArgs()) { + if (isKernel || ST.useParamSpaceForDeviceArgs()) { + /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(), + b = i; i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".param .b"; - decl += utostr(*i); + decl += utostr(PM.getParamSize(*i)); decl += " "; - decl += PARAM_PREFIX; - decl += utostr(++cnt); - } else { + decl += PM.getParamName(*i); + }*/ + int Counter = 1; + for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(), + b = i; i != e; ++i) { + if (i != b) + decl += ", "; + const Type *ArgType = (*i).getType(); + decl += ".param .b"; + if (ArgType->isPointerTy()) { + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + } else { + decl += 
utostr(ArgType->getPrimitiveSizeInBits()); + } + if (ArgType->isPointerTy() && ST.emitPtrAttribute()) { + const PointerType *PtrType = dyn_cast<const PointerType>(ArgType); + decl += " .ptr"; + switch (PtrType->getAddressSpace()) { + default: + llvm_unreachable("Unknown address space in argument"); + case PTXStateSpace::Global: + decl += " .global"; + break; + case PTXStateSpace::Shared: + decl += " .shared"; + break; + } + } + decl += " __param_"; + decl += utostr(Counter++); + } + } else { + for (PTXMachineFunctionInfo::reg_iterator + i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i; + i != e; ++i) { + if (i != b) { + decl += ", "; + } + decl += ".reg ."; - decl += getRegisterTypeName(*i); + decl += getRegisterTypeName(*i, MRI); decl += " "; - decl += getRegisterName(*i); + decl += MFI->getRegisterName(*i); } } decl += ")"; @@ -550,25 +513,6 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { OutStreamer.EmitRawText(Twine(decl)); } -void PTXAsmPrinter:: -printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { - int i = MI->findFirstPredOperandIdx(); - if (i == -1) - llvm_unreachable("missing predicate operand"); - - unsigned reg = MI->getOperand(i).getReg(); - int predOp = MI->getOperand(i+1).getImm(); - - DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n"); - - if (reg != PTX::NoRegister) { - O << '@'; - if (predOp == PTX::PRED_NEGATE) - O << '!'; - O << getRegisterName(reg); - } -} - unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. 
@@ -596,10 +540,58 @@ unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, return SrcId; } -#include "PTXGenAsmWriter.inc" +MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, + const MCSymbol *Symbol) { + const MCExpr *Expr; + Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext); + return MCOperand::CreateExpr(Expr); +} + +MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) { + MCOperand MCOp; + const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + const MCExpr *Expr; + const char *RegSymbolName; + switch (MO.getType()) { + default: + llvm_unreachable("Unknown operand type"); + case MachineOperand::MO_Register: + // We create register operands as symbols, since the PTXInstPrinter class + // has no way to map virtual registers back to a name without some ugly + // hacks. + // FIXME: Figure out a better way to handle virtual register naming. + RegSymbolName = MFI->getRegisterName(MO.getReg()); + Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None, + OutContext); + MCOp = MCOperand::CreateExpr(Expr); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MO.getMBB()->getSymbol(), OutContext)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal())); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + break; + case MachineOperand::MO_FPImmediate: + APFloat Val = MO.getFPImm()->getValueAPF(); + bool ignored; + Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + MCOp = MCOperand::CreateFPImm(Val.convertToDouble()); + break; + } + + return MCOp; +} // Force static initialization. 
extern "C" void LLVMInitializePTXAsmPrinter() { RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target); RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target); } + diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h new file mode 100644 index 0000000..538c080 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h @@ -0,0 +1,57 @@ +//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// PTX Assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXASMPRINTER_H +#define PTXASMPRINTER_H + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class MCOperand; + +class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter { +public: + explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer) {} + + const char *getPassName() const { return "PTX Assembly Printer"; } + + bool doFinalization(Module &M); + + virtual void EmitStartOfAsmFile(Module &M); + virtual void EmitFunctionBodyStart(); + virtual void EmitFunctionBodyEnd(); + virtual void EmitFunctionEntryLabel(); + virtual void EmitInstruction(const MachineInstr *MI); + + unsigned GetOrCreateSourceID(StringRef FileName, + StringRef DirName); + + MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); + MCOperand lowerOperand(const MachineOperand &MO); + +private: + void EmitVariableDeclaration(const GlobalVariable *gv); + void EmitFunctionDeclaration(); + + StringMap<unsigned> SourceIdMap; +}; // class PTXAsmPrinter +} // namespace llvm + +#endif + 
diff --git a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td deleted file mode 100644 index 3e3ff48..0000000 --- a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td +++ /dev/null @@ -1,29 +0,0 @@ - -//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the PTX architecture. -// -//===----------------------------------------------------------------------===// - -// PTX Formal Parameter Calling Convention -def CC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>, - CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, 
RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>, - CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>, - CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>> -]>; - -// PTX Return Value Calling Convention -def RetCC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>, - CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>, - CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>, - CCIfType<[i64,f64], 
CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>> -]>; diff --git a/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp new file mode 100644 index 0000000..0b653e0 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp @@ -0,0 +1,179 @@ +//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a machine function pass that sets appropriate FP rounding +// modes for all relevant instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-fp-rounding-mode" + +#include "PTX.h" +#include "PTXTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +// NOTE: PTXFPRoundingModePass should be executed just before emission. + +namespace llvm { + /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to + /// all FP instructions. Essentially, this pass just looks for all FP + /// instructions that have a rounding mode set to RndDefault, and sets an + /// appropriate rounding mode based on the target device. 
+ /// + class PTXFPRoundingModePass : public MachineFunctionPass { + private: + static char ID; + + typedef std::pair<unsigned, unsigned> RndModeDesc; + + PTXTargetMachine& TargetMachine; + DenseMap<unsigned, RndModeDesc> Instrs; + + public: + PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel) + : MachineFunctionPass(ID), + TargetMachine(TM) { + initializeMap(); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "PTX FP Rounding Mode Pass"; + } + + private: + + void initializeMap(); + void processInstruction(MachineInstr &MI); + }; // class PTXFPRoundingModePass +} // namespace llvm + +using namespace llvm; + +char PTXFPRoundingModePass::ID = 0; + +bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) { + // Look at each basic block + for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe; + ++bbi) { + MachineBasicBlock &MBB = *bbi; + // Look at each instruction + for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end(); + ii != ie; ++ii) { + MachineInstr &MI = *ii; + processInstruction(MI); + } + } + return false; +} + +void PTXFPRoundingModePass::initializeMap() { + using namespace PTXRoundingMode; + const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>(); + + // Build a map of default rounding mode for all instructions that need a + // rounding mode. 
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone); + Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone); + + unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone; + Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode); + Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode); + + unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? 
RndNearestEven : RndNone; + Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode); + Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode); + + Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven); + + Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox); + Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox); + + Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + 
Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt); + + Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven); + Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven); +} + +void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) { + // Is this an instruction that needs a rounding mode? 
+ if (Instrs.count(MI.getOpcode())) { + const RndModeDesc &Desc = Instrs[MI.getOpcode()]; + // Get the rounding mode operand + MachineOperand &Op = MI.getOperand(Desc.first); + // Update the rounding mode if needed + if (Op.getImm() == PTXRoundingMode::RndDefault) { + Op.setImm(Desc.second); + } + } +} + +FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new PTXFPRoundingModePass(TM, OptLevel); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp index 9adfa62..5c7ee29 100644 --- a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -12,7 +12,9 @@ //===----------------------------------------------------------------------===// #include "PTX.h" +#include "PTXMachineFunctionInfo.h" #include "PTXTargetMachine.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" #include "llvm/Support/Debug.h" @@ -37,6 +39,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel { bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2); bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset); bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset); + bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset); // Include the pieces auto'gened from the target description #include "PTXGenDAGISel.inc" @@ -46,6 +49,10 @@ class PTXDAGToDAGISel : public SelectionDAGISel { // pattern (PTXbrcond bb:$d, ...) 
in PTXInstrInfo.td SDNode *SelectBRCOND(SDNode *Node); + SDNode *SelectREADPARAM(SDNode *Node); + SDNode *SelectWRITEPARAM(SDNode *Node); + SDNode *SelectFrameIndex(SDNode *Node); + bool isImm(const SDValue &operand); bool SelectImm(const SDValue &operand, SDValue &imm); @@ -68,6 +75,12 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { switch (Node->getOpcode()) { case ISD::BRCOND: return SelectBRCOND(Node); + case PTXISD::READ_PARAM: + return SelectREADPARAM(Node); + case PTXISD::WRITE_PARAM: + return SelectWRITEPARAM(Node); + case ISD::FrameIndex: + return SelectFrameIndex(Node); default: return SelectCode(Node); } @@ -79,7 +92,7 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { SDValue Chain = Node->getOperand(0); SDValue Pred = Node->getOperand(1); SDValue Target = Node->getOperand(2); // branch target - SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32); DebugLoc dl = Node->getDebugLoc(); assert(Target.getOpcode() == ISD::BasicBlock); @@ -90,6 +103,97 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4); } +SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) { + SDValue Chain = Node->getOperand(0); + SDValue Index = Node->getOperand(1); + + int OpCode; + + // Get the type of parameter we are reading + EVT VT = Node->getValueType(0); + assert(VT.isSimple() && "READ_PARAM only implemented for MVT types"); + + MVT Type = VT.getSimpleVT(); + + if (Type == MVT::i1) + OpCode = PTX::READPARAMPRED; + else if (Type == MVT::i16) + OpCode = PTX::READPARAMI16; + else if (Type == MVT::i32) + OpCode = PTX::READPARAMI32; + else if (Type == MVT::i64) + OpCode = PTX::READPARAMI64; + else if (Type == MVT::f32) + OpCode = PTX::READPARAMF32; + else { + assert(Type == MVT::f64 && "Unexpected type!"); + OpCode = PTX::READPARAMF64; + } + + SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); + 
SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + SDValue Ops[] = { Index, Pred, PredOp, Chain }; + return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4); +} + +SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) { + + SDValue Chain = Node->getOperand(0); + SDValue Value = Node->getOperand(1); + + int OpCode; + + //Node->dumpr(CurDAG); + + // Get the type of parameter we are writing + EVT VT = Value->getValueType(0); + assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types"); + + MVT Type = VT.getSimpleVT(); + + if (Type == MVT::i1) + OpCode = PTX::WRITEPARAMPRED; + else if (Type == MVT::i16) + OpCode = PTX::WRITEPARAMI16; + else if (Type == MVT::i32) + OpCode = PTX::WRITEPARAMI32; + else if (Type == MVT::i64) + OpCode = PTX::WRITEPARAMI64; + else if (Type == MVT::f32) + OpCode = PTX::WRITEPARAMF32; + else if (Type == MVT::f64) + OpCode = PTX::WRITEPARAMF64; + else + llvm_unreachable("Invalid type in SelectWRITEPARAM"); + + SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + SDValue Ops[] = { Value, Pred, PredOp, Chain }; + SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4); + + //dbgs() << "SelectWRITEPARAM produced:\n\t"; + //Ret->dumpr(CurDAG); + + return Ret; +} + +SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) { + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + //dbgs() << "Selecting FrameIndex at index " << FI << "\n"; + //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0)); + + PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>(); + + SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI), + Node->getValueType(0)); + + return FrameSymbol.getNode(); +} + // Match memory operand of the form [reg+reg] bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue 
&R2) { if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 || @@ -107,14 +211,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { // Match memory operand of the form [reg], [imm+reg], and [reg+imm] bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset) { - if (Addr.getOpcode() != ISD::ADD) { + // FrameIndex addresses are handled separately + //errs() << "SelectADDRri: "; + //Addr.getNode()->dumpr(); + if (isa<FrameIndexSDNode>(Addr)) { + //errs() << "Failure\n"; + return false; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + Base = Addr.getOperand(0); + if (isa<FrameIndexSDNode>(Base)) { + //errs() << "Failure\n"; + return false; + } + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + //errs() << "Success\n"; + return true; + } + + /*if (Addr.getNumOperands() == 1) { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + errs() << "Success\n"; + return true; + }*/ + + //errs() << "SelectADDRri fails on: "; + //Addr.getNode()->dumpr(); + + if (isImm(Addr)) { + //errs() << "Failure\n"; + return false; + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + + //errs() << "Success\n"; + return true; + + /*if (Addr.getOpcode() != ISD::ADD) { // let SelectADDRii handle the [imm] case if (isImm(Addr)) return false; // it is [reg] assert(Addr.getValueType().isSimple() && "Type must be simple"); - Base = Addr; Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); @@ -136,7 +280,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, } // neither [reg+imm] nor [imm+reg] - return false; + return false;*/ } // Match memory operand of the form [imm+imm] and [imm] @@ -160,6 +304,36 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, return false; } +// Match memory operand of the form 
[reg], [imm+reg], and [reg+imm] +bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base, + SDValue &Offset) { + //errs() << "SelectADDRlocal: "; + //Addr.getNode()->dumpr(); + if (isa<FrameIndexSDNode>(Addr)) { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + //errs() << "Success\n"; + return true; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + Base = Addr.getOperand(0); + if (!isa<FrameIndexSDNode>(Base)) { + //errs() << "Failure\n"; + return false; + } + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + //errs() << "Offset: "; + //Offset.getNode()->dumpr(); + //errs() << "Success\n"; + return true; + } + + //errs() << "Failure\n"; + return false; +} + bool PTXDAGToDAGISel::isImm(const SDValue &operand) { return ConstantSDNode::classof(operand.getNode()); } diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp index 6fcf710..3307d91 100644 --- a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp @@ -16,23 +16,19 @@ #include "PTXMachineFunctionInfo.h" #include "PTXRegisterInfo.h" #include "PTXSubtarget.h" +#include "llvm/Function.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; //===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -#include "PTXGenCallingConv.inc" - -//===----------------------------------------------------------------------===// // 
TargetLowering Implementation //===----------------------------------------------------------------------===// @@ -47,57 +43,58 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) addRegisterClass(MVT::f64, PTX::RegF64RegisterClass); setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? setMinFunctionAlignment(2); - + //////////////////////////////////// /////////// Expansion ////////////// //////////////////////////////////// - + // (any/zero/sign) extload => load + (any/zero/sign) extend - + setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - + // f32 extload => load + fextend - - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + // f64 truncstore => trunc + store - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // sign_extend_inreg => sign_extend - + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - + // br_cc => brcond - + setOperationAction(ISD::BR_CC, MVT::Other, Expand); // select_cc => setcc - + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - + //////////////////////////////////// //////////// Legal ///////////////// //////////////////////////////////// - + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - + //////////////////////////////////// //////////// Custom //////////////// //////////////////////////////////// - + // customise setcc to use bitwise logic if possible - + setOperationAction(ISD::SETCC, MVT::i1, Custom); // customize translation of memory addresses - + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, 
Custom); @@ -105,7 +102,7 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) computeRegisterProperties(); } -MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const { +EVT PTXTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } @@ -130,10 +127,16 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "PTXISD::LOAD_PARAM"; case PTXISD::STORE_PARAM: return "PTXISD::STORE_PARAM"; + case PTXISD::READ_PARAM: + return "PTXISD::READ_PARAM"; + case PTXISD::WRITE_PARAM: + return "PTXISD::WRITE_PARAM"; case PTXISD::EXIT: return "PTXISD::EXIT"; case PTXISD::RET: return "PTXISD::RET"; + case PTXISD::CALL: + return "PTXISD::CALL"; } } @@ -149,7 +152,7 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); - // Look for X == 0, X == 1, X != 0, or X != 1 + // Look for X == 0, X == 1, X != 0, or X != 1 // We can simplify these to bitwise logic if (Op1.getOpcode() == ISD::Constant && @@ -197,6 +200,7 @@ SDValue PTXTargetLowering:: MachineFunction &MF = DAG.getMachineFunction(); const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); switch (CallConv) { default: @@ -216,68 +220,34 @@ SDValue PTXTargetLowering:: if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) { // We just need to emit the proper LOAD_PARAM ISDs for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) && "Kernels cannot take pred operands"); + unsigned ParamSize = Ins[i].VT.getStoreSizeInBits(); + unsigned Param = PM.addArgumentParam(ParamSize); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, 
Ins[i].VT, Chain, - DAG.getTargetConstant(i, MVT::i32)); + ParamValue); InVals.push_back(ArgValue); - - // Instead of storing a physical register in our argument list, we just - // store the total size of the parameter, in bits. The ASM printer - // knows how to process this. - MFI->addArgReg(Ins[i].VT.getStoreSizeInBits()); } } else { - // For device functions, we use the PTX calling convention to do register - // assignments then create CopyFromReg ISDs for the allocated registers - - SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs, - *DAG.getContext()); - - CCInfo.AnalyzeFormalArguments(Ins, CC_PTX); - - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - - CCValAssign& VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - TargetRegisterClass* TRC = 0; - - assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); - - // Determine which register class we need - if (RegVT == MVT::i1) { - TRC = PTX::RegPredRegisterClass; - } - else if (RegVT == MVT::i16) { - TRC = PTX::RegI16RegisterClass; - } - else if (RegVT == MVT::i32) { - TRC = PTX::RegI32RegisterClass; - } - else if (RegVT == MVT::i64) { - TRC = PTX::RegI64RegisterClass; - } - else if (RegVT == MVT::f32) { - TRC = PTX::RegF32RegisterClass; - } - else if (RegVT == MVT::f64) { - TRC = PTX::RegF64RegisterClass; - } - else { - llvm_unreachable("Unknown parameter type"); - } + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT RegVT = Ins[i].VT; + TargetRegisterClass* TRC = getRegClassFor(RegVT); + // Use a unique index in the instruction to prevent instruction folding. + // Yes, this is a hack. 
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32); unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); + SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain, + Index); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); InVals.push_back(ArgValue); - MFI->addArgReg(VA.getLocReg()); + MFI->addArgReg(Reg); } } @@ -301,41 +271,66 @@ SDValue PTXTargetLowering:: assert(Outs.size() == 0 && "Kernel must return void."); return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain); case CallingConv::PTX_Device: - //assert(Outs.size() <= 1 && "Can at most return one value."); + assert(Outs.size() <= 1 && "Can at most return one value."); break; } MachineFunction& MF = DAG.getMachineFunction(); PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); SDValue Flag; + const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>(); - // Even though we could use the .param space for return arguments for - // device functions if SM >= 2.0 and the number of return arguments is - // only 1, we just always use registers since this makes the codegen - // easier. 
- SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - CCInfo.AnalyzeReturn(Outs, RetCC_PTX); - - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - CCValAssign& VA = RVLocs[i]; - - assert(VA.isRegLoc() && "CCValAssign must be RegLoc"); + if (ST.useParamSpaceForDeviceArgs()) { + assert(Outs.size() < 2 && "Device functions can return at most one value"); + + if (Outs.size() == 1) { + unsigned ParamSize = OutVals[0].getValueType().getSizeInBits(); + unsigned Param = PM.addReturnParam(ParamSize); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, + ParamValue, OutVals[0]); + } + } else { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT RegVT = Outs[i].VT; + TargetRegisterClass* TRC = 0; - unsigned Reg = VA.getLocReg(); + // Determine which register class we need + if (RegVT == MVT::i1) { + TRC = PTX::RegPredRegisterClass; + } + else if (RegVT == MVT::i16) { + TRC = PTX::RegI16RegisterClass; + } + else if (RegVT == MVT::i32) { + TRC = PTX::RegI32RegisterClass; + } + else if (RegVT == MVT::i64) { + TRC = PTX::RegI64RegisterClass; + } + else if (RegVT == MVT::f32) { + TRC = PTX::RegF32RegisterClass; + } + else if (RegVT == MVT::f64) { + TRC = PTX::RegF64RegisterClass; + } + else { + llvm_unreachable("Unknown parameter type"); + } - DAG.getMachineFunction().getRegInfo().addLiveOut(Reg); + unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC); - Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag); + SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/); + SDValue OutReg = DAG.getRegister(Reg, RegVT); - // Guarantee that all emitted copies are stuck together, - // avoiding something bad - Flag = Chain.getValue(1); + Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, 
Copy, OutReg); - MFI->addRetReg(Reg); + MFI->addRetReg(Reg); + } } if (Flag.getNode() == 0) { @@ -345,3 +340,83 @@ SDValue PTXTargetLowering:: return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag); } } + +SDValue +PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + MachineFunction& MF = DAG.getMachineFunction(); + PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>(); + PTXParamManager &PM = MFI->getParamManager(); + + assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() && + "Calls are not handled for the target device"); + + std::vector<SDValue> Ops; + // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] + Ops.resize(Outs.size() + Ins.size() + 4); + + Ops[0] = Chain; + + // Identify the callee function + const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); + assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device && + "PTX function calls must be to PTX device functions"); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); + Ops[Ins.size()+2] = Callee; + + // Generate STORE_PARAM nodes for each function argument. In PTX, function + // arguments are explicitly stored into .param variables and passed as + // arguments. There is no register/stack-based calling convention in PTX. 
+ Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32); + for (unsigned i = 0; i != OutVals.size(); ++i) { + unsigned Size = OutVals[i].getValueType().getSizeInBits(); + unsigned Param = PM.addLocalParam(Size); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, + ParamValue, OutVals[i]); + Ops[i+Ins.size()+4] = ParamValue; + } + + std::vector<SDValue> InParams; + + // Generate list of .param variables to hold the return value(s). + Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32); + for (unsigned i = 0; i < Ins.size(); ++i) { + unsigned Size = Ins[i].VT.getStoreSizeInBits(); + unsigned Param = PM.addLocalParam(Size); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Ops[i+2] = ParamValue; + InParams.push_back(ParamValue); + } + + Ops[0] = Chain; + + // Create the CALL node. + Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size()); + + // Create the LOAD_PARAM nodes that retrieve the function return value(s). + for (unsigned i = 0; i < Ins.size(); ++i) { + SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain, + InParams[i]); + InVals.push_back(Load); + } + + return Chain; +} + +unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) { + // All arguments consist of one "register," regardless of the type. 
+ return 1; +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h index 4318541..4d25665 100644 --- a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h +++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h @@ -26,9 +26,12 @@ namespace PTXISD { FIRST_NUMBER = ISD::BUILTIN_OP_END, LOAD_PARAM, STORE_PARAM, + READ_PARAM, + WRITE_PARAM, EXIT, RET, - COPY_ADDRESS + COPY_ADDRESS, + CALL }; } // namespace PTXISD @@ -60,7 +63,19 @@ class PTXTargetLowering : public TargetLowering { DebugLoc dl, SelectionDAG &DAG) const; - virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual EVT getSetCCResultType(EVT VT) const; + + virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT); private: SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td index 8cee351..397fdc3 100644 --- a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td +++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td @@ -7,12 +7,39 @@ // //===----------------------------------------------------------------------===// -// PTX Predicate operand, default to (0, 0) = (zero-reg, always). 
+ +// Rounding Mode Specifier +/*class RoundingMode<bits<3> val> { + bits<3> Value = val; +} + +def RndDefault : RoundingMode<0>; +def RndNearestEven : RoundingMode<1>; +def RndNearestZero : RoundingMode<2>; +def RndNegInf : RoundingMode<3>; +def RndPosInf : RoundingMode<4>; +def RndApprox : RoundingMode<5>;*/ + + +// Rounding Mode Operand +def RndMode : Operand<i32> { + let PrintMethod = "printRoundingMode"; +} + +def RndDefault : PatLeaf<(i32 0)>; + +// PTX Predicate operand, default to (0, 0) = (zero-reg, none). // Leave PrintMethod empty; predicate printing is defined elsewhere. def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm), - (ops (i1 zero_reg), (i32 0))>; + (ops (i1 zero_reg), (i32 2))>; +def RndModeOperand : Operand<OtherVT> { + let MIOperandInfo = (ops i32imm); +} + +// Instruction Types let Namespace = "PTX" in { + class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern> : Instruction { dag OutOperandList = oops; diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp index 425265a..1b947a5 100644 --- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp @@ -16,10 +16,11 @@ #include "PTX.h" #include "PTXInstrInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #define GET_INSTRINFO_CTOR @@ -47,8 +48,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DstReg, unsigned SrcReg, bool KillSrc) const { - for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { - if (map[i].cls->contains(DstReg, SrcReg)) { + + const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo(); + 
//assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) && + // "Invalid register copy between two register classes"); + + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) { + if (map[i].cls == MRI.getRegClass(DstReg)) { const MCInstrDesc &MCID = get(map[i].opcode); MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg). addReg(SrcReg, getKillRegState(KillSrc)); @@ -161,7 +167,7 @@ DefinesPredicate(MachineInstr *MI, return false; Pred.push_back(MO); - Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None)); return true; } @@ -277,7 +283,7 @@ InsertBranch(MachineBasicBlock &MBB, BuildMI(&MBB, DL, get(PTX::BRAdp)) .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); return 2; } else if (Cond.size()) { BuildMI(&MBB, DL, get(PTX::BRAdp)) @@ -285,7 +291,7 @@ InsertBranch(MachineBasicBlock &MBB, return 1; } else { BuildMI(&MBB, DL, get(PTX::BRAd)) - .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None); return 1; } } @@ -296,34 +302,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - MachineInstr& MI = *MII; - DebugLoc DL = MI.getDebugLoc(); - - DEBUG(dbgs() << "storeRegToStackSlot: " << MI); - - int OpCode; - - // Select the appropriate opcode based on the register class - if (RC == PTX::RegI16RegisterClass) { - OpCode = PTX::STACKSTOREI16; - } else if (RC == PTX::RegI32RegisterClass) { - OpCode = PTX::STACKSTOREI32; - } else if (RC == PTX::RegI64RegisterClass) { - OpCode = PTX::STACKSTOREI32; - } else if (RC == PTX::RegF32RegisterClass) { - OpCode = PTX::STACKSTOREF32; - } else if (RC == 
PTX::RegF64RegisterClass) { - OpCode = PTX::STACKSTOREF64; - } else { - llvm_unreachable("Unknown PTX register class!"); - } - - // Build the store instruction (really a mov) - MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode)); - MIB.addFrameIndex(FrameIdx); - MIB.addReg(SrcReg); - - AddDefaultPredicate(MIB); + assert(false && "storeRegToStackSlot should not be called for PTX"); } void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, @@ -331,34 +310,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - MachineInstr& MI = *MII; - DebugLoc DL = MI.getDebugLoc(); - - DEBUG(dbgs() << "loadRegToStackSlot: " << MI); - - int OpCode; - - // Select the appropriate opcode based on the register class - if (RC == PTX::RegI16RegisterClass) { - OpCode = PTX::STACKLOADI16; - } else if (RC == PTX::RegI32RegisterClass) { - OpCode = PTX::STACKLOADI32; - } else if (RC == PTX::RegI64RegisterClass) { - OpCode = PTX::STACKLOADI32; - } else if (RC == PTX::RegF32RegisterClass) { - OpCode = PTX::STACKLOADF32; - } else if (RC == PTX::RegF64RegisterClass) { - OpCode = PTX::STACKLOADF64; - } else { - llvm_unreachable("Unknown PTX register class!"); - } - - // Build the load instruction (really a mov) - MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode)); - MIB.addReg(DestReg); - MIB.addFrameIndex(FrameIdx); - - AddDefaultPredicate(MIB); + assert(false && "loadRegFromStackSlot should not be called for PTX"); } // static helper routines @@ -367,7 +319,7 @@ MachineSDNode *PTXInstrInfo:: GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); SDValue ops[] = { Op1, predReg, predOp }; return 
DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); } @@ -376,7 +328,7 @@ MachineSDNode *PTXInstrInfo:: GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); - SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32); SDValue ops[] = { Op1, Op2, predReg, predOp }; return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); } @@ -384,7 +336,7 @@ GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { if (MI->findFirstPredOperandIdx() == -1) { MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); - MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None)); } } diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td index 6bfe906..a3fcea9 100644 --- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td @@ -21,10 +21,6 @@ include "PTXInstrFormats.td" // Code Generation Predicates //===----------------------------------------------------------------------===// -// Addressing -def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; -def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; - // Shader Model Support def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">; def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">; @@ -43,130 +39,19 @@ def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">; def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">; def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">; -//===----------------------------------------------------------------------===// -// Instruction Pattern Stuff 
-//===----------------------------------------------------------------------===// -def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::GLOBAL; - return false; -}]>; - -def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::CONSTANT; - return false; -}]>; - -def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::LOCAL; - return false; -}]>; - -def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::PARAMETER; - return false; -}]>; - -def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::SHARED; - return false; -}]>; - -def store_global - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::GLOBAL; - return false; -}]>; - -def store_local - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = 
dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::LOCAL; - return false; -}]>; - -def store_parameter - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::PARAMETER; - return false; -}]>; - -def store_shared - : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ - const Value *Src; - const PointerType *PT; - if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && - (PT = dyn_cast<PointerType>(Src->getType()))) - return PT->getAddressSpace() == PTX::SHARED; - return false; -}]>; - -// Addressing modes. -def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; -def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; -def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; -def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; -def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; -def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; - -// Address operands -def MEMri32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RegI32, i32imm); -} -def MEMri64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RegI64, i64imm); -} -def MEMii32 : Operand<i32> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i32imm, i32imm); -} -def MEMii64 : Operand<i64> { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops i64imm, i64imm); -} -// The operand here does not correspond to an actual address, so we -// can use i32 in 64-bit address modes. 
-def MEMpi : Operand<i32> { - let PrintMethod = "printParamOperand"; - let MIOperandInfo = (ops i32imm); -} -def MEMret : Operand<i32> { - let PrintMethod = "printReturnOperand"; - let MIOperandInfo = (ops i32imm); -} + +// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; +// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; + +// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart, +// [SDNPHasChain, SDNPOutGlue]>; +// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd, +// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def PTXcall : SDNode<"PTXISD::CALL", SDTNone, + [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>; + // Branch & call targets have OtherVT type. def brtarget : Operand<OtherVT>; @@ -189,87 +74,73 @@ def PTXret def PTXcopyaddress : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; -// Load/store .param space -def PTXloadparam - : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; -def PTXstoreparam - : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// +// For floating-point instructions, we cannot just embed the pattern into the +// instruction definition since we need to muck around with the rounding mode, +// and I do not know how to insert constants into instructions directly from +// pattern matches. 
+ //===- Floating-Point Instructions - 2 Operand Form -----------------------===// -multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> { +multiclass PTX_FLOAT_2OP<string opcstr> { def rr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - !strconcat(opcstr, ".f32\t$d, $a"), - [(set RegF32:$d, (opnode RegF32:$a))]>; + (ins RndMode:$r, RegF32:$a), + !strconcat(opcstr, "$r.f32\t$d, $a"), []>; def ri32 : InstPTX<(outs RegF32:$d), - (ins f32imm:$a), - !strconcat(opcstr, ".f32\t$d, $a"), - [(set RegF32:$d, (opnode fpimm:$a))]>; + (ins RndMode:$r, f32imm:$a), + !strconcat(opcstr, "$r.f32\t$d, $a"), []>; def rr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - !strconcat(opcstr, ".f64\t$d, $a"), - [(set RegF64:$d, (opnode RegF64:$a))]>; + (ins RndMode:$r, RegF64:$a), + !strconcat(opcstr, "$r.f64\t$d, $a"), []>; def ri64 : InstPTX<(outs RegF64:$d), - (ins f64imm:$a), - !strconcat(opcstr, ".f64\t$d, $a"), - [(set RegF64:$d, (opnode fpimm:$a))]>; + (ins RndMode:$r, f64imm:$a), + !strconcat(opcstr, "$r.f64\t$d, $a"), []>; } //===- Floating-Point Instructions - 3 Operand Form -----------------------===// -multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> { +multiclass PTX_FLOAT_3OP<string opcstr> { def rr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b), + !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; def ri32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - !strconcat(opcstr, ".f32\t$d, $a, $b"), - [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>; + (ins RndMode:$r, RegF32:$a, f32imm:$b), + !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>; def rr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b), + !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; def ri64 : InstPTX<(outs 
RegF64:$d), - (ins RegF64:$a, f64imm:$b), - !strconcat(opcstr, ".f64\t$d, $a, $b"), - [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>; + (ins RndMode:$r, RegF64:$a, f64imm:$b), + !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>; } //===- Floating-Point Instructions - 4 Operand Form -----------------------===// -multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> { +multiclass PTX_FLOAT_4OP<string opcstr> { def rrr32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b, RegF32:$c), - !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, - RegF32:$b), - RegF32:$c))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; def rri32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b, f32imm:$c), - !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), - [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a, - RegF32:$b), - fpimm:$c))]>; + (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; + def rii32 : InstPTX<(outs RegF32:$d), + (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c), + !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>; def rrr64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b, RegF64:$c), - !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, - RegF64:$b), - RegF64:$c))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; def rri64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b, f64imm:$c), - !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), - [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a, - RegF64:$b), - fpimm:$c))]>; + (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; + def rii64 : InstPTX<(outs RegF64:$d), + (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c), + !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>; } -multiclass INT3<string opcstr, SDNode 
opnode> { +//===- Integer Instructions - 3 Operand Form ------------------------------===// +multiclass PTX_INT3<string opcstr, SDNode opnode> { def rr16 : InstPTX<(outs RegI16:$d), (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, ".u16\t$d, $a, $b"), @@ -296,6 +167,35 @@ multiclass INT3<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } +//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===// +multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> { + def rr16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, RegI16:$b), + !strconcat(opcstr, ".s16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>; + def ri16 : InstPTX<(outs RegI16:$d), + (ins RegI16:$a, i16imm:$b), + !strconcat(opcstr, ".s16\t$d, $a, $b"), + [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, RegI32:$b), + !strconcat(opcstr, ".s32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>; + def ri32 : InstPTX<(outs RegI32:$d), + (ins RegI32:$a, i32imm:$b), + !strconcat(opcstr, ".s32\t$d, $a, $b"), + [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, RegI64:$b), + !strconcat(opcstr, ".s64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>; + def ri64 : InstPTX<(outs RegI64:$d), + (ins RegI64:$a, i64imm:$b), + !strconcat(opcstr, ".s64\t$d, $a, $b"), + [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; +} + +//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===// multiclass PTX_LOGIC<string opcstr, SDNode opnode> { def ripreds : InstPTX<(outs RegPred:$d), (ins RegPred:$a, i1imm:$b), @@ -331,7 +231,8 @@ multiclass PTX_LOGIC<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>; } -multiclass INT3ntnc<string opcstr, SDNode opnode> { +//===- Integer Shift Instructions - 3 Operand Form ------------------------===// +multiclass PTX_INT3ntnc<string opcstr, SDNode 
opnode> { def rr16 : InstPTX<(outs RegI16:$d), (ins RegI16:$a, RegI16:$b), !strconcat(opcstr, "16\t$d, $a, $b"), @@ -370,6 +271,7 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> { [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>; } +//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===// multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, CondCode cmp, string cmpstr> { // TODO support 5-operand format: p|q, a, b, c @@ -385,56 +287,77 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls, def rr_and_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_and_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), + RegPred:$c))]>; def rr_or_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_or_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; def rr_xor_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, 
$c"), [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>; def ri_xor_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), + RegPred:$c))]>; def rr_and_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_and_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; def rr_or_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_or_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; def rr_xor_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, 
RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), + (not RegPred:$c)))]>; def ri_xor_not_r : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), + (not RegPred:$c)))]>; } -multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, +//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===// +multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls, CondCode ucmp, CondCode ocmp, string cmpstr> { // TODO support 5-operand format: p|q, a, b, c @@ -447,137 +370,110 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>; + def ri_u + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>; + def ri_o + : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>; + def rr_and_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, "u.and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), + RegPred:$c))]>; def rr_and_r_o : InstPTX<(outs RegPred:$p), (ins 
RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), + RegPred:$c))]>; def rr_or_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, "u.or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; def rr_or_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, $c"), [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; def rr_xor_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, "u.xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), + RegPred:$c))]>; def rr_xor_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, $c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), + RegPred:$c))]>; def rr_and_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def 
rr_and_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".and.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; def rr_or_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def rr_or_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".or.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; def rr_xor_not_r_u : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, "u.xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), + (not RegPred:$c)))]>; def rr_xor_not_r_o : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c), - !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), - [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>; + !strconcat("setp.", cmpstr, ".xor.", regclsname, + "\t$p, $a, $b, !$c"), + [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), + (not RegPred:$c)))]>; } -multiclass PTX_SELP<RegisterClass RC, string regclsname> { +//===- Select Predicate Instructions - 4 
Operand Form ---------------------===// +multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls, + SDNode immnode> { def rr : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c), !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>; + def ri + : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>; + def ii + : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>; } -multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> { - def rr32 : InstPTX<(outs RC:$d), - (ins MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>; - def rr64 : InstPTX<(outs RC:$d), - (ins MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>; - def ri32 : InstPTX<(outs RC:$d), - (ins MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>; - def ri64 : InstPTX<(outs RC:$d), - (ins MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>; - def ii32 : InstPTX<(outs RC:$d), - (ins MEMii32:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>; - def ii64 : InstPTX<(outs RC:$d), - (ins MEMii64:$a), - !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), - [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>; -} - -multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { - defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; - defm u32 : 
PTX_LD<opstr, ".u32", RegI32, pat_load>; - defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; - defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; - defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; -} - -multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> { - def rr32 : InstPTX<(outs), - (ins RC:$d, MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>; - def rr64 : InstPTX<(outs), - (ins RC:$d, MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>; - def ri32 : InstPTX<(outs), - (ins RC:$d, MEMri32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>; - def ri64 : InstPTX<(outs), - (ins RC:$d, MEMri64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>; - def ii32 : InstPTX<(outs), - (ins RC:$d, MEMii32:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>; - def ii64 : InstPTX<(outs), - (ins RC:$d, MEMii64:$a), - !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), - [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>; -} -multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { - defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; - defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; - defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; - defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; - defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; -} //===----------------------------------------------------------------------===// // Instructions @@ -585,118 +481,61 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { ///===- Integer Arithmetic Instructions -----------------------------------===// -defm ADD : INT3<"add", 
add>; -defm SUB : INT3<"sub", sub>; -defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies -defm DIV : INT3<"div", udiv>; -defm REM : INT3<"rem", urem>; +defm ADD : PTX_INT3<"add", add>; +defm SUB : PTX_INT3<"sub", sub>; +defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies +defm DIV : PTX_INT3<"div", udiv>; +defm SDIV : PTX_INT3_SIGNED<"div", sdiv>; +defm REM : PTX_INT3<"rem", urem>; ///===- Floating-Point Arithmetic Instructions ----------------------------===// -// Standard Unary Operations -defm FNEG : PTX_FLOAT_2OP<"neg", fneg>; +// FNEG +defm FNEG : PTX_FLOAT_2OP<"neg">; // Standard Binary Operations -defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>; -defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>; -defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>; - -// For floating-point division: -// SM_13+ defaults to .rn for f32 and f64, -// SM10 must *not* provide a rounding - -// TODO: -// - Allow user selection of rounding modes for fdiv -// - Add support for -prec-div=false (.approx) - -def FDIVrr32SM13 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - "div.rn.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVri32SM13 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - "div.rn.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVrr32SM10 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, RegF32:$b), - "div.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>, - Requires<[FDivNoRoundingMode]>; -def FDIVri32SM10 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a, f32imm:$b), - "div.f32\t$d, $a, $b", - [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>, - Requires<[FDivNoRoundingMode]>; - -def FDIVrr64SM13 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - "div.rn.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVri64SM13 : 
InstPTX<(outs RegF64:$d), - (ins RegF64:$a, f64imm:$b), - "div.rn.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[FDivNeedsRoundingMode]>; -def FDIVrr64SM10 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, RegF64:$b), - "div.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>, - Requires<[FDivNoRoundingMode]>; -def FDIVri64SM10 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a, f64imm:$b), - "div.f64\t$d, $a, $b", - [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>, - Requires<[FDivNoRoundingMode]>; - - +defm FADD : PTX_FLOAT_3OP<"add">; +defm FSUB : PTX_FLOAT_3OP<"sub">; +defm FMUL : PTX_FLOAT_3OP<"mul">; +defm FDIV : PTX_FLOAT_3OP<"div">; // Multi-operation hybrid instructions +defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>; -// The selection of mad/fma is tricky. In some cases, they are the *same* -// instruction, but in other cases we may prefer one or the other. Also, -// different PTX versions differ on whether rounding mode flags are required. -// In the short term, mad is supported on all PTX versions and we use a -// default rounding mode no matter what shader model or PTX version. -// TODO: Allow the rounding mode to be selectable through llc. 
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, - Requires<[FMadNeedsRoundingMode, SupportsFMA]>; -defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, - Requires<[FMadNoRoundingMode, SupportsFMA]>; ///===- Floating-Point Intrinsic Instructions -----------------------------===// -def FSQRT32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "sqrt.rn.f32\t$d, $a", - [(set RegF32:$d, (fsqrt RegF32:$a))]>; - -def FSQRT64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "sqrt.rn.f64\t$d, $a", - [(set RegF64:$d, (fsqrt RegF64:$a))]>; - -def FSIN32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "sin.approx.f32\t$d, $a", - [(set RegF32:$d, (fsin RegF32:$a))]>; +// SQRT +def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "sqrt$r.f32\t$d, $a", []>; +def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "sqrt$r.f32\t$d, $a", []>; +def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "sqrt$r.f64\t$d, $a", []>; +def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "sqrt$r.f64\t$d, $a", []>; + +// SIN +def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "sin$r.f32\t$d, $a", []>; +def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "sin$r.f32\t$d, $a", []>; +def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "sin$r.f64\t$d, $a", []>; +def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "sin$r.f64\t$d, $a", []>; + +// COS +def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a), + "cos$r.f32\t$d, $a", []>; +def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a), + "cos$r.f32\t$d, $a", []>; +def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a), + "cos$r.f64\t$d, $a", []>; +def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a), + "cos$r.f64\t$d, $a", []>; -def FSIN64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "sin.approx.f64\t$d, $a", - [(set RegF64:$d, (fsin 
RegF64:$a))]>; -def FCOS32 : InstPTX<(outs RegF32:$d), - (ins RegF32:$a), - "cos.approx.f32\t$d, $a", - [(set RegF32:$d, (fcos RegF32:$a))]>; - -def FCOS64 : InstPTX<(outs RegF64:$d), - (ins RegF64:$a), - "cos.approx.f64\t$d, $a", - [(set RegF64:$d, (fcos RegF64:$a))]>; ///===- Comparison and Selection Instructions -----------------------------===// @@ -744,35 +583,35 @@ defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">; // Compare f32 -defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">; -defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">; -defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">; -defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">; -defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">; -defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">; +defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">; +defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">; +defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">; +defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">; +defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">; +defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">; // Compare f64 -defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">; -defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">; -defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">; -defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">; -defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">; -defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">; +defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">; +defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">; +defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">; +defm SETPLEf64 : 
PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">; +defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">; +defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">; // .selp -defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">; -defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">; -defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">; -defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">; -defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">; +defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>; +defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>; +defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>; +defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>; +defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>; ///===- Logic and Shift Instructions --------------------------------------===// -defm SHL : INT3ntnc<"shl.b", PTXshl>; -defm SRL : INT3ntnc<"shr.u", PTXsrl>; -defm SRA : INT3ntnc<"shr.s", PTXsra>; +defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>; +defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>; +defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>; defm AND : PTX_LOGIC<"and", and>; defm OR : PTX_LOGIC<"or", or>; @@ -780,6 +619,24 @@ defm XOR : PTX_LOGIC<"xor", xor>; ///===- Data Movement and Conversion Instructions -------------------------===// +// any_extend +// Implement the anyext instruction in terms of the PTX cvt instructions. +//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>; +//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>; +//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>; + +// bitconvert +// These instructions implement the bit-wise conversion between integer and +// floating-point types. 
+def MOVi32f32 + : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>; +def MOVf32i32 + : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>; +def MOVi64f64 + : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>; +def MOVf64i64 + : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>; + let neverHasSideEffects = 1 in { def MOVPREDrr : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>; @@ -825,278 +682,332 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; } -// Loads -defm LDg : PTX_LD_ALL<"ld.global", load_global>; -defm LDc : PTX_LD_ALL<"ld.const", load_constant>; -defm LDl : PTX_LD_ALL<"ld.local", load_local>; -defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; +// PTX cvt instructions +// Note all of these may actually be used, we just define all possible patterns +// here (that make sense). +// FIXME: Can we collapse this somehow into a multiclass def? 
+ +// To i16 +def CVTu16u32 + : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>; +def CVTu16u64 + : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>; +def CVTu16f32 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u16.f32\t$d, $a", []>; +def CVTs16f32 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s16.f32\t$d, $a", []>; +def CVTu16f64 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u16.f64\t$d, $a", []>; +def CVTs16f64 + : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s16.f64\t$d, $a", []>; + +// To i32 +def CVTu32u16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>; +def CVTs32s16 + : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>; +def CVTu32u64 + : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>; +def CVTu32f32 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u32.f32\t$d, $a", []>; +def CVTs32f32 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s32.f32\t$d, $a", []>; +def CVTu32f64 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.u32.f64\t$d, $a", []>; +def CVTs32f64 + : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s32.f64\t$d, $a", []>; + +// To i64 +def CVTu64u16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>; +def CVTs64s16 + : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>; +def CVTu64u32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>; +def CVTs64s32 + : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>; +def CVTu64f32 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.u64.f32\t$d, $a", []>; +def CVTs64f32 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a), + "cvt$r.s64.f32\t$d, $a", []>; +def CVTu64f64 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + 
"cvt$r.u64.f64\t$d, $a", []>; +def CVTs64f64 + : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.s64.f64\t$d, $a", []>; + +// To f32 +def CVTf32u16 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.u16\t$d, $a", []>; +def CVTf32s16 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f32.s16\t$d, $a", []>; +def CVTf32u32 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.u32\t$d, $a", []>; +def CVTf32s32 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f32.s32\t$d, $a", []>; +def CVTf32u64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.u64\t$d, $a", []>; +def CVTf32s64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f32.s64\t$d, $a", []>; +def CVTf32f64 + : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a), + "cvt$r.f32.f64\t$d, $a", []>; + +// To f64 +def CVTf64u16 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.u16\t$d, $a", []>; +def CVTf64s16 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a), + "cvt$r.f64.s16\t$d, $a", []>; +def CVTf64u32 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.u32\t$d, $a", []>; +def CVTf64s32 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a), + "cvt$r.f64.s32\t$d, $a", []>; +def CVTf64u64 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.u64\t$d, $a", []>; +def CVTf64s64 + : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a), + "cvt$r.f64.s64\t$d, $a", []>; +def CVTf64f32 + : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>; + + ///===- Control Flow Instructions -----------------------------------------===// -// These instructions are used to load/store from the .param space for -// device and kernel parameters +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + def BRAd + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; +} -let hasSideEffects = 1 in { - def LDpiPred 
: InstPTX<(outs RegPred:$d), (ins MEMpi:$a), - "ld.param.pred\t$d, [$a]", - [(set RegPred:$d, (PTXloadparam timm:$a))]>; - def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a), - "ld.param.u16\t$d, [$a]", - [(set RegI16:$d, (PTXloadparam timm:$a))]>; - def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a), - "ld.param.u32\t$d, [$a]", - [(set RegI32:$d, (PTXloadparam timm:$a))]>; - def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a), - "ld.param.u64\t$d, [$a]", - [(set RegI64:$d, (PTXloadparam timm:$a))]>; - def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a), - "ld.param.f32\t$d, [$a]", - [(set RegF32:$d, (PTXloadparam timm:$a))]>; - def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a), - "ld.param.f64\t$d, [$a]", - [(set RegF64:$d, (PTXloadparam timm:$a))]>; - - def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a), - "st.param.pred\t[$d], $a", - [(PTXstoreparam timm:$d, RegPred:$a)]>; - def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a), - "st.param.u16\t[$d], $a", - [(PTXstoreparam timm:$d, RegI16:$a)]>; - def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a), - "st.param.u32\t[$d], $a", - [(PTXstoreparam timm:$d, RegI32:$a)]>; - def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a), - "st.param.u64\t[$d], $a", - [(PTXstoreparam timm:$d, RegI64:$a)]>; - def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a), - "st.param.f32\t[$d], $a", - [(PTXstoreparam timm:$d, RegF32:$a)]>; - def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a), - "st.param.f64\t[$d], $a", - [(PTXstoreparam timm:$d, RegF64:$a)]>; +let isBranch = 1, isTerminator = 1 in { + // FIXME: The pattern part is blank because I cannot (or do not yet know + // how to) use the first operand of PredicateOperand (a RegPred register) here + def BRAdp + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", + [/*(brcond pred:$_p, bb:$d)*/]>; } -// Stores -defm STg : PTX_ST_ALL<"st.global", store_global>; -defm STl : PTX_ST_ALL<"st.local", store_local>; -defm STs : 
PTX_ST_ALL<"st.shared", store_shared>; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; + def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; +} -// defm STp : PTX_ST_ALL<"st.param", store_parameter>; -// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; -// TODO: Do something with st.param if/when it is needed. +let hasSideEffects = 1 in { + def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>; +} -// Conversion to pred -// PTX does not directly support converting to a predicate type, so we fake it -// by performing a greater-than test between the value and zero. This follows -// the C convention that any non-zero value is equivalent to 'true'. -def CVT_pred_u16 - : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0", - [(set RegPred:$d, (trunc RegI16:$a))]>; +///===- Parameter Passing Pseudo-Instructions -----------------------------===// + +def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b), + "mov.pred\t$a, %param$b", []>; +def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b), + "mov.b16\t$a, %param$b", []>; +def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b), + "mov.b32\t$a, %param$b", []>; +def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b), + "mov.b64\t$a, %param$b", []>; +def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b), + "mov.f32\t$a, %param$b", []>; +def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b), + "mov.f64\t$a, %param$b", []>; + +def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>; +def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>; +def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>; +def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>; +def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>; +def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>; -def CVT_pred_u32 - : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, 
$a, 0", - [(set RegPred:$d, (trunc RegI32:$a))]>; -def CVT_pred_u64 - : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0", - [(set RegPred:$d, (trunc RegI64:$a))]>; +//===----------------------------------------------------------------------===// +// Instruction Selection Patterns +//===----------------------------------------------------------------------===// -def CVT_pred_f32 - : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0", - [(set RegPred:$d, (fp_to_uint RegF32:$a))]>; +// FADD +def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)), + (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)), + (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)), + (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)), + (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FSUB +def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)), + (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)), + (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)), + (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)), + (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FMUL +def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)), + (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)), + (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)), + (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)), + (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FDIV +def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)), + (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>; +def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)), + (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>; +def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)), + (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>; +def : Pat<(f64 (fdiv RegF64:$a, 
fpimm:$b)), + (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>; + +// FMUL+FADD +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)), + (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>; +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)), + (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>; +def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)), + (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>; +def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)), + (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>; +def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)), + (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>; +def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)), + (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>; +def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)), + (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>; + +// FNEG +def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>; + +// FSQRT +def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>; + +// FSIN +def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>; +def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>; + +// FCOS +def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>; +def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>; +def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 
RndDefault, RegF64:$a)>; +def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>; + +// Type conversion notes: +// - PTX does not directly support converting a predicate to a value, so we +// use a select instruction to select either 0 or 1 (integer or fp) based +// on the truth value of the predicate. +// - PTX does not directly support converting to a predicate type, so we fake it +// by performing a greater-than test between the value and zero. This follows +// the C convention that any non-zero value is equivalent to 'true'. -def CVT_pred_f64 - : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0", - [(set RegPred:$d, (fp_to_uint RegF64:$a))]>; +// Conversion to pred +def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>; +def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>; +def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>; +def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>; +def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>; // Conversion to u16 -// PTX does not directly support converting a predicate to a value, so we -// use a select instruction to select either 0 or 1 (integer or fp) based -// on the truth value of the predicate. 
-def CVT_u16_preda - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (anyext RegPred:$a))]>; - -def CVT_u16_pred - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (zext RegPred:$a))]>; - -def CVT_u16_preds - : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a", - [(set RegI16:$d, (sext RegPred:$a))]>; - -def CVT_u16_u32 - : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", - [(set RegI16:$d, (trunc RegI32:$a))]>; - -def CVT_u16_u64 - : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", - [(set RegI16:$d, (trunc RegI64:$a))]>; - -def CVT_u16_f32 - : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a", - [(set RegI16:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u16_f64 - : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a", - [(set RegI16:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; +def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>; +def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>; +def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>; +def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>; +def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>; +def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>; +def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>; // Conversion to u32 - -def CVT_u32_pred - : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", - [(set RegI32:$d, (zext RegPred:$a))]>; - -def CVT_u32_b16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", - [(set RegI32:$d, (anyext RegI16:$a))]>; - -def CVT_u32_u16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", - [(set RegI32:$d, (zext RegI16:$a))]>; - 
-def CVT_u32_preds - : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a", - [(set RegI32:$d, (sext RegPred:$a))]>; - -def CVT_u32_s16 - : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a", - [(set RegI32:$d, (sext RegI16:$a))]>; - -def CVT_u32_u64 - : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", - [(set RegI32:$d, (trunc RegI64:$a))]>; - -def CVT_u32_f32 - : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a", - [(set RegI32:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u32_f64 - : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a", - [(set RegI32:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; +def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>; +def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>; +def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>; +def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>; +def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>; +def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>; +def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>; +def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>; +def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>; // Conversion to u64 - -def CVT_u64_pred - : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", - [(set RegI64:$d, (zext RegPred:$a))]>; - -def CVT_u64_preds - : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a", - [(set RegI64:$d, (sext RegPred:$a))]>; - -def CVT_u64_u16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", - [(set RegI64:$d, (zext RegI16:$a))]>; - -def CVT_u64_s16 - : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, 
$a", - [(set RegI64:$d, (sext RegI16:$a))]>; - -def CVT_u64_u32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", - [(set RegI64:$d, (zext RegI32:$a))]>; - -def CVT_u64_s32 - : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a", - [(set RegI64:$d, (sext RegI32:$a))]>; - -def CVT_u64_f32 - : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a", - [(set RegI64:$d, (fp_to_uint RegF32:$a))]>; - -def CVT_u64_f64 - : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a", - [(set RegI64:$d, (fp_to_uint RegF64:$a))]>; +def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; +def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a, + 0xFFFFFFFFFFFFFFFF, 0)>; +def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>; +def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>; +def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>; +def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>; +def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>; +def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>; +def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>; +def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>; +def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>; +def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>; // Conversion to f32 - -def CVT_f32_pred - : InstPTX<(outs RegF32:$d), (ins RegPred:$a), - "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0 - [(set RegF32:$d, (uint_to_fp RegPred:$a))]>; - -def CVT_f32_u16 - : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a", - [(set RegF32:$d, (uint_to_fp RegI16:$a))]>; - -def CVT_f32_u32 - : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a", - [(set RegF32:$d, (uint_to_fp RegI32:$a))]>; 
-
-def CVT_f32_u64
-  : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
-            [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
-
-def CVT_f32_f64
-  : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
-            [(set RegF32:$d, (fround RegF64:$a))]>;
+def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
+          (MOVf32i32 0x3F800000), (MOVf32i32 0))>;
+def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
+def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
 
 // Conversion to f64
+def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
+          (MOVf64i64 0x3FF0000000000000), (MOVf64i64 0))>; // binary64 1.0
+def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
+def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
 
-def CVT_f64_pred
-  : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
-            "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
-            [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
-
-def CVT_f64_u16
-  : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
-            [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
-
-def CVT_f64_u32
-  : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
-            [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
-
-def CVT_f64_u64
-  : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
-            [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
-
-def CVT_f64_f32
-  : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
-            [(set RegF64:$d, (fextend RegF32:$a))]>;
-
-///===- Control Flow Instructions -----------------------------------------===//
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-  def BRAd
-    : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
-}
-
-let isBranch = 1, isTerminator = 1 in {
-  // FIXME: The pattern part is blank because I cannot (or do not yet know
-  // how to) use the first operand of PredicateOperand (a RegPred register) here
-  def BRAdp
-    : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
-              [/*(brcond pred:$_p, bb:$d)*/]>;
-}
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
-  def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
-  def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
-}
-
-///===- Spill Instructions ------------------------------------------------===//
-// Special instructions used for stack spilling
-def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
-                            "mov.u16\ts$d, $a", []>;
-def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
-                            "mov.u32\ts$d, $a", []>;
-def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
-                            "mov.u64\ts$d, $a", []>;
-def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
-                            "mov.f32\ts$d, $a", []>;
-def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
-                            "mov.f64\ts$d, $a", []>;
-
-def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
-                           "mov.u16\t$d, s$a", []>;
-def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
-                           "mov.u32\t$d, s$a", []>;
-def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
-                           "mov.u64\t$d, s$a", []>;
-def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a), - "mov.f32\t$d, s$a", []>; -def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a), - "mov.f64\t$d, s$a", []>; ///===- Intrinsic Instructions --------------------------------------------===// - include "PTXIntrinsicInstrInfo.td" + +///===- Load/Store Instructions -------------------------------------------===// +include "PTXInstrLoadStore.td" + diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td new file mode 100644 index 0000000..9b4f56c --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td @@ -0,0 +1,278 @@ +//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the PTX load/store instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + + +// Addressing Predicates +// We have to differentiate between 32- and 64-bit pointer types +def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; +def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; + +//===----------------------------------------------------------------------===// +// Pattern Fragments for Loads/Stores +//===----------------------------------------------------------------------===// + +def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Global; + return false; +}]>; + +def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Constant; + return false; +}]>; + +def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<LoadSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Shared; + return false; +}]>; + +def store_global + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Global; + return false; +}]>; + +def store_shared + : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{ + const Value *Src; + const PointerType *PT; + if ((Src = cast<StoreSDNode>(N)->getSrcValue()) && + (PT = dyn_cast<PointerType>(Src->getType()))) + return PT->getAddressSpace() == PTXStateSpace::Shared; 
+ return false; +}]>; + +// Addressing modes. +def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>; +def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>; +def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>; +def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>; +def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>; +def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>; +def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>; +def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>; + +// Address operands +def MEMri32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RegI32, i32imm); +} +def MEMri64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RegI64, i64imm); +} +def LOCALri32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i32imm, i32imm); +} +def LOCALri64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +def MEMii32 : Operand<i32> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i32imm, i32imm); +} +def MEMii64 : Operand<i64> { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +// The operand here does not correspond to an actual address, so we +// can use i32 in 64-bit address modes. 
+def MEMpi : Operand<i32> { + let PrintMethod = "printParamOperand"; + let MIOperandInfo = (ops i32imm); +} +def MEMret : Operand<i32> { + let PrintMethod = "printReturnOperand"; + let MIOperandInfo = (ops i32imm); +} + + +// Load/store .param space +def PTXloadparam + : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXstoreparam + : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + +def PTXreadparam + : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; +def PTXwriteparam + : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>; + + + +//===----------------------------------------------------------------------===// +// Classes for loads/stores +//===----------------------------------------------------------------------===// +multiclass PTX_LD<string opstr, string typestr, + RegisterClass RC, PatFrag pat_load> { + def rr32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr32:$a))]>, + Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr64:$a))]>, + Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri32:$a))]>, + Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri64:$a))]>, + Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs RC:$d), + (ins MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii32:$a))]>, + 
Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs RC:$d), + (ins MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii64:$a))]>, + Requires<[Use64BitAddresses]>; +} + +multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, + PatFrag pat_store> { + def rr32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr32:$a)]>, + Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr64:$a)]>, + Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri32:$a)]>, + Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri64:$a)]>, + Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs), + (ins RC:$d, MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii32:$a)]>, + Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs), + (ins RC:$d, MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii64:$a)]>, + Requires<[Use64BitAddresses]>; +} + +multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> { + def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_global ADDRlocal32:$a))]>; + def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a), + !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (load_global ADDRlocal64:$a))]>; + def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_global RC:$d, ADDRlocal32:$a)]>; + def STri64 : InstPTX<(outs), 
(ins RC:$d, LOCALri64:$a), + !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")), + [(store_global RC:$d, ADDRlocal64:$a)]>; +} + +multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> { + let hasSideEffects = 1 in { + def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a), + !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (PTXloadparam texternalsym:$a))]>; + def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a), + !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")), + [(PTXstoreparam texternalsym:$d, RC:$a)]>; + } +} + +multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> { + defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>; + defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>; + defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>; + defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>; + defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>; +} + +multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> { + defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>; + defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>; + defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>; + defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>; + defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>; +} + + + +//===----------------------------------------------------------------------===// +// Instruction definitions for loads/stores +//===----------------------------------------------------------------------===// + +// Global/shared stores +defm STg : PTX_ST_ALL<"st.global", store_global>; +defm STs : PTX_ST_ALL<"st.shared", store_shared>; + +// Global/shared/constant loads +defm LDg : PTX_LD_ALL<"ld.global", load_global>; +defm LDc : PTX_LD_ALL<"ld.const", load_constant>; +defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; + +// Param loads/stores +defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>; +defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>; +defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>; +defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>; +defm 
PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>; +defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>; + +// Local loads/stores +defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>; +defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>; +defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>; +defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>; +defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>; +defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>; + diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td index 8d97909..9de1cb6 100644 --- a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -25,37 +25,63 @@ class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop> // TODO Add read vector-version of special registers -//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>; -def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>; -def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>; -def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>; -def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>; +//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", +// int_ptx_read_tid_r64>; +def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", + int_ptx_read_tid_x>; +def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", + int_ptx_read_tid_y>; +def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", + int_ptx_read_tid_z>; +def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", + int_ptx_read_tid_w>; -//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>; -def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>; -def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>; -def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", 
int_ptx_read_ntid_z>; -def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>; +//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", +// int_ptx_read_ntid_r64>; +def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", + int_ptx_read_ntid_x>; +def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", + int_ptx_read_ntid_y>; +def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", + int_ptx_read_ntid_z>; +def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", + int_ptx_read_ntid_w>; -def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>; -def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>; -def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>; +def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", + int_ptx_read_laneid>; +def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", + int_ptx_read_warpid>; +def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", + int_ptx_read_nwarpid>; -//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; -def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>; -def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>; -def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>; -def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>; +//def PTX_READ_CTAID_R64 : +//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; +def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", + int_ptx_read_ctaid_x>; +def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", + int_ptx_read_ctaid_y>; +def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", + int_ptx_read_ctaid_z>; +def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", + int_ptx_read_ctaid_w>; -//def PTX_READ_NCTAID_R64 : 
PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; -def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>; -def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>; -def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>; -def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>; +//def PTX_READ_NCTAID_R64 : +//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; +def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", + int_ptx_read_nctaid_x>; +def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", + int_ptx_read_nctaid_y>; +def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", + int_ptx_read_nctaid_z>; +def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", + int_ptx_read_nctaid_w>; -def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>; -def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>; -def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>; +def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", + int_ptx_read_smid>; +def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", + int_ptx_read_nsmid>; +def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", + int_ptx_read_gridid>; def PTX_READ_LANEMASK_EQ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>; diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp index b13a3da..468ce93 100644 --- a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -100,7 +100,7 @@ public: /// @{ virtual void ChangeSection(const MCSection *Section); - virtual void InitSections() {} + virtual void InitSections() { /* PTX does not use sections */ } virtual void EmitLabel(MCSymbol *Symbol); @@ -132,7 +132,9 @@ 
public: /// /// @param Symbol - The common symbol to emit. /// @param Size - The size of the common symbol. - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + /// @param ByteAlignment - The alignment of the common symbol in bytes. + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, unsigned Size = 0, unsigned ByteAlignment = 0); @@ -233,7 +235,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) { void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - //assert(getCurrentSection() && "Cannot emit before setting section!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); OS << *Symbol << MAI.getLabelSuffix(); EmitEOL(); @@ -283,7 +285,8 @@ void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} -void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} +void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, unsigned Size, unsigned ByteAlignment) {} @@ -510,7 +513,7 @@ void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) { // If we have an AsmPrinter, use that to print, otherwise print the MCInst. 
if (InstPrinter) - InstPrinter->printInst(&Inst, OS); + InstPrinter->printInst(&Inst, OS, ""); else Inst.print(OS, &MAI); EmitEOL(); @@ -533,7 +536,7 @@ namespace llvm { formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *IP, - MCCodeEmitter *CE, TargetAsmBackend *TAB, + MCCodeEmitter *CE, MCAsmBackend *MAB, bool ShowInst) { return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc, IP, CE, ShowInst); diff --git a/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp new file mode 100644 index 0000000..142e639 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp @@ -0,0 +1,32 @@ +//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower PTX MachineInstrs to their corresponding +// MCInst records. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXAsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+                                        PTXAsmPrinter &AP) {
+  OutMI.setOpcode(MI->getOpcode());
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    // Each machine operand is lowered by the asm printer and appended in order.
+    OutMI.addOperand(AP.lowerOperand(MO));
+  }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
index 6fe9e6c..b33a273 100644
--- a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -52,36 +52,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
   PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
-
-  DEBUG(dbgs()
-        << "PTX::NoRegister == " << PTX::NoRegister << "\n"
-        << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
-
-  DEBUG(for (unsigned reg = PTX::NoRegister + 1;
-             reg < PTX::NUM_TARGET_REGS; ++reg)
-          if (MRI.isPhysRegUsed(reg))
-            dbgs() << "Used Reg: " << reg << "\n";);
-
-  // FIXME: This is a slow linear scanning
-  for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
-    if (MRI.isPhysRegUsed(reg) &&
-        !MFI->isRetReg(reg) &&
-        (MFI->isKernel() || !MFI->isArgReg(reg)))
-      MFI->addLocalVarReg(reg);
-
-  // Notify MachineFunctionInfo that I've done adding local var reg
-  MFI->doneAddLocalVar();
-
-  DEBUG(for (PTXMachineFunctionInfo::reg_iterator
-             i = MFI->argRegBegin(), e = MFI->argRegEnd();
-             i != e; ++i)
-          dbgs() << "Arg Reg: " << *i << "\n";);
-
-  DEBUG(for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); - i != e; ++i) - dbgs() << "Local Var Reg: " << *i << "\n";); + // Generate list of all virtual registers used in this function + for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + const TargetRegisterClass *TRC = MRI.getRegClass(Reg); + MFI->addVirtualRegister(TRC, Reg); + } return false; } diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h index 9d65f5b..3b985f7 100644 --- a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -15,75 +15,148 @@ #define PTX_MACHINE_FUNCTION_INFO_H #include "PTX.h" +#include "PTXParamManager.h" +#include "PTXRegisterInfo.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { + /// PTXMachineFunctionInfo - This class is derived from MachineFunction and /// contains private PTX target-specific information for each MachineFunction. 
/// class PTXMachineFunctionInfo : public MachineFunctionInfo { private: - bool is_kernel; - std::vector<unsigned> reg_arg, reg_local_var; - std::vector<unsigned> reg_ret; - bool _isDoneAddArg; + bool IsKernel; + DenseSet<unsigned> RegArgs; + DenseSet<unsigned> RegRets; + + typedef std::vector<unsigned> RegisterList; + typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap; + typedef DenseMap<unsigned, std::string> RegisterNameMap; + typedef DenseMap<int, std::string> FrameMap; + + RegisterMap UsedRegs; + RegisterNameMap RegNames; + FrameMap FrameSymbols; + + PTXParamManager ParamManager; public: + typedef DenseSet<unsigned>::const_iterator reg_iterator; + PTXMachineFunctionInfo(MachineFunction &MF) - : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) { - reg_arg.reserve(8); - reg_local_var.reserve(32); + : IsKernel(false) { + UsedRegs[PTX::RegPredRegisterClass] = RegisterList(); + UsedRegs[PTX::RegI16RegisterClass] = RegisterList(); + UsedRegs[PTX::RegI32RegisterClass] = RegisterList(); + UsedRegs[PTX::RegI64RegisterClass] = RegisterList(); + UsedRegs[PTX::RegF32RegisterClass] = RegisterList(); + UsedRegs[PTX::RegF64RegisterClass] = RegisterList(); } - void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; } - - void addArgReg(unsigned reg) { reg_arg.push_back(reg); } - void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); } - void addRetReg(unsigned reg) { - if (!isRetReg(reg)) { - reg_ret.push_back(reg); + /// getParamManager - Returns the PTXParamManager instance for this function. + PTXParamManager& getParamManager() { return ParamManager; } + const PTXParamManager& getParamManager() const { return ParamManager; } + + /// setKernel/isKernel - Gets/sets a flag that indicates if this function is + /// a PTX kernel function. 
+ void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; } + bool isKernel() const { return IsKernel; } + + /// argreg_begin/argreg_end - Returns iterators to the set of registers + /// containing function arguments. + reg_iterator argreg_begin() const { return RegArgs.begin(); } + reg_iterator argreg_end() const { return RegArgs.end(); } + + /// retreg_begin/retreg_end - Returns iterators to the set of registers + /// containing the function return values. + reg_iterator retreg_begin() const { return RegRets.begin(); } + reg_iterator retreg_end() const { return RegRets.end(); } + + /// addRetReg - Adds a register to the set of return-value registers. + void addRetReg(unsigned Reg) { + if (!RegRets.count(Reg)) { + RegRets.insert(Reg); + std::string name; + name = "%ret"; + name += utostr(RegRets.size() - 1); + RegNames[Reg] = name; } } - void doneAddArg(void) { - _isDoneAddArg = true; + /// addArgReg - Adds a register to the set of function argument registers. + void addArgReg(unsigned Reg) { + RegArgs.insert(Reg); + std::string name; + name = "%param"; + name += utostr(RegArgs.size() - 1); + RegNames[Reg] = name; } - void doneAddLocalVar(void) {} - - bool isKernel() const { return is_kernel; } - typedef std::vector<unsigned>::const_iterator reg_iterator; - typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator; - typedef std::vector<unsigned>::const_iterator ret_iterator; - - bool argRegEmpty() const { return reg_arg.empty(); } - int getNumArg() const { return reg_arg.size(); } - reg_iterator argRegBegin() const { return reg_arg.begin(); } - reg_iterator argRegEnd() const { return reg_arg.end(); } - reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); } - reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); } - - bool localVarRegEmpty() const { return reg_local_var.empty(); } - reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } - reg_iterator localVarRegEnd() const { return 
reg_local_var.end(); } - - bool retRegEmpty() const { return reg_ret.empty(); } - int getNumRet() const { return reg_ret.size(); } - ret_iterator retRegBegin() const { return reg_ret.begin(); } - ret_iterator retRegEnd() const { return reg_ret.end(); } + /// addVirtualRegister - Adds a virtual register to the set of all used + /// registers in the function. + void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) { + std::string name; + + // Do not count registers that are argument/return registers. + if (!RegRets.count(Reg) && !RegArgs.count(Reg)) { + UsedRegs[TRC].push_back(Reg); + if (TRC == PTX::RegPredRegisterClass) + name = "%p"; + else if (TRC == PTX::RegI16RegisterClass) + name = "%rh"; + else if (TRC == PTX::RegI32RegisterClass) + name = "%r"; + else if (TRC == PTX::RegI64RegisterClass) + name = "%rd"; + else if (TRC == PTX::RegF32RegisterClass) + name = "%f"; + else if (TRC == PTX::RegF64RegisterClass) + name = "%fd"; + else + llvm_unreachable("Invalid register class"); + + name += utostr(UsedRegs[TRC].size() - 1); + RegNames[Reg] = name; + } + } - bool isArgReg(unsigned reg) const { - return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); + /// getRegisterName - Returns the name of the specified virtual register. This + /// name is used during PTX emission. + const char *getRegisterName(unsigned Reg) const { + if (RegNames.count(Reg)) + return RegNames.find(Reg)->second.c_str(); + else if (Reg == PTX::NoRegister) + return "%noreg"; + else + llvm_unreachable("Register not in register name map"); } - bool isRetReg(unsigned reg) const { - return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end(); + /// getNumRegistersForClass - Returns the number of virtual registers that are + /// used for the specified register class. 
+ unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const { + return UsedRegs.lookup(TRC).size(); } - bool isLocalVarReg(unsigned reg) const { - return std::find(reg_local_var.begin(), reg_local_var.end(), reg) - != reg_local_var.end(); + /// getFrameSymbol - Returns the symbol name for the given FrameIndex. + const char* getFrameSymbol(int FrameIndex) { + if (FrameSymbols.count(FrameIndex)) { + return FrameSymbols.lookup(FrameIndex).c_str(); + } else { + std::string Name = "__local"; + Name += utostr(FrameIndex); + // The whole point of caching this name is to ensure the pointer we pass + // to any getExternalSymbol() calls will remain valid for the lifetime of + // the back-end instance. This is to work around an issue in SelectionDAG + // where symbol names are expected to be life-long strings. + FrameSymbols[FrameIndex] = Name; + return FrameSymbols[FrameIndex].c_str(); + } } }; // class PTXMachineFunctionInfo } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp new file mode 100644 index 0000000..7753787 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp @@ -0,0 +1,73 @@ +//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXParamManager class. 
+// +//===----------------------------------------------------------------------===// + +#include "PTX.h" +#include "PTXParamManager.h" +#include "llvm/ADT/StringExtras.h" + +using namespace llvm; + +PTXParamManager::PTXParamManager() { +} + +unsigned PTXParamManager::addArgumentParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_ARGUMENT; + Param.Size = Size; + + std::string Name; + Name = "__param_"; + Name += utostr(ArgumentParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ArgumentParams.push_back(Index); + + return Index; +} + +unsigned PTXParamManager::addReturnParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_RETURN; + Param.Size = Size; + + std::string Name; + Name = "__ret_"; + Name += utostr(ReturnParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ReturnParams.push_back(Index); + + return Index; +} + +unsigned PTXParamManager::addLocalParam(unsigned Size) { + PTXParam Param; + Param.Type = PTX_PARAM_TYPE_LOCAL; + Param.Size = Size; + + std::string Name; + Name = "__localparam_"; + Name += utostr(LocalParams.size()+1); + Param.Name = Name; + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + LocalParams.push_back(Index); + + return Index; +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.h b/contrib/llvm/lib/Target/PTX/PTXParamManager.h new file mode 100644 index 0000000..9fd2de5 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.h @@ -0,0 +1,86 @@ +//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the PTXParamManager class, which manages all defined .param +// variables for a particular function. +// +//===----------------------------------------------------------------------===// + +#ifndef PTX_PARAM_MANAGER_H +#define PTX_PARAM_MANAGER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +/// PTXParamManager - This class manages all .param variables defined for a +/// particular function. +class PTXParamManager { +private: + + /// PTXParamType - Type of a .param variable + enum PTXParamType { + PTX_PARAM_TYPE_ARGUMENT, + PTX_PARAM_TYPE_RETURN, + PTX_PARAM_TYPE_LOCAL + }; + + /// PTXParam - Definition of a PTX .param variable + struct PTXParam { + PTXParamType Type; + unsigned Size; + std::string Name; + }; + + DenseMap<unsigned, PTXParam> AllParams; + SmallVector<unsigned, 4> ArgumentParams; + SmallVector<unsigned, 4> ReturnParams; + SmallVector<unsigned, 4> LocalParams; + +public: + + typedef SmallVector<unsigned, 4>::const_iterator param_iterator; + + PTXParamManager(); + + param_iterator arg_begin() const { return ArgumentParams.begin(); } + param_iterator arg_end() const { return ArgumentParams.end(); } + param_iterator ret_begin() const { return ReturnParams.begin(); } + param_iterator ret_end() const { return ReturnParams.end(); } + param_iterator local_begin() const { return LocalParams.begin(); } + param_iterator local_end() const { return LocalParams.end(); } + + /// addArgumentParam - Returns a new .param used as an argument. + unsigned addArgumentParam(unsigned Size); + + /// addReturnParam - Returns a new .param used as a return argument. + unsigned addReturnParam(unsigned Size); + + /// addLocalParam - Returns a new .param used as a local .param variable. + unsigned addLocalParam(unsigned Size); + + /// getParamName - Returns the name of the parameter as a string. 
+ const std::string &getParamName(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Name; + } + + /// getParamSize - Returns the size of the parameter in bits. + unsigned getParamSize(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Size; + } + +}; + +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp new file mode 100644 index 0000000..2d2d5c3 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp @@ -0,0 +1,58 @@ +//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a register allocator for PTX code. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-reg-alloc" + +#include "PTX.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegAllocRegistry.h" + +using namespace llvm; + +namespace { + // Special register allocator for PTX. 
+ class PTXRegAlloc : public MachineFunctionPass { + public: + static char ID; + PTXRegAlloc() : MachineFunctionPass(ID) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + + virtual const char* getPassName() const { + return "PTX Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + // We do not actually do anything (at least not yet). + return false; + } + }; + + char PTXRegAlloc::ID = 0; + + static RegisterRegAlloc + ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator); +} + +FunctionPass *llvm::createPTXRegisterAllocator() { + return new PTXRegAlloc(); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp index cb56ea9..c806266 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp @@ -14,6 +14,9 @@ #include "PTX.h" #include "PTXRegisterInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -23,15 +26,23 @@ using namespace llvm; PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII) - : PTXGenRegisterInfo() { + const TargetInstrInfo &tii) + // PTX does not have a return address register. 
+ : PTXGenRegisterInfo(0), TII(tii) { } void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, RegScavenger *RS) const { unsigned Index; - MachineInstr& MI = *II; + MachineInstr &MI = *II; + //MachineBasicBlock &MBB = *MI.getParent(); + //DebugLoc dl = MI.getDebugLoc(); + //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + + //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass); + + llvm_unreachable("FrameIndex should have been previously eliminated!"); Index = 0; while (!MI.getOperand(Index).isFI()) { @@ -46,6 +57,18 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n"); DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n"); + //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32)) + //.addReg(Reg, RegState::Define).addImm(FrameIndex); + //if (MI2->findFirstPredOperandIdx() == -1) { + // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); + // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + //} + //MI2->dump(); + + //MachineOperand ESOp = MachineOperand::CreateES("__local__"); + // This frame index is post stack slot re-use assignments + //MI.getOperand(Index).ChangeToRegister(Reg, false); MI.getOperand(Index).ChangeToImmediate(FrameIndex); + //MI.getOperand(Index) = ESOp; } diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h index 0b63cb6..55fafe4 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h @@ -25,8 +25,12 @@ class PTXTargetMachine; class MachineFunction; struct PTXRegisterInfo : public PTXGenRegisterInfo { +private: + const TargetInstrInfo &TII; + +public: PTXRegisterInfo(PTXTargetMachine &TM, - const TargetInstrInfo &TII); + const TargetInstrInfo &tii); virtual const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const { @@ -47,18 
+51,6 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo { llvm_unreachable("PTX does not have a frame register"); return 0; } - - virtual unsigned getRARegister() const { - llvm_unreachable("PTX does not have a return address register"); - return 0; - } - - virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const { - return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); - } - virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const { - return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0); - } }; // struct PTXRegisterInfo } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td index 1313d24..6ed6d3f 100644 --- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td +++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td @@ -20,536 +20,18 @@ class PTXReg<string n> : Register<n> { // Registers //===----------------------------------------------------------------------===// -///===- Predicate Registers -----------------------------------------------===// - -def P0 : PTXReg<"p0">; -def P1 : PTXReg<"p1">; -def P2 : PTXReg<"p2">; -def P3 : PTXReg<"p3">; -def P4 : PTXReg<"p4">; -def P5 : PTXReg<"p5">; -def P6 : PTXReg<"p6">; -def P7 : PTXReg<"p7">; -def P8 : PTXReg<"p8">; -def P9 : PTXReg<"p9">; -def P10 : PTXReg<"p10">; -def P11 : PTXReg<"p11">; -def P12 : PTXReg<"p12">; -def P13 : PTXReg<"p13">; -def P14 : PTXReg<"p14">; -def P15 : PTXReg<"p15">; -def P16 : PTXReg<"p16">; -def P17 : PTXReg<"p17">; -def P18 : PTXReg<"p18">; -def P19 : PTXReg<"p19">; -def P20 : PTXReg<"p20">; -def P21 : PTXReg<"p21">; -def P22 : PTXReg<"p22">; -def P23 : PTXReg<"p23">; -def P24 : PTXReg<"p24">; -def P25 : PTXReg<"p25">; -def P26 : PTXReg<"p26">; -def P27 : PTXReg<"p27">; -def P28 : PTXReg<"p28">; -def P29 : PTXReg<"p29">; -def P30 : PTXReg<"p30">; -def P31 : PTXReg<"p31">; -def P32 : PTXReg<"p32">; -def P33 : PTXReg<"p33">; -def P34 : PTXReg<"p34">; -def P35 : PTXReg<"p35">; -def P36 : PTXReg<"p36">; -def P37 
: PTXReg<"p37">; -def P38 : PTXReg<"p38">; -def P39 : PTXReg<"p39">; -def P40 : PTXReg<"p40">; -def P41 : PTXReg<"p41">; -def P42 : PTXReg<"p42">; -def P43 : PTXReg<"p43">; -def P44 : PTXReg<"p44">; -def P45 : PTXReg<"p45">; -def P46 : PTXReg<"p46">; -def P47 : PTXReg<"p47">; -def P48 : PTXReg<"p48">; -def P49 : PTXReg<"p49">; -def P50 : PTXReg<"p50">; -def P51 : PTXReg<"p51">; -def P52 : PTXReg<"p52">; -def P53 : PTXReg<"p53">; -def P54 : PTXReg<"p54">; -def P55 : PTXReg<"p55">; -def P56 : PTXReg<"p56">; -def P57 : PTXReg<"p57">; -def P58 : PTXReg<"p58">; -def P59 : PTXReg<"p59">; -def P60 : PTXReg<"p60">; -def P61 : PTXReg<"p61">; -def P62 : PTXReg<"p62">; -def P63 : PTXReg<"p63">; -def P64 : PTXReg<"p64">; -def P65 : PTXReg<"p65">; -def P66 : PTXReg<"p66">; -def P67 : PTXReg<"p67">; -def P68 : PTXReg<"p68">; -def P69 : PTXReg<"p69">; -def P70 : PTXReg<"p70">; -def P71 : PTXReg<"p71">; -def P72 : PTXReg<"p72">; -def P73 : PTXReg<"p73">; -def P74 : PTXReg<"p74">; -def P75 : PTXReg<"p75">; -def P76 : PTXReg<"p76">; -def P77 : PTXReg<"p77">; -def P78 : PTXReg<"p78">; -def P79 : PTXReg<"p79">; -def P80 : PTXReg<"p80">; -def P81 : PTXReg<"p81">; -def P82 : PTXReg<"p82">; -def P83 : PTXReg<"p83">; -def P84 : PTXReg<"p84">; -def P85 : PTXReg<"p85">; -def P86 : PTXReg<"p86">; -def P87 : PTXReg<"p87">; -def P88 : PTXReg<"p88">; -def P89 : PTXReg<"p89">; -def P90 : PTXReg<"p90">; -def P91 : PTXReg<"p91">; -def P92 : PTXReg<"p92">; -def P93 : PTXReg<"p93">; -def P94 : PTXReg<"p94">; -def P95 : PTXReg<"p95">; -def P96 : PTXReg<"p96">; -def P97 : PTXReg<"p97">; -def P98 : PTXReg<"p98">; -def P99 : PTXReg<"p99">; -def P100 : PTXReg<"p100">; -def P101 : PTXReg<"p101">; -def P102 : PTXReg<"p102">; -def P103 : PTXReg<"p103">; -def P104 : PTXReg<"p104">; -def P105 : PTXReg<"p105">; -def P106 : PTXReg<"p106">; -def P107 : PTXReg<"p107">; -def P108 : PTXReg<"p108">; -def P109 : PTXReg<"p109">; -def P110 : PTXReg<"p110">; -def P111 : PTXReg<"p111">; -def P112 : PTXReg<"p112">; -def 
P113 : PTXReg<"p113">; -def P114 : PTXReg<"p114">; -def P115 : PTXReg<"p115">; -def P116 : PTXReg<"p116">; -def P117 : PTXReg<"p117">; -def P118 : PTXReg<"p118">; -def P119 : PTXReg<"p119">; -def P120 : PTXReg<"p120">; -def P121 : PTXReg<"p121">; -def P122 : PTXReg<"p122">; -def P123 : PTXReg<"p123">; -def P124 : PTXReg<"p124">; -def P125 : PTXReg<"p125">; -def P126 : PTXReg<"p126">; -def P127 : PTXReg<"p127">; - -///===- 16-Bit Registers --------------------------------------------------===// - -def RH0 : PTXReg<"rh0">; -def RH1 : PTXReg<"rh1">; -def RH2 : PTXReg<"rh2">; -def RH3 : PTXReg<"rh3">; -def RH4 : PTXReg<"rh4">; -def RH5 : PTXReg<"rh5">; -def RH6 : PTXReg<"rh6">; -def RH7 : PTXReg<"rh7">; -def RH8 : PTXReg<"rh8">; -def RH9 : PTXReg<"rh9">; -def RH10 : PTXReg<"rh10">; -def RH11 : PTXReg<"rh11">; -def RH12 : PTXReg<"rh12">; -def RH13 : PTXReg<"rh13">; -def RH14 : PTXReg<"rh14">; -def RH15 : PTXReg<"rh15">; -def RH16 : PTXReg<"rh16">; -def RH17 : PTXReg<"rh17">; -def RH18 : PTXReg<"rh18">; -def RH19 : PTXReg<"rh19">; -def RH20 : PTXReg<"rh20">; -def RH21 : PTXReg<"rh21">; -def RH22 : PTXReg<"rh22">; -def RH23 : PTXReg<"rh23">; -def RH24 : PTXReg<"rh24">; -def RH25 : PTXReg<"rh25">; -def RH26 : PTXReg<"rh26">; -def RH27 : PTXReg<"rh27">; -def RH28 : PTXReg<"rh28">; -def RH29 : PTXReg<"rh29">; -def RH30 : PTXReg<"rh30">; -def RH31 : PTXReg<"rh31">; -def RH32 : PTXReg<"rh32">; -def RH33 : PTXReg<"rh33">; -def RH34 : PTXReg<"rh34">; -def RH35 : PTXReg<"rh35">; -def RH36 : PTXReg<"rh36">; -def RH37 : PTXReg<"rh37">; -def RH38 : PTXReg<"rh38">; -def RH39 : PTXReg<"rh39">; -def RH40 : PTXReg<"rh40">; -def RH41 : PTXReg<"rh41">; -def RH42 : PTXReg<"rh42">; -def RH43 : PTXReg<"rh43">; -def RH44 : PTXReg<"rh44">; -def RH45 : PTXReg<"rh45">; -def RH46 : PTXReg<"rh46">; -def RH47 : PTXReg<"rh47">; -def RH48 : PTXReg<"rh48">; -def RH49 : PTXReg<"rh49">; -def RH50 : PTXReg<"rh50">; -def RH51 : PTXReg<"rh51">; -def RH52 : PTXReg<"rh52">; -def RH53 : PTXReg<"rh53">; -def 
RH54 : PTXReg<"rh54">; -def RH55 : PTXReg<"rh55">; -def RH56 : PTXReg<"rh56">; -def RH57 : PTXReg<"rh57">; -def RH58 : PTXReg<"rh58">; -def RH59 : PTXReg<"rh59">; -def RH60 : PTXReg<"rh60">; -def RH61 : PTXReg<"rh61">; -def RH62 : PTXReg<"rh62">; -def RH63 : PTXReg<"rh63">; -def RH64 : PTXReg<"rh64">; -def RH65 : PTXReg<"rh65">; -def RH66 : PTXReg<"rh66">; -def RH67 : PTXReg<"rh67">; -def RH68 : PTXReg<"rh68">; -def RH69 : PTXReg<"rh69">; -def RH70 : PTXReg<"rh70">; -def RH71 : PTXReg<"rh71">; -def RH72 : PTXReg<"rh72">; -def RH73 : PTXReg<"rh73">; -def RH74 : PTXReg<"rh74">; -def RH75 : PTXReg<"rh75">; -def RH76 : PTXReg<"rh76">; -def RH77 : PTXReg<"rh77">; -def RH78 : PTXReg<"rh78">; -def RH79 : PTXReg<"rh79">; -def RH80 : PTXReg<"rh80">; -def RH81 : PTXReg<"rh81">; -def RH82 : PTXReg<"rh82">; -def RH83 : PTXReg<"rh83">; -def RH84 : PTXReg<"rh84">; -def RH85 : PTXReg<"rh85">; -def RH86 : PTXReg<"rh86">; -def RH87 : PTXReg<"rh87">; -def RH88 : PTXReg<"rh88">; -def RH89 : PTXReg<"rh89">; -def RH90 : PTXReg<"rh90">; -def RH91 : PTXReg<"rh91">; -def RH92 : PTXReg<"rh92">; -def RH93 : PTXReg<"rh93">; -def RH94 : PTXReg<"rh94">; -def RH95 : PTXReg<"rh95">; -def RH96 : PTXReg<"rh96">; -def RH97 : PTXReg<"rh97">; -def RH98 : PTXReg<"rh98">; -def RH99 : PTXReg<"rh99">; -def RH100 : PTXReg<"rh100">; -def RH101 : PTXReg<"rh101">; -def RH102 : PTXReg<"rh102">; -def RH103 : PTXReg<"rh103">; -def RH104 : PTXReg<"rh104">; -def RH105 : PTXReg<"rh105">; -def RH106 : PTXReg<"rh106">; -def RH107 : PTXReg<"rh107">; -def RH108 : PTXReg<"rh108">; -def RH109 : PTXReg<"rh109">; -def RH110 : PTXReg<"rh110">; -def RH111 : PTXReg<"rh111">; -def RH112 : PTXReg<"rh112">; -def RH113 : PTXReg<"rh113">; -def RH114 : PTXReg<"rh114">; -def RH115 : PTXReg<"rh115">; -def RH116 : PTXReg<"rh116">; -def RH117 : PTXReg<"rh117">; -def RH118 : PTXReg<"rh118">; -def RH119 : PTXReg<"rh119">; -def RH120 : PTXReg<"rh120">; -def RH121 : PTXReg<"rh121">; -def RH122 : PTXReg<"rh122">; -def RH123 : 
PTXReg<"rh123">; -def RH124 : PTXReg<"rh124">; -def RH125 : PTXReg<"rh125">; -def RH126 : PTXReg<"rh126">; -def RH127 : PTXReg<"rh127">; - -///===- 32-Bit Registers --------------------------------------------------===// - -def R0 : PTXReg<"r0">; -def R1 : PTXReg<"r1">; -def R2 : PTXReg<"r2">; -def R3 : PTXReg<"r3">; -def R4 : PTXReg<"r4">; -def R5 : PTXReg<"r5">; -def R6 : PTXReg<"r6">; -def R7 : PTXReg<"r7">; -def R8 : PTXReg<"r8">; -def R9 : PTXReg<"r9">; -def R10 : PTXReg<"r10">; -def R11 : PTXReg<"r11">; -def R12 : PTXReg<"r12">; -def R13 : PTXReg<"r13">; -def R14 : PTXReg<"r14">; -def R15 : PTXReg<"r15">; -def R16 : PTXReg<"r16">; -def R17 : PTXReg<"r17">; -def R18 : PTXReg<"r18">; -def R19 : PTXReg<"r19">; -def R20 : PTXReg<"r20">; -def R21 : PTXReg<"r21">; -def R22 : PTXReg<"r22">; -def R23 : PTXReg<"r23">; -def R24 : PTXReg<"r24">; -def R25 : PTXReg<"r25">; -def R26 : PTXReg<"r26">; -def R27 : PTXReg<"r27">; -def R28 : PTXReg<"r28">; -def R29 : PTXReg<"r29">; -def R30 : PTXReg<"r30">; -def R31 : PTXReg<"r31">; -def R32 : PTXReg<"r32">; -def R33 : PTXReg<"r33">; -def R34 : PTXReg<"r34">; -def R35 : PTXReg<"r35">; -def R36 : PTXReg<"r36">; -def R37 : PTXReg<"r37">; -def R38 : PTXReg<"r38">; -def R39 : PTXReg<"r39">; -def R40 : PTXReg<"r40">; -def R41 : PTXReg<"r41">; -def R42 : PTXReg<"r42">; -def R43 : PTXReg<"r43">; -def R44 : PTXReg<"r44">; -def R45 : PTXReg<"r45">; -def R46 : PTXReg<"r46">; -def R47 : PTXReg<"r47">; -def R48 : PTXReg<"r48">; -def R49 : PTXReg<"r49">; -def R50 : PTXReg<"r50">; -def R51 : PTXReg<"r51">; -def R52 : PTXReg<"r52">; -def R53 : PTXReg<"r53">; -def R54 : PTXReg<"r54">; -def R55 : PTXReg<"r55">; -def R56 : PTXReg<"r56">; -def R57 : PTXReg<"r57">; -def R58 : PTXReg<"r58">; -def R59 : PTXReg<"r59">; -def R60 : PTXReg<"r60">; -def R61 : PTXReg<"r61">; -def R62 : PTXReg<"r62">; -def R63 : PTXReg<"r63">; -def R64 : PTXReg<"r64">; -def R65 : PTXReg<"r65">; -def R66 : PTXReg<"r66">; -def R67 : PTXReg<"r67">; -def R68 : PTXReg<"r68">; 
-def R69 : PTXReg<"r69">; -def R70 : PTXReg<"r70">; -def R71 : PTXReg<"r71">; -def R72 : PTXReg<"r72">; -def R73 : PTXReg<"r73">; -def R74 : PTXReg<"r74">; -def R75 : PTXReg<"r75">; -def R76 : PTXReg<"r76">; -def R77 : PTXReg<"r77">; -def R78 : PTXReg<"r78">; -def R79 : PTXReg<"r79">; -def R80 : PTXReg<"r80">; -def R81 : PTXReg<"r81">; -def R82 : PTXReg<"r82">; -def R83 : PTXReg<"r83">; -def R84 : PTXReg<"r84">; -def R85 : PTXReg<"r85">; -def R86 : PTXReg<"r86">; -def R87 : PTXReg<"r87">; -def R88 : PTXReg<"r88">; -def R89 : PTXReg<"r89">; -def R90 : PTXReg<"r90">; -def R91 : PTXReg<"r91">; -def R92 : PTXReg<"r92">; -def R93 : PTXReg<"r93">; -def R94 : PTXReg<"r94">; -def R95 : PTXReg<"r95">; -def R96 : PTXReg<"r96">; -def R97 : PTXReg<"r97">; -def R98 : PTXReg<"r98">; -def R99 : PTXReg<"r99">; -def R100 : PTXReg<"r100">; -def R101 : PTXReg<"r101">; -def R102 : PTXReg<"r102">; -def R103 : PTXReg<"r103">; -def R104 : PTXReg<"r104">; -def R105 : PTXReg<"r105">; -def R106 : PTXReg<"r106">; -def R107 : PTXReg<"r107">; -def R108 : PTXReg<"r108">; -def R109 : PTXReg<"r109">; -def R110 : PTXReg<"r110">; -def R111 : PTXReg<"r111">; -def R112 : PTXReg<"r112">; -def R113 : PTXReg<"r113">; -def R114 : PTXReg<"r114">; -def R115 : PTXReg<"r115">; -def R116 : PTXReg<"r116">; -def R117 : PTXReg<"r117">; -def R118 : PTXReg<"r118">; -def R119 : PTXReg<"r119">; -def R120 : PTXReg<"r120">; -def R121 : PTXReg<"r121">; -def R122 : PTXReg<"r122">; -def R123 : PTXReg<"r123">; -def R124 : PTXReg<"r124">; -def R125 : PTXReg<"r125">; -def R126 : PTXReg<"r126">; -def R127 : PTXReg<"r127">; - -///===- 64-Bit Registers --------------------------------------------------===// - -def RD0 : PTXReg<"rd0">; -def RD1 : PTXReg<"rd1">; -def RD2 : PTXReg<"rd2">; -def RD3 : PTXReg<"rd3">; -def RD4 : PTXReg<"rd4">; -def RD5 : PTXReg<"rd5">; -def RD6 : PTXReg<"rd6">; -def RD7 : PTXReg<"rd7">; -def RD8 : PTXReg<"rd8">; -def RD9 : PTXReg<"rd9">; -def RD10 : PTXReg<"rd10">; -def RD11 : PTXReg<"rd11">; -def 
RD12 : PTXReg<"rd12">; -def RD13 : PTXReg<"rd13">; -def RD14 : PTXReg<"rd14">; -def RD15 : PTXReg<"rd15">; -def RD16 : PTXReg<"rd16">; -def RD17 : PTXReg<"rd17">; -def RD18 : PTXReg<"rd18">; -def RD19 : PTXReg<"rd19">; -def RD20 : PTXReg<"rd20">; -def RD21 : PTXReg<"rd21">; -def RD22 : PTXReg<"rd22">; -def RD23 : PTXReg<"rd23">; -def RD24 : PTXReg<"rd24">; -def RD25 : PTXReg<"rd25">; -def RD26 : PTXReg<"rd26">; -def RD27 : PTXReg<"rd27">; -def RD28 : PTXReg<"rd28">; -def RD29 : PTXReg<"rd29">; -def RD30 : PTXReg<"rd30">; -def RD31 : PTXReg<"rd31">; -def RD32 : PTXReg<"rd32">; -def RD33 : PTXReg<"rd33">; -def RD34 : PTXReg<"rd34">; -def RD35 : PTXReg<"rd35">; -def RD36 : PTXReg<"rd36">; -def RD37 : PTXReg<"rd37">; -def RD38 : PTXReg<"rd38">; -def RD39 : PTXReg<"rd39">; -def RD40 : PTXReg<"rd40">; -def RD41 : PTXReg<"rd41">; -def RD42 : PTXReg<"rd42">; -def RD43 : PTXReg<"rd43">; -def RD44 : PTXReg<"rd44">; -def RD45 : PTXReg<"rd45">; -def RD46 : PTXReg<"rd46">; -def RD47 : PTXReg<"rd47">; -def RD48 : PTXReg<"rd48">; -def RD49 : PTXReg<"rd49">; -def RD50 : PTXReg<"rd50">; -def RD51 : PTXReg<"rd51">; -def RD52 : PTXReg<"rd52">; -def RD53 : PTXReg<"rd53">; -def RD54 : PTXReg<"rd54">; -def RD55 : PTXReg<"rd55">; -def RD56 : PTXReg<"rd56">; -def RD57 : PTXReg<"rd57">; -def RD58 : PTXReg<"rd58">; -def RD59 : PTXReg<"rd59">; -def RD60 : PTXReg<"rd60">; -def RD61 : PTXReg<"rd61">; -def RD62 : PTXReg<"rd62">; -def RD63 : PTXReg<"rd63">; -def RD64 : PTXReg<"rd64">; -def RD65 : PTXReg<"rd65">; -def RD66 : PTXReg<"rd66">; -def RD67 : PTXReg<"rd67">; -def RD68 : PTXReg<"rd68">; -def RD69 : PTXReg<"rd69">; -def RD70 : PTXReg<"rd70">; -def RD71 : PTXReg<"rd71">; -def RD72 : PTXReg<"rd72">; -def RD73 : PTXReg<"rd73">; -def RD74 : PTXReg<"rd74">; -def RD75 : PTXReg<"rd75">; -def RD76 : PTXReg<"rd76">; -def RD77 : PTXReg<"rd77">; -def RD78 : PTXReg<"rd78">; -def RD79 : PTXReg<"rd79">; -def RD80 : PTXReg<"rd80">; -def RD81 : PTXReg<"rd81">; -def RD82 : PTXReg<"rd82">; -def RD83 : 
PTXReg<"rd83">; -def RD84 : PTXReg<"rd84">; -def RD85 : PTXReg<"rd85">; -def RD86 : PTXReg<"rd86">; -def RD87 : PTXReg<"rd87">; -def RD88 : PTXReg<"rd88">; -def RD89 : PTXReg<"rd89">; -def RD90 : PTXReg<"rd90">; -def RD91 : PTXReg<"rd91">; -def RD92 : PTXReg<"rd92">; -def RD93 : PTXReg<"rd93">; -def RD94 : PTXReg<"rd94">; -def RD95 : PTXReg<"rd95">; -def RD96 : PTXReg<"rd96">; -def RD97 : PTXReg<"rd97">; -def RD98 : PTXReg<"rd98">; -def RD99 : PTXReg<"rd99">; -def RD100 : PTXReg<"rd100">; -def RD101 : PTXReg<"rd101">; -def RD102 : PTXReg<"rd102">; -def RD103 : PTXReg<"rd103">; -def RD104 : PTXReg<"rd104">; -def RD105 : PTXReg<"rd105">; -def RD106 : PTXReg<"rd106">; -def RD107 : PTXReg<"rd107">; -def RD108 : PTXReg<"rd108">; -def RD109 : PTXReg<"rd109">; -def RD110 : PTXReg<"rd110">; -def RD111 : PTXReg<"rd111">; -def RD112 : PTXReg<"rd112">; -def RD113 : PTXReg<"rd113">; -def RD114 : PTXReg<"rd114">; -def RD115 : PTXReg<"rd115">; -def RD116 : PTXReg<"rd116">; -def RD117 : PTXReg<"rd117">; -def RD118 : PTXReg<"rd118">; -def RD119 : PTXReg<"rd119">; -def RD120 : PTXReg<"rd120">; -def RD121 : PTXReg<"rd121">; -def RD122 : PTXReg<"rd122">; -def RD123 : PTXReg<"rd123">; -def RD124 : PTXReg<"rd124">; -def RD125 : PTXReg<"rd125">; -def RD126 : PTXReg<"rd126">; -def RD127 : PTXReg<"rd127">; +// The generated register info code throws warnings for empty register classes +// (e.g. zero-length arrays), so we use a dummy register here just to prevent +// these warnings. 
+def DUMMY_REG : PTXReg<"R0">; //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// -def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>; -def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>; -def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>; -def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>; -def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>; -def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>; +def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>; +def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>; +def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>; +def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>; +def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>; +def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>; + diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp new file mode 100644 index 0000000..50ef14a --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp @@ -0,0 +1,149 @@ +//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXSelectionDAGInfo class. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-selectiondag-info" +#include "PTXTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget<PTXSubtarget>()) { +} + +PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { +} + +SDValue +PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const { + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + // Always inline memcpys. In PTX, we do not have a C library that provides + // a memcpy function. + //if (!AlwaysInline) + // return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + EVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. 
+ while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, PointerType, Src, + DAG.getConstant(SrcOff, PointerType)), + SrcPtrInfo.getWithOffset(SrcOff), isVolatile, + false, 0); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, PointerType, Dst, + DAG.getConstant(DstOff, PointerType)), + DstPtrInfo.getWithOffset(DstOff), + isVolatile, false, 0); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, PointerType, Src, + DAG.getConstant(SrcOff, PointerType)), + SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, PointerType, Dst, + DAG.getConstant(DstOff, PointerType)), + DstPtrInfo.getWithOffset(DstOff), false, false, 0); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} + +SDValue PTXSelectionDAGInfo:: 
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + llvm_unreachable("memset lowering not implemented for PTX yet"); +} + diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h new file mode 100644 index 0000000..e0c7167 --- /dev/null +++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h @@ -0,0 +1,53 @@ +//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTX subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXSELECTIONDAGINFO_H +#define PTXSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target. +/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo. +class PTXSelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can + /// make the right decision when generating code for different targets. 
+ const PTXSubtarget *Subtarget; + +public: + explicit PTXSelectionDAGInfo(const TargetMachine &TM); + ~PTXSelectionDAGInfo(); + + virtual + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool isVolatile, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const; + + virtual + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, + bool isVolatile, + MachinePointerInfo DstPtrInfo) const; +}; + +} + +#endif + diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp index 8ec646e..1eb57d2 100644 --- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp @@ -14,7 +14,7 @@ #include "PTXSubtarget.h" #include "PTX.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h index 0921f1f..b946d7c 100644 --- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h +++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h @@ -114,7 +114,16 @@ class StringRef; (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); } - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + bool callsAreHandled() const { + return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) || + (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE); + } + + bool emitPtrAttribute() const { + return PTXVersion >= PTX_VERSION_2_2; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; // class PTXSubtarget } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp index 
ab926e0..449a3d9 100644 --- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp @@ -14,8 +14,32 @@ #include "PTX.h" #include "PTXTargetMachine.h" #include "llvm/PassManager.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" + using namespace llvm; @@ -25,7 +49,7 @@ namespace llvm { bool useCFI, MCInstPrinter *InstPrint, MCCodeEmitter *CE, - TargetAsmBackend *TAB, + MCAsmBackend *MAB, bool ShowInst); } @@ -43,34 +67,47 @@ namespace { "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; const char* DataLayout64 = "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; + + // Copied from LLVMTargetMachine.cpp + void printNoVerify(PassManagerBase &PM, const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + } + + void printAndVerify(PassManagerBase &PM, + const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + + //if (VerifyMachineCode) + 
// PM.add(createMachineVerifierPass(Banner)); + } } // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, - const std::string &TT, - const std::string &CPU, - const std::string &FS, + StringRef TT, StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS), + : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), DataLayout(is64Bit ? DataLayout64 : DataLayout32), Subtarget(TT, CPU, FS, is64Bit), FrameLowering(Subtarget), InstrInfo(*this), + TSInfo(*this), TLInfo(*this) { } -PTX32TargetMachine::PTX32TargetMachine(const Target &T, - const std::string& TT, - const std::string& CPU, - const std::string& FS) - : PTXTargetMachine(T, TT, CPU, FS, false) { +PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) { } -PTX64TargetMachine::PTX64TargetMachine(const Target &T, - const std::string& TT, - const std::string& CPU, - const std::string& FS) - : PTXTargetMachine(T, TT, CPU, FS, true) { +PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM) + : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, @@ -82,6 +119,255 @@ bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // PTXMFInfoExtract must after register allocation! + //PM.add(createPTXMFInfoExtract(*this, OptLevel)); + return false; +} + +bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // This is mostly based on LLVMTargetMachine::addPassesToEmitFile + + // Add common CodeGen passes. 
+ MCContext *Context = 0; + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + return true; + assert(Context != 0 && "Failed to get MCContext"); + + if (hasMCSaveTempLabels()) + Context->setAllowTemporaryLabels(false); + + const MCAsmInfo &MAI = *getMCAsmInfo(); + const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); + OwningPtr<MCStreamer> AsmStreamer; + + switch (FileType) { + default: return true; + case CGFT_AssemblyFile: { + MCInstPrinter *InstPrinter = + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); + + // Create a code emitter if asked to show the encoding. + MCCodeEmitter *MCE = 0; + MCAsmBackend *MAB = 0; + + MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, + true, /* verbose asm */ + hasMCUseLoc(), + hasMCUseCFI(), + InstPrinter, + MCE, MAB, + false /* show MC encoding */); + AsmStreamer.reset(S); + break; + } + case CGFT_ObjectFile: { + llvm_unreachable("Object file emission is not supported with PTX"); + } + case CGFT_Null: + // The Null output is intended for use for performance analysis and testing, + // not real users. + AsmStreamer.reset(createNullStreamer(*Context)); + break; + } + + // MC Logging + //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + if (Printer == 0) + return true; + + // If successful, createAsmPrinter took ownership of AsmStreamer. + AsmStreamer.take(); + + PM.add(Printer); + + PM.add(createGCInfoDeleter()); + return false; +} + +bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DisableVerify, + MCContext *&OutContext) { + // Add standard LLVM codegen passes. + // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some + // modifications for the PTX target. + + // Standard LLVM-Level Passes. 
+ + // Basic AliasAnalysis support. + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + PM.add(createTypeBasedAliasAnalysisPass()); + PM.add(createBasicAliasAnalysisPass()); + + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Run loop strength reduction before anything else. + if (OptLevel != CodeGenOpt::None) { + PM.add(createLoopStrengthReducePass(getTargetLowering())); + //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + } + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + + PM.add(createLowerInvokePass(getTargetLowering())); + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); + + if (OptLevel != CodeGenOpt::None) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + addPreISel(PM, OptLevel); + + //PM.add(createPrintFunctionPass("\n\n" + // "*** Final LLVM Code input to ISel ***\n", + // &dbgs())); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Standard Lower-Level Passes. + + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), + *getRegisterInfo(), + &getTargetLowering()->getObjFileLowering()); + PM.add(MMI); + OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. 
+ + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + + // Ask the target for an isel. + if (addInstSelector(PM, OptLevel)) + return true; + + // Print the instruction selected machine code... + printAndVerify(PM, "After Instruction Selection"); + + // Expand pseudo-instructions emitted by ISel. + PM.add(createExpandISelPseudosPass()); + + // Pre-ra tail duplication. + if (OptLevel != CodeGenOpt::None) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + PM.add(createLocalStackSlotAllocationPass()); + + if (OptLevel != CodeGenOpt::None) { + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass"); + + PM.add(createMachineLICMPass()); + PM.add(createMachineCSEPass()); + PM.add(createMachineSinkingPass()); + printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); + + PM.add(createPeepholeOptimizerPass()); + printAndVerify(PM, "After codegen peephole optimization pass"); + } + + // Run pre-ra passes. + if (addPreRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PreRegAlloc passes"); + + // Perform register allocation. + PM.add(createPTXRegisterAllocator()); + printAndVerify(PM, "After Register Allocation"); + + // Perform stack slot coloring and post-ra machine LICM. 
+ if (OptLevel != CodeGenOpt::None) { + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + PM.add(createStackSlotColoringPass(false)); + + // FIXME: Post-RA LICM has asserts that fire on virtual registers. + // Run post-ra machine LICM to hoist reloads / remats. + //if (!DisablePostRAMachineLICM) + // PM.add(createMachineLICMPass(false)); + + printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); + } + + // Run post-ra passes. + if (addPostRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PostRegAlloc passes"); + + PM.add(createExpandPostRAPseudosPass()); + printAndVerify(PM, "After ExpandPostRAPseudos"); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + PM.add(createPrologEpilogCodeInserter()); + printAndVerify(PM, "After PrologEpilogCodeInserter"); + + // Run pre-sched2 passes. + if (addPreSched2(PM, OptLevel)) + printAndVerify(PM, "After PreSched2 passes"); + + // Second pass scheduler. + if (OptLevel != CodeGenOpt::None) { + PM.add(createPostRAScheduler(OptLevel)); + printAndVerify(PM, "After PostRAScheduler"); + } + + // Branch folding must be run after regalloc and prolog/epilog insertion. + if (OptLevel != CodeGenOpt::None) { + PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); + printNoVerify(PM, "After BranchFolding"); + } + + // Tail duplication. 
+ if (OptLevel != CodeGenOpt::None) { + PM.add(createTailDuplicatePass(false)); + printNoVerify(PM, "After TailDuplicate"); + } + + PM.add(createGCMachineCodeAnalysisPass()); + + //if (PrintGCInfo) + // PM.add(createGCInfoPrinter(dbgs())); + + if (OptLevel != CodeGenOpt::None) { + PM.add(createCodePlacementOptPass()); + printNoVerify(PM, "After CodePlacementOpt"); + } + + if (addPreEmitPass(PM, OptLevel)) + printNoVerify(PM, "After PreEmit passes"); + PM.add(createPTXMFInfoExtract(*this, OptLevel)); + PM.add(createPTXFPRoundingModePass(*this, OptLevel)); + return false; } diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h index ae42153..5b7c82b 100644 --- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h +++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h @@ -17,6 +17,7 @@ #include "PTXISelLowering.h" #include "PTXInstrInfo.h" #include "PTXFrameLowering.h" +#include "PTXSelectionDAGInfo.h" #include "PTXSubtarget.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" @@ -25,15 +26,17 @@ namespace llvm { class PTXTargetMachine : public LLVMTargetMachine { private: - const TargetData DataLayout; - PTXSubtarget Subtarget; // has to be initialized before FrameLowering - PTXFrameLowering FrameLowering; - PTXInstrInfo InstrInfo; - PTXTargetLowering TLInfo; + const TargetData DataLayout; + PTXSubtarget Subtarget; // has to be initialized before FrameLowering + PTXFrameLowering FrameLowering; + PTXInstrInfo InstrInfo; + PTXSelectionDAGInfo TSInfo; + PTXTargetLowering TLInfo; public: - PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &CPU, const std::string &FS, + PTXTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM, bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -49,27 +52,62 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual const 
PTXTargetLowering *getTargetLowering() const { return &TLInfo; } + virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const { + return &TSInfo; + } + virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + + // We override this method to supply our own set of codegen passes. + virtual bool addPassesToEmitFile(PassManagerBase &, + formatted_raw_ostream &, + CodeGenFileType, + CodeGenOpt::Level, + bool = true); + + // Emission of machine code through JITCodeEmitter is not supported. + virtual bool addPassesToEmitMachineCode(PassManagerBase &, + JITCodeEmitter &, + CodeGenOpt::Level, + bool = true) { + return true; + } + + // Emission of machine code through MCJIT is not supported. + virtual bool addPassesToEmitMC(PassManagerBase &, + MCContext *&, + raw_ostream &, + CodeGenOpt::Level, + bool = true) { + return true; + } + + private: + + bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, + bool DisableVerify, MCContext *&OutCtx); }; // class PTXTargetMachine class PTX32TargetMachine : public PTXTargetMachine { public: - PTX32TargetMachine(const Target &T, const std::string &TT, - const std::string& CPU, const std::string& FS); + PTX32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; // class PTX32TargetMachine class PTX64TargetMachine : public PTXTargetMachine { public: - PTX64TargetMachine(const Target &T, const std::string &TT, - const std::string& CPU, const std::string& FS); + PTX64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + Reloc::Model RM, CodeModel::Model CM); }; // class PTX32TargetMachine } // namespace llvm diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt deleted file mode 100644 index 
4b09cf5..0000000 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMPTXInfo - PTXTargetInfo.cpp - ) - -add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen) diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile deleted file mode 100644 index 8619785..0000000 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMPTXInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp index 9df6c75..09a2735 100644 --- a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp +++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp @@ -9,7 +9,7 @@ #include "PTX.h" #include "llvm/Module.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/contrib/llvm/lib/Target/PTX/generate-register-td.py b/contrib/llvm/lib/Target/PTX/generate-register-td.py deleted file mode 100755 index 1528690..0000000 --- a/contrib/llvm/lib/Target/PTX/generate-register-td.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python -##===- generate-register-td.py --------------------------------*-python-*--===## -## -## The LLVM Compiler Infrastructure -## -## This file is distributed under the University of Illinois Open Source -## License. See LICENSE.TXT for details. -## -##===----------------------------------------------------------------------===## -## -## This file describes the PTX register file generator. -## -##===----------------------------------------------------------------------===## - -from sys import argv, exit, stdout - - -if len(argv) != 5: - print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>') - exit(1) - -try: - num_pred = int(argv[1]) - num_16bit = int(argv[2]) - num_32bit = int(argv[3]) - num_64bit = int(argv[4]) -except: - print('ERROR: Invalid integer parameter') - exit(1) - -## Print the register definition file -td_file = open('PTXRegisterInfo.td', 'w') - -td_file.write(''' -//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the PTX register file -//===----------------------------------------------------------------------===// - -class PTXReg<string n> : Register<n> { - let Namespace = "PTX"; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -''') - - -# Print predicate registers -td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n') -for r in range(0, num_pred): - td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r)) - -# Print 16-bit registers -td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_16bit): - td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r)) - -# Print 32-bit registers -td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_32bit): - td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r)) - -# Print 64-bit registers -td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n') -for r in range(0, num_64bit): - td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r)) - - -td_file.write(''' -//===----------------------------------------------------------------------===// -// Register classes -//===----------------------------------------------------------------------===// -''') - - -# Print register classes - -td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1)) -td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1)) -td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % 
(num_32bit-1)) -td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) -td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1)) -td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1)) - - -td_file.close() - -## Now write the PTXCallingConv.td file -td_file = open('PTXCallingConv.td', 'w') - -# Reserve 10% of the available registers for return values, and the other 90% -# for parameters -num_ret_pred = int(0.1 * num_pred) -num_ret_16bit = int(0.1 * num_16bit) -num_ret_32bit = int(0.1 * num_32bit) -num_ret_64bit = int(0.1 * num_64bit) -num_param_pred = num_pred - num_ret_pred -num_param_16bit = num_16bit - num_ret_16bit -num_param_32bit = num_32bit - num_ret_32bit -num_param_64bit = num_64bit - num_ret_64bit - -param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)] -ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)] -param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)] -ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)] -param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)] -ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)] -param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)] -ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)] - -param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred) -ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred) -param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit) -ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit) -param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit) -ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit) -param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit) 
-ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit) - -td_file.write(''' -//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the PTX architecture. -// -//===----------------------------------------------------------------------===// - -// PTX Formal Parameter Calling Convention -def CC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[%s]>>, - CCIfType<[i16], CCAssignToReg<[%s]>>, - CCIfType<[i32,f32], CCAssignToReg<[%s]>>, - CCIfType<[i64,f64], CCAssignToReg<[%s]>> -]>; - -// PTX Return Value Calling Convention -def RetCC_PTX : CallingConv<[ - CCIfType<[i1], CCAssignToReg<[%s]>>, - CCIfType<[i16], CCAssignToReg<[%s]>>, - CCIfType<[i32,f32], CCAssignToReg<[%s]>>, - CCIfType<[i64,f64], CCAssignToReg<[%s]>> -]>; -''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit, - ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit)) - - -td_file.close() |