summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/PTX
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/PTX')
-rw-r--r--contrib/llvm/lib/Target/PTX/CMakeLists.txt26
-rw-r--r--contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp192
-rw-r--r--contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h47
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt4
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h63
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp53
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.h28
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.td28
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp500
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h57
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXCallingConv.td29
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp179
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp182
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp273
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.h19
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrFormats.td31
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp82
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.td1241
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td278
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td78
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp15
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp32
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp36
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h163
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXParamManager.cpp73
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXParamManager.h86
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp58
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp29
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h18
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td540
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp149
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h53
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.h11
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp318
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.h60
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt7
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/Makefile15
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp2
-rwxr-xr-xcontrib/llvm/lib/Target/PTX/generate-register-td.py163
41 files changed, 3167 insertions, 2069 deletions
diff --git a/contrib/llvm/lib/Target/PTX/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/CMakeLists.txt
deleted file mode 100644
index 331266d..0000000
--- a/contrib/llvm/lib/Target/PTX/CMakeLists.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS PTX.td)
-
-tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
-tablegen(PTXGenDAGISel.inc -gen-dag-isel)
-tablegen(PTXGenInstrInfo.inc -gen-instr-desc)
-tablegen(PTXGenInstrNames.inc -gen-instr-enums)
-tablegen(PTXGenRegisterInfo.inc -gen-register-desc)
-tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(PTXGenRegisterNames.inc -gen-register-enums)
-tablegen(PTXGenSubtarget.inc -gen-subtarget)
-
-add_llvm_target(PTXCodeGen
- PTXAsmPrinter.cpp
- PTXISelDAGToDAG.cpp
- PTXISelLowering.cpp
- PTXInstrInfo.cpp
- PTXFrameLowering.cpp
- PTXMCAsmInfo.cpp
- PTXMCAsmStreamer.cpp
- PTXMFInfoExtract.cpp
- PTXRegisterInfo.cpp
- PTXSubtarget.cpp
- PTXTargetMachine.cpp
- )
-
-add_subdirectory(TargetInfo)
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
new file mode 100644
index 0000000..aabb404
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -0,0 +1,192 @@
+//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PTXInstPrinter.h"
+#include "MCTargetDesc/PTXBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "PTXGenAsmWriter.inc"
+
+PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printPredicate(MI, O);
+ switch (MI->getOpcode()) {
+ default:
+ printInstruction(MI, O);
+ break;
+ case PTX::CALL:
+ printCall(MI, O);
+ }
+ O << ";";
+ printAnnotation(O, Annot);
+}
+
+void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) {
+ // The last two operands are the predicate operands
+ int RegIndex;
+ int OpIndex;
+
+ if (MI->getOpcode() == PTX::CALL) {
+ RegIndex = 0;
+ OpIndex = 1;
+ } else {
+ RegIndex = MI->getNumOperands()-2;
+ OpIndex = MI->getNumOperands()-1;
+ }
+
+ int PredOp = MI->getOperand(OpIndex).getImm();
+ if (PredOp == PTXPredicate::None)
+ return;
+
+ if (PredOp == PTXPredicate::Negate)
+ O << '!';
+ else
+ O << '@';
+
+ printOperand(MI, RegIndex, O);
+}
+
+void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
+ O << "\tcall.uni\t";
+ // The first two operands are the predicate slot
+ unsigned Index = 2;
+ unsigned NumRets = MI->getOperand(Index++).getImm();
+
+ if (NumRets > 0) {
+ O << "(";
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumRets; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ O << "), ";
+ }
+
+ O << *(MI->getOperand(Index++).getExpr()) << ", (";
+
+ unsigned NumArgs = MI->getOperand(Index++).getImm();
+ if (NumArgs > 0) {
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ }
+ O << ")";
+}
+
+void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ double Imm = Op.getFPImm();
+ APFloat FPImm(Imm);
+ APInt FPIntImm = FPImm.bitcastToAPInt();
+ O << "0D";
+ // PTX requires us to output the full 64 bits, even if the number is zero
+ if (FPIntImm.getZExtValue() > 0) {
+ O << FPIntImm.toString(16, false);
+ } else {
+ O << "0000000000000000";
+ }
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol &Sym = SymRefExpr->getSymbol();
+ O << Sym.getName();
+ } else {
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // By definition, operand OpNo+1 is an i32imm
+ const MCOperand &Op2 = MI->getOperand(OpNo+1);
+ printOperand(MI, OpNo, O);
+ if (Op2.getImm() == 0)
+ return; // don't print "+0"
+ O << "+" << Op2.getImm();
+}
+
+void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert (Op.isImm() && "Rounding modes must be immediate values");
+ switch (Op.getImm()) {
+ default:
+ llvm_unreachable("Unknown rounding mode!");
+ case PTXRoundingMode::RndDefault:
+ llvm_unreachable("FP rounding-mode pass did not handle instruction!");
+ break;
+ case PTXRoundingMode::RndNone:
+ // Do not print anything.
+ break;
+ case PTXRoundingMode::RndNearestEven:
+ O << ".rn";
+ break;
+ case PTXRoundingMode::RndTowardsZero:
+ O << ".rz";
+ break;
+ case PTXRoundingMode::RndNegInf:
+ O << ".rm";
+ break;
+ case PTXRoundingMode::RndPosInf:
+ O << ".rp";
+ break;
+ case PTXRoundingMode::RndApprox:
+ O << ".approx";
+ break;
+ case PTXRoundingMode::RndNearestEvenInt:
+ O << ".rni";
+ break;
+ case PTXRoundingMode::RndTowardsZeroInt:
+ O << ".rzi";
+ break;
+ case PTXRoundingMode::RndNegInfInt:
+ O << ".rmi";
+ break;
+ case PTXRoundingMode::RndPosInfInt:
+ O << ".rpi";
+ break;
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
new file mode 100644
index 0000000..86dfd48
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -0,0 +1,47 @@
+//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints n PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXINSTPRINTER_H
+#define PTXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PTXInstPrinter : public MCInstPrinter {
+public:
+ PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ static const char *getInstructionName(unsigned Opcode);
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printPredicate(const MCInst *MI, raw_ostream &O);
+ void printCall(const MCInst *MI, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index df0f63f..0000000
--- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_llvm_library(LLVMPTXDesc
- PTXMCTargetDesc.cpp
- PTXMCAsmInfo.cpp
- )
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile b/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile
deleted file mode 100644
index 35f5a7b..0000000
--- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/PTX/TargetDesc/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
new file mode 100644
index 0000000..c6094be
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -0,0 +1,63 @@
+//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the PTX target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXBASEINFO_H
+#define PTXBASEINFO_H
+
+#include "PTXMCTargetDesc.h"
+
+namespace llvm {
+ namespace PTXStateSpace {
+ enum {
+ Global = 0, // default to global state space
+ Constant = 1,
+ Local = 2,
+ Parameter = 3,
+ Shared = 4
+ };
+ } // namespace PTXStateSpace
+
+ namespace PTXPredicate {
+ enum {
+ Normal = 0,
+ Negate = 1,
+ None = 2
+ };
+ } // namespace PTXPredicate
+
+ /// Namespace to hold all target-specific flags.
+ namespace PTXRoundingMode {
+ // Instruction Flags
+ enum {
+ // Rounding Mode Flags
+ RndMask = 15,
+ RndDefault = 0, // ---
+ RndNone = 1, // <NONE>
+ RndNearestEven = 2, // .rn
+ RndTowardsZero = 3, // .rz
+ RndNegInf = 4, // .rm
+ RndPosInf = 5, // .rp
+ RndApprox = 6, // .approx
+ RndNearestEvenInt = 7, // .rni
+ RndTowardsZeroInt = 8, // .rzi
+ RndNegInfInt = 9, // .rmi
+ RndPosInfInt = 10 // .rpi
+ };
+ } // namespace PTXII
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
index 23f70bd..a5af3b8 100644
--- a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -13,10 +13,12 @@
#include "PTXMCTargetDesc.h"
#include "PTXMCAsmInfo.h"
+#include "InstPrinter/PTXInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "PTXGenInstrInfo.inc"
@@ -35,9 +37,11 @@ static MCInstrInfo *createPTXMCInstrInfo() {
return X;
}
-extern "C" void LLVMInitializePTXMCInstrInfo() {
- TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ // PTX does not have a return address register.
+ InitPTXMCRegisterInfo(X, 0);
+ return X;
}
static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
@@ -47,14 +51,45 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-extern "C" void LLVMInitializePTXMCSubtargetInfo() {
+static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM);
+ return X;
+}
+
+static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCSubtargetInfo &STI) {
+ assert(SyntaxVariant == 0 && "We only have one syntax variant");
+ return new PTXInstPrinter(MAI, STI);
+}
+
+extern "C" void LLVMInitializePTXTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+ RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
createPTXMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
createPTXMCSubtargetInfo);
-}
-extern "C" void LLVMInitializePTXMCAsmInfo() {
- RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
- RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h
index 28cab24..7d46cce 100644
--- a/contrib/llvm/lib/Target/PTX/PTX.h
+++ b/contrib/llvm/lib/Target/PTX/PTX.h
@@ -15,34 +15,30 @@
#ifndef PTX_H
#define PTX_H
-#include "MCTargetDesc/PTXMCTargetDesc.h"
+#include "MCTargetDesc/PTXBaseInfo.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+ class MachineInstr;
+ class MCInst;
+ class PTXAsmPrinter;
class PTXTargetMachine;
class FunctionPass;
- namespace PTX {
- enum StateSpace {
- GLOBAL = 0, // default to global state space
- CONSTANT = 1,
- LOCAL = 2,
- PARAMETER = 3,
- SHARED = 4
- };
-
- enum Predicate {
- PRED_NORMAL = 0,
- PRED_NEGATE = 1
- };
- } // namespace PTX
-
FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
+ FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXRegisterAllocator();
+
+ void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP);
+
} // namespace llvm;
#endif // PTX_H
diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td
index f6fbe9f..693bb9c 100644
--- a/contrib/llvm/lib/Target/PTX/PTX.td
+++ b/contrib/llvm/lib/Target/PTX/PTX.td
@@ -52,13 +52,13 @@ def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
"Use Shader Model 1.3">;
def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
- "Use Shader Model 2.0">;
+ "Use Shader Model 2.0", [FeatureDouble]>;
def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
- "Use Shader Model 2.1">;
+ "Use Shader Model 2.1", [FeatureDouble]>;
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
- "Use Shader Model 2.2">;
+ "Use Shader Model 2.2", [FeatureDouble]>;
def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
- "Use Shader Model 2.3">;
+ "Use Shader Model 2.3", [FeatureDouble]>;
def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
"PTX_COMPUTE_1_0",
@@ -74,7 +74,8 @@ def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
"Use Compute Compatibility 1.3">;
def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
"PTX_COMPUTE_2_0",
- "Use Compute Compatibility 2.0">;
+ "Use Compute Compatibility 2.0",
+ [FeatureDouble]>;
//===----------------------------------------------------------------------===//
// PTX supported processors
@@ -113,12 +114,6 @@ def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
include "PTXRegisterInfo.td"
//===----------------------------------------------------------------------===//
-// Calling Conventions
-//===----------------------------------------------------------------------===//
-
-include "PTXCallingConv.td"
-
-//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -127,9 +122,20 @@ include "PTXInstrInfo.td"
def PTXInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// PTX uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def PTXAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def PTX : Target {
let InstructionSet = PTXInstrInfo;
+ let AssemblyWriters = [PTXAsmWriter];
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
index 2848d54..733744b 100644
--- a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -15,9 +15,14 @@
#define DEBUG_TYPE "ptx-asm-printer"
#include "PTX.h"
+#include "PTXAsmPrinter.h"
#include "PTXMachineFunctionInfo.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
#include "PTXTargetMachine.h"
+#include "llvm/Argument.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -28,69 +33,32 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
-class PTXAsmPrinter : public AsmPrinter {
-public:
- explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
-
- const char *getPassName() const { return "PTX Assembly Printer"; }
-
- bool doFinalization(Module &M);
-
- virtual void EmitStartOfAsmFile(Module &M);
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd() { OutStreamer.EmitRawText(Twine("}")); }
-
- virtual void EmitInstruction(const MachineInstr *MI);
-
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
- void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
-
- unsigned GetOrCreateSourceID(StringRef FileName,
- StringRef DirName);
-
- // autogen'd.
- void printInstruction(const MachineInstr *MI, raw_ostream &OS);
- static const char *getRegisterName(unsigned RegNo);
-
-private:
- void EmitVariableDeclaration(const GlobalVariable *gv);
- void EmitFunctionDeclaration();
-
- StringMap<unsigned> SourceIdMap;
-}; // class PTXAsmPrinter
-} // namespace
-
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo) {
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
+static const char *getRegisterTypeName(unsigned RegNo,
+ const MachineRegisterInfo& MRI) {
+ const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
+
+#define TEST_REGCLS(cls, clsstr) \
+ if (PTX::cls ## RegisterClass == TRC) return # clsstr;
+
TEST_REGCLS(RegPred, pred);
TEST_REGCLS(RegI16, b16);
TEST_REGCLS(RegI32, b32);
@@ -106,16 +74,16 @@ static const char *getRegisterTypeName(unsigned RegNo) {
static const char *getStateSpaceName(unsigned addressSpace) {
switch (addressSpace) {
default: llvm_unreachable("Unknown state space");
- case PTX::GLOBAL: return "global";
- case PTX::CONSTANT: return "const";
- case PTX::LOCAL: return "local";
- case PTX::PARAMETER: return "param";
- case PTX::SHARED: return "shared";
+ case PTXStateSpace::Global: return "global";
+ case PTXStateSpace::Constant: return "const";
+ case PTXStateSpace::Local: return "local";
+ case PTXStateSpace::Parameter: return "param";
+ case PTXStateSpace::Shared: return "shared";
}
return NULL;
}
-static const char *getTypeName(const Type* type) {
+static const char *getTypeName(Type* type) {
while (true) {
switch (type->getTypeID()) {
default: llvm_unreachable("Unknown type");
@@ -130,7 +98,7 @@ static const char *getTypeName(const Type* type) {
}
case Type::ArrayTyID:
case Type::PointerTyID:
- type = dyn_cast<const SequentialType>(type)->getElementType();
+ type = dyn_cast<SequentialType>(type)->getElementType();
break;
}
}
@@ -170,6 +138,7 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
{
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+ // Emit the PTX .version and .target attributes
OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
(ST.supportsDouble() ? ""
@@ -203,177 +172,118 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
EmitVariableDeclaration(i);
}
-bool PTXAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- SetupMachineFunction(MF);
- EmitFunctionDeclaration();
- EmitFunctionBody();
- return false;
-}
-
void PTXAsmPrinter::EmitFunctionBodyStart() {
OutStreamer.EmitRawText(Twine("{"));
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
+
+ // Print register definitions
+ std::string regDefs;
+ unsigned numRegs;
+
+ // pred
+ numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .pred %p<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i16
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b16 %rh<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i32
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b32 %r<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // i64
+ numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .b64 %rd<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // f32
+ numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .f32 %f<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
+
+ // f64
+ numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
+ if(numRegs > 0) {
+ regDefs += "\t.reg .f64 %fd<";
+ regDefs += utostr(numRegs);
+ regDefs += ">;\n";
+ }
- // Print local variable definition
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); i != e; ++ i) {
- unsigned reg = *i;
-
- std::string def = "\t.reg .";
- def += getRegisterTypeName(reg);
- def += ' ';
- def += getRegisterName(reg);
- def += ';';
- OutStreamer.EmitRawText(Twine(def));
+ // Local params
+ for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
+ i != e; ++i) {
+ regDefs += "\t.param .b";
+ regDefs += utostr(PM.getParamSize(*i));
+ regDefs += " ";
+ regDefs += PM.getParamName(*i);
+ regDefs += ";\n";
}
+ OutStreamer.EmitRawText(Twine(regDefs));
+
+
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
<< " frame object(s)\n");
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
if (FrameInfo->getObjectSize(i) > 0) {
- std::string def = "\t.reg .b";
- def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
- def += " s";
+ std::string def = "\t.local .align ";
+ def += utostr(FrameInfo->getObjectAlignment(i));
+ def += " .b8";
+ def += " __local";
def += utostr(i);
+ def += "[";
+ def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits
+ def += "]";
def += ";";
OutStreamer.EmitRawText(Twine(def));
}
}
-}
-
-void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- std::string str;
- str.reserve(64);
-
- raw_string_ostream OS(str);
-
- DebugLoc DL = MI->getDebugLoc();
- if (!DL.isUnknown()) {
-
- const MDNode *S = DL.getScope(MF->getFunction()->getContext());
-
- // This is taken from DwarfDebug.cpp, which is conveniently not a public
- // LLVM class.
- StringRef Fn;
- StringRef Dir;
- unsigned Src = 1;
- if (S) {
- DIDescriptor Scope(S);
- if (Scope.isCompileUnit()) {
- DICompileUnit CU(S);
- Fn = CU.getFilename();
- Dir = CU.getDirectory();
- } else if (Scope.isFile()) {
- DIFile F(S);
- Fn = F.getFilename();
- Dir = F.getDirectory();
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(S);
- Fn = SP.getFilename();
- Dir = SP.getDirectory();
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(S);
- Fn = DB.getFilename();
- Dir = DB.getDirectory();
- } else
- assert(0 && "Unexpected scope info");
-
- Src = GetOrCreateSourceID(Fn, Dir);
- }
- OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(),
- 0, 0, 0, Fn);
-
- const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc();
-
- OS << "\t.loc ";
- OS << utostr(MDL.getFileNum());
- OS << " ";
- OS << utostr(MDL.getLine());
- OS << " ";
- OS << utostr(MDL.getColumn());
- OS << "\n";
- }
-
-
- // Emit predicate
- printPredicateOperand(MI, OS);
-
- // Write instruction to str
- printInstruction(MI, OS);
- OS << ';';
- OS.flush();
-
- StringRef strref = StringRef(str);
- OutStreamer.EmitRawText(strref);
-}
-
-void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS) {
- const MachineOperand &MO = MI->getOperand(opNum);
-
- switch (MO.getType()) {
- default:
- llvm_unreachable("<unknown operand type>");
- break;
- case MachineOperand::MO_GlobalAddress:
- OS << *Mang->getSymbol(MO.getGlobal());
- break;
- case MachineOperand::MO_Immediate:
- OS << (long) MO.getImm();
- break;
- case MachineOperand::MO_MachineBasicBlock:
- OS << *MO.getMBB()->getSymbol();
- break;
- case MachineOperand::MO_Register:
- OS << getRegisterName(MO.getReg());
- break;
- case MachineOperand::MO_FPImmediate:
- APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
- bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
- // Emit 0F for 32-bit floats and 0D for 64-bit doubles.
- if (isFloat) {
- OS << "0F";
- }
- else {
- OS << "0D";
- }
- // Emit the encoded floating-point value.
- if (constFP.getZExtValue() > 0) {
- OS << constFP.toString(16, false);
- }
- else {
- OS << "00000000";
- // If We have a double-precision zero, pad to 8-bytes.
- if (!isFloat) {
- OS << "00000000";
- }
- }
- break;
- }
-}
-
-void PTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- printOperand(MI, opNum, OS);
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return; // don't print "+0"
-
- OS << "+";
- printOperand(MI, opNum+1, OS);
+ //unsigned Index = 1;
+ // Print parameter passing params
+ //for (PTXMachineFunctionInfo::param_iterator
+ // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
+ // std::string def = "\t.param .b";
+ // def += utostr(*i);
+ // def += " __ret_";
+ // def += utostr(Index);
+ // Index++;
+ // def += ";";
+ // OutStreamer.EmitRawText(Twine(def));
+ //}
}
-void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+void PTXAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText(Twine("}"));
}
-void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
- raw_ostream &OS, const char *Modifier) {
- OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+ LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
}
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
@@ -400,14 +310,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
unsigned alignment = gv->getAlignment();
if (alignment != 0) {
decl += ".align ";
- decl += utostr(Log2_32(gv->getAlignment()));
+ decl += utostr(gv->getAlignment());
decl += " ";
}
if (PointerType::classof(gv->getType())) {
- const PointerType* pointerTy = dyn_cast<const PointerType>(gv->getType());
- const Type* elementTy = pointerTy->getElementType();
+ PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
+ Type* elementTy = pointerTy->getElementType();
decl += ".b8 ";
decl += gvsym->getName();
@@ -417,14 +327,14 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
{
assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
- const ArrayType* arrayTy = dyn_cast<const ArrayType>(elementTy);
+ ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
elementTy = arrayTy->getElementType();
unsigned numElements = arrayTy->getNumElements();
while (elementTy->isArrayTy()) {
- arrayTy = dyn_cast<const ArrayType>(elementTy);
+ arrayTy = dyn_cast<ArrayType>(elementTy);
elementTy = arrayTy->getElementType();
numElements *= arrayTy->getNumElements();
@@ -447,7 +357,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
if (gv->hasInitializer())
{
- const Constant *C = gv->getInitializer();
+ const Constant *C = gv->getInitializer();
if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
{
decl += " = {";
@@ -484,7 +394,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
OutStreamer.AddBlankLine();
}
-void PTXAsmPrinter::EmitFunctionDeclaration() {
+void PTXAsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
if (!CurrentFnSym->isUndefined()) {
@@ -494,25 +404,39 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
}
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+ const MachineRegisterInfo& MRI = MF->getRegInfo();
std::string decl = isKernel ? ".entry" : ".func";
- unsigned cnt = 0;
-
if (!isKernel) {
decl += " (";
- for (PTXMachineFunctionInfo::ret_iterator
- i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
- i != e; ++i) {
- if (i != b) {
- decl += ", ";
+ if (ST.useParamSpaceForDeviceArgs()) {
+ for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
+ b = i; i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
+ decl += ".param .b";
+ decl += utostr(PM.getParamSize(*i));
+ decl += " ";
+ decl += PM.getParamName(*i);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i, MRI);
+ decl += " ";
+ decl += MFI->getRegisterName(*i);
}
- decl += ".reg .";
- decl += getRegisterTypeName(*i);
- decl += " ";
- decl += getRegisterName(*i);
}
decl += ")";
}
@@ -523,26 +447,65 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
decl += " (";
- cnt = 0;
+ const Function *F = MF->getFunction();
// Print parameters
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
- i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
- if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
decl += ".param .b";
- decl += utostr(*i);
+ decl += utostr(PM.getParamSize(*i));
decl += " ";
- decl += PARAM_PREFIX;
- decl += utostr(++cnt);
- } else {
+ decl += PM.getParamName(*i);
+ }*/
+ int Counter = 1;
+ for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b)
+ decl += ", ";
+ const Type *ArgType = (*i).getType();
+ decl += ".param .b";
+ if (ArgType->isPointerTy()) {
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ } else {
+ decl += utostr(ArgType->getPrimitiveSizeInBits());
+ }
+ if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
+ const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
+ decl += " .ptr";
+ switch (PtrType->getAddressSpace()) {
+ default:
+ llvm_unreachable("Unknown address space in argument");
+ case PTXStateSpace::Global:
+ decl += " .global";
+ break;
+ case PTXStateSpace::Shared:
+ decl += " .shared";
+ break;
+ }
+ }
+ decl += " __param_";
+ decl += utostr(Counter++);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+
decl += ".reg .";
- decl += getRegisterTypeName(*i);
+ decl += getRegisterTypeName(*i, MRI);
decl += " ";
- decl += getRegisterName(*i);
+ decl += MFI->getRegisterName(*i);
}
}
decl += ")";
@@ -550,25 +513,6 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
OutStreamer.EmitRawText(Twine(decl));
}
-void PTXAsmPrinter::
-printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
- int i = MI->findFirstPredOperandIdx();
- if (i == -1)
- llvm_unreachable("missing predicate operand");
-
- unsigned reg = MI->getOperand(i).getReg();
- int predOp = MI->getOperand(i+1).getImm();
-
- DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n");
-
- if (reg != PTX::NoRegister) {
- O << '@';
- if (predOp == PTX::PRED_NEGATE)
- O << '!';
- O << getRegisterName(reg);
- }
-}
-
unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
StringRef DirName) {
// If FE did not provide a file name, then assume stdin.
@@ -596,10 +540,58 @@ unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
return SrcId;
}
-#include "PTXGenAsmWriter.inc"
+MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
+ MCOperand MCOp;
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const MCExpr *Expr;
+ const char *RegSymbolName;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unknown operand type");
+ case MachineOperand::MO_Register:
+ // We create register operands as symbols, since the PTXInstPrinter class
+ // has no way to map virtual registers back to a name without some ugly
+ // hacks.
+ // FIXME: Figure out a better way to handle virtual register naming.
+ RegSymbolName = MFI->getRegisterName(MO.getReg());
+ Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
+ OutContext);
+ MCOp = MCOperand::CreateExpr(Expr);
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_FPImmediate:
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+
+ return MCOp;
+}
// Force static initialization.
extern "C" void LLVMInitializePTXAsmPrinter() {
RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h
new file mode 100644
index 0000000..538c080
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h
@@ -0,0 +1,57 @@
+//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PTX Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXASMPRINTER_H
+#define PTXASMPRINTER_H
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
+public:
+ explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const { return "PTX Assembly Printer"; }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitStartOfAsmFile(Module &M);
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ unsigned GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName);
+
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCOperand lowerOperand(const MachineOperand &MO);
+
+private:
+ void EmitVariableDeclaration(const GlobalVariable *gv);
+ void EmitFunctionDeclaration();
+
+ StringMap<unsigned> SourceIdMap;
+}; // class PTXAsmPrinter
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td b/contrib/llvm/lib/Target/PTX/PTXCallingConv.td
deleted file mode 100644
index 3e3ff48..0000000
--- a/contrib/llvm/lib/Target/PTX/PTXCallingConv.td
+++ /dev/null
@@ -1,29 +0,0 @@
-
-//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the PTX architecture.
-//
-//===----------------------------------------------------------------------===//
-
-// PTX Formal Parameter Calling Convention
-def CC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
- CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
- CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
- CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
-]>;
-
-// PTX Return Value Calling Convention
-def RetCC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
- CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
- CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
- CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
-]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp
new file mode 100644
index 0000000..0b653e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -0,0 +1,179 @@
+//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a machine function pass that sets appropriate FP rounding
+// modes for all relevant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-fp-rounding-mode"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+// NOTE: PTXFPRoundingModePass should be executed just before emission.
+
+namespace llvm {
+ /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
+ /// all FP instructions. Essentially, this pass just looks for all FP
+ /// instructions that have a rounding mode set to RndDefault, and sets an
+ /// appropriate rounding mode based on the target device.
+ ///
+ class PTXFPRoundingModePass : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ typedef std::pair<unsigned, unsigned> RndModeDesc;
+
+ PTXTargetMachine& TargetMachine;
+ DenseMap<unsigned, RndModeDesc> Instrs;
+
+ public:
+ PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID),
+ TargetMachine(TM) {
+ initializeMap();
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX FP Rounding Mode Pass";
+ }
+
+ private:
+
+ void initializeMap();
+ void processInstruction(MachineInstr &MI);
+ }; // class PTXFPRoundingModePass
+} // namespace llvm
+
+using namespace llvm;
+
+char PTXFPRoundingModePass::ID = 0;
+
+bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
+ // Look at each basic block
+ for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
+ ++bbi) {
+ MachineBasicBlock &MBB = *bbi;
+ // Look at each instruction
+ for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end();
+ ii != ie; ++ii) {
+ MachineInstr &MI = *ii;
+ processInstruction(MI);
+ }
+ }
+ return false;
+}
+
+void PTXFPRoundingModePass::initializeMap() {
+ using namespace PTXRoundingMode;
+ const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
+
+ // Build a map of default rounding mode for all instructions that need a
+ // rounding mode.
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
+
+ unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
+
+ unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
+
+ Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
+
+ Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+
+ Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+}
+
+void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
+ // Is this an instruction that needs a rounding mode?
+ if (Instrs.count(MI.getOpcode())) {
+ const RndModeDesc &Desc = Instrs[MI.getOpcode()];
+ // Get the rounding mode operand
+ MachineOperand &Op = MI.getOperand(Desc.first);
+ // Update the rounding mode if needed
+ if (Op.getImm() == PTXRoundingMode::RndDefault) {
+ Op.setImm(Desc.second);
+ }
+ }
+}
+
+FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXFPRoundingModePass(TM, OptLevel);
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
index 9adfa62..5c7ee29 100644
--- a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/Debug.h"
@@ -37,6 +39,7 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);
// Include the pieces auto'gened from the target description
#include "PTXGenDAGISel.inc"
@@ -46,6 +49,10 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
// pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
SDNode *SelectBRCOND(SDNode *Node);
+ SDNode *SelectREADPARAM(SDNode *Node);
+ SDNode *SelectWRITEPARAM(SDNode *Node);
+ SDNode *SelectFrameIndex(SDNode *Node);
+
bool isImm(const SDValue &operand);
bool SelectImm(const SDValue &operand, SDValue &imm);
@@ -68,6 +75,12 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
switch (Node->getOpcode()) {
case ISD::BRCOND:
return SelectBRCOND(Node);
+ case PTXISD::READ_PARAM:
+ return SelectREADPARAM(Node);
+ case PTXISD::WRITE_PARAM:
+ return SelectWRITEPARAM(Node);
+ case ISD::FrameIndex:
+ return SelectFrameIndex(Node);
default:
return SelectCode(Node);
}
@@ -79,7 +92,7 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
SDValue Chain = Node->getOperand(0);
SDValue Pred = Node->getOperand(1);
SDValue Target = Node->getOperand(2); // branch target
- SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32);
DebugLoc dl = Node->getDebugLoc();
assert(Target.getOpcode() == ISD::BasicBlock);
@@ -90,6 +103,97 @@ SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
}
+SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Index = Node->getOperand(1);
+
+ int OpCode;
+
+ // Get the type of parameter we are reading
+ EVT VT = Node->getValueType(0);
+ assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::READPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::READPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::READPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::READPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::READPARAMF32;
+ else {
+ assert(Type == MVT::f64 && "Unexpected type!");
+ OpCode = PTX::READPARAMF64;
+ }
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Index, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Value = Node->getOperand(1);
+
+ int OpCode;
+
+ //Node->dumpr(CurDAG);
+
+ // Get the type of parameter we are writing
+ EVT VT = Value->getValueType(0);
+ assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::WRITEPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::WRITEPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::WRITEPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::WRITEPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::WRITEPARAMF32;
+ else if (Type == MVT::f64)
+ OpCode = PTX::WRITEPARAMF64;
+ else
+ llvm_unreachable("Invalid type in SelectWRITEPARAM");
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Value, Pred, PredOp, Chain };
+ SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);
+
+ //dbgs() << "SelectWRITEPARAM produced:\n\t";
+ //Ret->dumpr(CurDAG);
+
+ return Ret;
+}
+
+SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ //dbgs() << "Selecting FrameIndex at index " << FI << "\n";
+ //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0));
+
+ PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+ SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI),
+ Node->getValueType(0));
+
+ return FrameSymbol.getNode();
+}
+
// Match memory operand of the form [reg+reg]
bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
@@ -107,14 +211,54 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
SDValue &Offset) {
- if (Addr.getOpcode() != ISD::ADD) {
+ // FrameIndex addresses are handled separately
+ //errs() << "SelectADDRri: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Success\n";
+ return true;
+ }
+
+ /*if (Addr.getNumOperands() == 1) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ errs() << "Success\n";
+ return true;
+ }*/
+
+ //errs() << "SelectADDRri fails on: ";
+ //Addr.getNode()->dumpr();
+
+ if (isImm(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ //errs() << "Success\n";
+ return true;
+
+ /*if (Addr.getOpcode() != ISD::ADD) {
// let SelectADDRii handle the [imm] case
if (isImm(Addr))
return false;
// it is [reg]
assert(Addr.getValueType().isSimple() && "Type must be simple");
-
Base = Addr;
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
@@ -136,7 +280,7 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
}
// neither [reg+imm] nor [imm+reg]
- return false;
+ return false;*/
}
// Match memory operand of the form [imm+imm] and [imm]
@@ -160,6 +304,36 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
return false;
}
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ //errs() << "SelectADDRlocal: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ //errs() << "Success\n";
+ return true;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (!isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Offset: ";
+ //Offset.getNode()->dumpr();
+ //errs() << "Success\n";
+ return true;
+ }
+
+ //errs() << "Failure\n";
+ return false;
+}
+
bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
return ConstantSDNode::classof(operand.getNode());
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
index 6fcf710..3307d91 100644
--- a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
@@ -16,23 +16,19 @@
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
+#include "llvm/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "PTXGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
@@ -47,57 +43,58 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setMinFunctionAlignment(2);
-
+
////////////////////////////////////
/////////// Expansion //////////////
////////////////////////////////////
-
+
// (any/zero/sign) extload => load + (any/zero/sign) extend
-
+
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
-
+
// f32 extload => load + fextend
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
// f64 truncstore => trunc + store
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
// sign_extend_inreg => sign_extend
-
+
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
+
// br_cc => brcond
-
+
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
// select_cc => setcc
-
+
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
-
+
////////////////////////////////////
//////////// Legal /////////////////
////////////////////////////////////
-
+
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
+
////////////////////////////////////
//////////// Custom ////////////////
////////////////////////////////////
-
+
// customise setcc to use bitwise logic if possible
-
+
setOperationAction(ISD::SETCC, MVT::i1, Custom);
// customize translation of memory addresses
-
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -105,7 +102,7 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
computeRegisterProperties();
}
-MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
return MVT::i1;
}
@@ -130,10 +127,16 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "PTXISD::LOAD_PARAM";
case PTXISD::STORE_PARAM:
return "PTXISD::STORE_PARAM";
+ case PTXISD::READ_PARAM:
+ return "PTXISD::READ_PARAM";
+ case PTXISD::WRITE_PARAM:
+ return "PTXISD::WRITE_PARAM";
case PTXISD::EXIT:
return "PTXISD::EXIT";
case PTXISD::RET:
return "PTXISD::RET";
+ case PTXISD::CALL:
+ return "PTXISD::CALL";
}
}
@@ -149,7 +152,7 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- // Look for X == 0, X == 1, X != 0, or X != 1
+ // Look for X == 0, X == 1, X != 0, or X != 1
// We can simplify these to bitwise logic
if (Op1.getOpcode() == ISD::Constant &&
@@ -197,6 +200,7 @@ SDValue PTXTargetLowering::
MachineFunction &MF = DAG.getMachineFunction();
const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
switch (CallConv) {
default:
@@ -216,68 +220,34 @@ SDValue PTXTargetLowering::
if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
// We just need to emit the proper LOAD_PARAM ISDs
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-
assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
"Kernels cannot take pred operands");
+ unsigned ParamSize = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addArgumentParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
- DAG.getTargetConstant(i, MVT::i32));
+ ParamValue);
InVals.push_back(ArgValue);
-
- // Instead of storing a physical register in our argument list, we just
- // store the total size of the parameter, in bits. The ASM printer
- // knows how to process this.
- MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
}
}
else {
- // For device functions, we use the PTX calling convention to do register
- // assignments then create CopyFromReg ISDs for the allocated registers
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
- *DAG.getContext());
-
- CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-
- CCValAssign& VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- TargetRegisterClass* TRC = 0;
-
- assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
-
- // Determine which register class we need
- if (RegVT == MVT::i1) {
- TRC = PTX::RegPredRegisterClass;
- }
- else if (RegVT == MVT::i16) {
- TRC = PTX::RegI16RegisterClass;
- }
- else if (RegVT == MVT::i32) {
- TRC = PTX::RegI32RegisterClass;
- }
- else if (RegVT == MVT::i64) {
- TRC = PTX::RegI64RegisterClass;
- }
- else if (RegVT == MVT::f32) {
- TRC = PTX::RegF32RegisterClass;
- }
- else if (RegVT == MVT::f64) {
- TRC = PTX::RegF64RegisterClass;
- }
- else {
- llvm_unreachable("Unknown parameter type");
- }
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT RegVT = Ins[i].VT;
+ TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ // Use a unique index in the instruction to prevent instruction folding.
+ // Yes, this is a hack.
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32);
unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
- MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+ SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain,
+ Index);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
InVals.push_back(ArgValue);
- MFI->addArgReg(VA.getLocReg());
+ MFI->addArgReg(Reg);
}
}
@@ -301,41 +271,66 @@ SDValue PTXTargetLowering::
assert(Outs.size() == 0 && "Kernel must return void.");
return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
case CallingConv::PTX_Device:
- //assert(Outs.size() <= 1 && "Can at most return one value.");
+ assert(Outs.size() <= 1 && "Can at most return one value.");
break;
}
MachineFunction& MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
SDValue Flag;
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
- // Even though we could use the .param space for return arguments for
- // device functions if SM >= 2.0 and the number of return arguments is
- // only 1, we just always use registers since this makes the codegen
- // easier.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
- CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
-
- for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
- CCValAssign& VA = RVLocs[i];
-
- assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+ if (ST.useParamSpaceForDeviceArgs()) {
+ assert(Outs.size() < 2 && "Device functions can return at most one value");
+
+ if (Outs.size() == 1) {
+ unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
+ unsigned Param = PM.addReturnParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[0]);
+ }
+ } else {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT RegVT = Outs[i].VT;
+ TargetRegisterClass* TRC = 0;
- unsigned Reg = VA.getLocReg();
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
- DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
- Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
+ SDValue OutReg = DAG.getRegister(Reg, RegVT);
- // Guarantee that all emitted copies are stuck together,
- // avoiding something bad
- Flag = Chain.getValue(1);
+ Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
- MFI->addRetReg(Reg);
+ MFI->addRetReg(Reg);
+ }
}
if (Flag.getNode() == 0) {
@@ -345,3 +340,83 @@ SDValue PTXTargetLowering::
return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
}
}
+
+SDValue
+PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
+
+ assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
+ "Calls are not handled for the target device");
+
+ std::vector<SDValue> Ops;
+ // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
+ Ops.resize(Outs.size() + Ins.size() + 4);
+
+ Ops[0] = Chain;
+
+ // Identify the callee function
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device &&
+ "PTX function calls must be to PTX device functions");
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ Ops[Ins.size()+2] = Callee;
+
+ // Generate STORE_PARAM nodes for each function argument. In PTX, function
+ // arguments are explicitly stored into .param variables and passed as
+ // arguments. There is no register/stack-based calling convention in PTX.
+ Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32);
+ for (unsigned i = 0; i != OutVals.size(); ++i) {
+ unsigned Size = OutVals[i].getValueType().getSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[i]);
+ Ops[i+Ins.size()+4] = ParamValue;
+ }
+
+ std::vector<SDValue> InParams;
+
+ // Generate list of .param variables to hold the return value(s).
+ Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ unsigned Size = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Ops[i+2] = ParamValue;
+ InParams.push_back(ParamValue);
+ }
+
+ Ops[0] = Chain;
+
+ // Create the CALL node.
+ Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());
+
+ // Create the LOAD_PARAM nodes that retrieve the function return value(s).
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ InParams[i]);
+ InVals.push_back(Load);
+ }
+
+ return Chain;
+}
+
+unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
+ // All arguments consist of one "register," regardless of the type.
+ return 1;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
index 4318541..4d25665 100644
--- a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -26,9 +26,12 @@ namespace PTXISD {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
LOAD_PARAM,
STORE_PARAM,
+ READ_PARAM,
+ WRITE_PARAM,
EXIT,
RET,
- COPY_ADDRESS
+ COPY_ADDRESS,
+ CALL
};
} // namespace PTXISD
@@ -60,7 +63,19 @@ class PTXTargetLowering : public TargetLowering {
DebugLoc dl,
SelectionDAG &DAG) const;
- virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
private:
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
index 8cee351..397fdc3 100644
--- a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -7,12 +7,39 @@
//
//===----------------------------------------------------------------------===//
-// PTX Predicate operand, default to (0, 0) = (zero-reg, always).
+
+// Rounding Mode Specifier
+/*class RoundingMode<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def RndDefault : RoundingMode<0>;
+def RndNearestEven : RoundingMode<1>;
+def RndNearestZero : RoundingMode<2>;
+def RndNegInf : RoundingMode<3>;
+def RndPosInf : RoundingMode<4>;
+def RndApprox : RoundingMode<5>;*/
+
+
+// Rounding Mode Operand
+def RndMode : Operand<i32> {
+ let PrintMethod = "printRoundingMode";
+}
+
+def RndDefault : PatLeaf<(i32 0)>;
+
+// PTX Predicate operand, default to (0, 0) = (zero-reg, none).
// Leave PrintMethod empty; predicate printing is defined elsewhere.
def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
- (ops (i1 zero_reg), (i32 0))>;
+ (ops (i1 zero_reg), (i32 2))>;
+def RndModeOperand : Operand<OtherVT> {
+ let MIOperandInfo = (ops i32imm);
+}
+
+// Instruction Types
let Namespace = "PTX" in {
+
class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
: Instruction {
dag OutOperandList = oops;
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
index 425265a..1b947a5 100644
--- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
@@ -16,10 +16,11 @@
#include "PTX.h"
#include "PTXInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRINFO_CTOR
@@ -47,8 +48,13 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DstReg, unsigned SrcReg,
bool KillSrc) const {
- for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
- if (map[i].cls->contains(DstReg, SrcReg)) {
+
+ const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
+ // "Invalid register copy between two register classes");
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
+ if (map[i].cls == MRI.getRegClass(DstReg)) {
const MCInstrDesc &MCID = get(map[i].opcode);
MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
addReg(SrcReg, getKillRegState(KillSrc));
@@ -161,7 +167,7 @@ DefinesPredicate(MachineInstr *MI,
return false;
Pred.push_back(MO);
- Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
return true;
}
@@ -277,7 +283,7 @@ InsertBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(PTX::BRAdp))
.addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(PTX::BRAd))
- .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
return 2;
} else if (Cond.size()) {
BuildMI(&MBB, DL, get(PTX::BRAdp))
@@ -285,7 +291,7 @@ InsertBranch(MachineBasicBlock &MBB,
return 1;
} else {
BuildMI(&MBB, DL, get(PTX::BRAd))
- .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL);
+ .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
return 1;
}
}
@@ -296,34 +302,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineInstr& MI = *MII;
- DebugLoc DL = MI.getDebugLoc();
-
- DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
-
- int OpCode;
-
- // Select the appropriate opcode based on the register class
- if (RC == PTX::RegI16RegisterClass) {
- OpCode = PTX::STACKSTOREI16;
- } else if (RC == PTX::RegI32RegisterClass) {
- OpCode = PTX::STACKSTOREI32;
- } else if (RC == PTX::RegI64RegisterClass) {
- OpCode = PTX::STACKSTOREI32;
- } else if (RC == PTX::RegF32RegisterClass) {
- OpCode = PTX::STACKSTOREF32;
- } else if (RC == PTX::RegF64RegisterClass) {
- OpCode = PTX::STACKSTOREF64;
- } else {
- llvm_unreachable("Unknown PTX register class!");
- }
-
- // Build the store instruction (really a mov)
- MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
- MIB.addFrameIndex(FrameIdx);
- MIB.addReg(SrcReg);
-
- AddDefaultPredicate(MIB);
+ assert(false && "storeRegToStackSlot should not be called for PTX");
}
void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -331,34 +310,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineInstr& MI = *MII;
- DebugLoc DL = MI.getDebugLoc();
-
- DEBUG(dbgs() << "loadRegToStackSlot: " << MI);
-
- int OpCode;
-
- // Select the appropriate opcode based on the register class
- if (RC == PTX::RegI16RegisterClass) {
- OpCode = PTX::STACKLOADI16;
- } else if (RC == PTX::RegI32RegisterClass) {
- OpCode = PTX::STACKLOADI32;
- } else if (RC == PTX::RegI64RegisterClass) {
- OpCode = PTX::STACKLOADI32;
- } else if (RC == PTX::RegF32RegisterClass) {
- OpCode = PTX::STACKLOADF32;
- } else if (RC == PTX::RegF64RegisterClass) {
- OpCode = PTX::STACKLOADF64;
- } else {
- llvm_unreachable("Unknown PTX register class!");
- }
-
- // Build the load instruction (really a mov)
- MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
- MIB.addReg(DestReg);
- MIB.addFrameIndex(FrameIdx);
-
- AddDefaultPredicate(MIB);
+ assert(false && "loadRegFromStackSlot should not be called for PTX");
}
// static helper routines
@@ -367,7 +319,7 @@ MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
DebugLoc dl, EVT VT, SDValue Op1) {
SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
SDValue ops[] = { Op1, predReg, predOp };
return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
}
@@ -376,7 +328,7 @@ MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
- SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
SDValue ops[] = { Op1, Op2, predReg, predOp };
return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
}
@@ -384,7 +336,7 @@ GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
if (MI->findFirstPredOperandIdx() == -1) {
MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
- MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
}
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
index 6bfe906..a3fcea9 100644
--- a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
@@ -21,10 +21,6 @@ include "PTXInstrFormats.td"
// Code Generation Predicates
//===----------------------------------------------------------------------===//
-// Addressing
-def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
-def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
-
// Shader Model Support
def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
@@ -43,130 +39,19 @@ def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
-//===----------------------------------------------------------------------===//
-// Instruction Pattern Stuff
-//===----------------------------------------------------------------------===//
-def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::GLOBAL;
- return false;
-}]>;
-
-def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::CONSTANT;
- return false;
-}]>;
-
-def load_local : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::LOCAL;
- return false;
-}]>;
-
-def load_parameter : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::PARAMETER;
- return false;
-}]>;
-
-def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::SHARED;
- return false;
-}]>;
-
-def store_global
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::GLOBAL;
- return false;
-}]>;
-
-def store_local
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::LOCAL;
- return false;
-}]>;
-
-def store_parameter
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::PARAMETER;
- return false;
-}]>;
-
-def store_shared
- : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
- const Value *Src;
- const PointerType *PT;
- if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
- (PT = dyn_cast<PointerType>(Src->getType())))
- return PT->getAddressSpace() == PTX::SHARED;
- return false;
-}]>;
-
-// Addressing modes.
-def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
-def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
-def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
-def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
-def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
-
-// Address operands
-def MEMri32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RegI32, i32imm);
-}
-def MEMri64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RegI64, i64imm);
-}
-def MEMii32 : Operand<i32> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i32imm, i32imm);
-}
-def MEMii64 : Operand<i64> {
- let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops i64imm, i64imm);
-}
-// The operand here does not correspond to an actual address, so we
-// can use i32 in 64-bit address modes.
-def MEMpi : Operand<i32> {
- let PrintMethod = "printParamOperand";
- let MIOperandInfo = (ops i32imm);
-}
-def MEMret : Operand<i32> {
- let PrintMethod = "printReturnOperand";
- let MIOperandInfo = (ops i32imm);
-}
+
+// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart,
+// [SDNPHasChain, SDNPOutGlue]>;
+// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd,
+// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def PTXcall : SDNode<"PTXISD::CALL", SDTNone,
+ [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>;
+
// Branch & call targets have OtherVT type.
def brtarget : Operand<OtherVT>;
@@ -189,87 +74,73 @@ def PTXret
def PTXcopyaddress
: SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
-// Load/store .param space
-def PTXloadparam
- : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
-def PTXstoreparam
- : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
+// For floating-point instructions, we cannot just embed the pattern into the
+// instruction definition since we need to muck around with the rounding mode,
+// and I do not know how to insert constants into instructions directly from
+// pattern matches.
+
//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
-multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
+multiclass PTX_FLOAT_2OP<string opcstr> {
def rr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- !strconcat(opcstr, ".f32\t$d, $a"),
- [(set RegF32:$d, (opnode RegF32:$a))]>;
+ (ins RndMode:$r, RegF32:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
def ri32 : InstPTX<(outs RegF32:$d),
- (ins f32imm:$a),
- !strconcat(opcstr, ".f32\t$d, $a"),
- [(set RegF32:$d, (opnode fpimm:$a))]>;
+ (ins RndMode:$r, f32imm:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
def rr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- !strconcat(opcstr, ".f64\t$d, $a"),
- [(set RegF64:$d, (opnode RegF64:$a))]>;
+ (ins RndMode:$r, RegF64:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
def ri64 : InstPTX<(outs RegF64:$d),
- (ins f64imm:$a),
- !strconcat(opcstr, ".f64\t$d, $a"),
- [(set RegF64:$d, (opnode fpimm:$a))]>;
+ (ins RndMode:$r, f64imm:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
}
//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
-multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
+multiclass PTX_FLOAT_3OP<string opcstr> {
def rr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- !strconcat(opcstr, ".f32\t$d, $a, $b"),
- [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
def ri32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- !strconcat(opcstr, ".f32\t$d, $a, $b"),
- [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>;
+ (ins RndMode:$r, RegF32:$a, f32imm:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
def rr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- !strconcat(opcstr, ".f64\t$d, $a, $b"),
- [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
def ri64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- !strconcat(opcstr, ".f64\t$d, $a, $b"),
- [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>;
+ (ins RndMode:$r, RegF64:$a, f64imm:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
}
//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
-multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
+multiclass PTX_FLOAT_4OP<string opcstr> {
def rrr32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b, RegF32:$c),
- !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
- [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
- RegF32:$b),
- RegF32:$c))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
def rri32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b, f32imm:$c),
- !strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
- [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
- RegF32:$b),
- fpimm:$c))]>;
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rii32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
def rrr64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b, RegF64:$c),
- !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
- [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
- RegF64:$b),
- RegF64:$c))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
def rri64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b, f64imm:$c),
- !strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
- [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
- RegF64:$b),
- fpimm:$c))]>;
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+ def rii64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
}
-multiclass INT3<string opcstr, SDNode opnode> {
+//===- Integer Instructions - 3 Operand Form ------------------------------===//
+multiclass PTX_INT3<string opcstr, SDNode opnode> {
def rr16 : InstPTX<(outs RegI16:$d),
(ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, ".u16\t$d, $a, $b"),
@@ -296,6 +167,35 @@ multiclass INT3<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}
+//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===//
+multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===//
multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
def ripreds : InstPTX<(outs RegPred:$d),
(ins RegPred:$a, i1imm:$b),
@@ -331,7 +231,8 @@ multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}
-multiclass INT3ntnc<string opcstr, SDNode opnode> {
+//===- Integer Shift Instructions - 3 Operand Form ------------------------===//
+multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> {
def rr16 : InstPTX<(outs RegI16:$d),
(ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, "16\t$d, $a, $b"),
@@ -370,6 +271,7 @@ multiclass INT3ntnc<string opcstr, SDNode opnode> {
[(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
}
+//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===//
multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
CondCode cmp, string cmpstr> {
// TODO support 5-operand format: p|q, a, b, c
@@ -385,56 +287,77 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
def rr_and_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_and_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
def rr_or_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_or_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
def rr_xor_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_xor_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
def rr_and_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_and_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_or_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
def ri_xor_not_r
: InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
}
-multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
+//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===//
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls,
CondCode ucmp, CondCode ocmp, string cmpstr> {
// TODO support 5-operand format: p|q, a, b, c
@@ -447,137 +370,110 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
!strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
[(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+ def ri_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>;
+ def ri_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>;
+
def rr_and_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
def rr_and_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
def rr_or_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
def rr_or_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
[(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
def rr_xor_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
def rr_xor_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
def rr_and_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_and_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_or_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r_u
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
def rr_xor_not_r_o
: InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
- !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
}
-multiclass PTX_SELP<RegisterClass RC, string regclsname> {
+//===- Select Predicate Instructions - 4 Operand Form ---------------------===//
+multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls,
+ SDNode immnode> {
def rr
: InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
!strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
[(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
+ def ri
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>;
+ def ii
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>;
}
-multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
- def rr32 : InstPTX<(outs RC:$d),
- (ins MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>;
- def rr64 : InstPTX<(outs RC:$d),
- (ins MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>;
- def ri32 : InstPTX<(outs RC:$d),
- (ins MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>;
- def ri64 : InstPTX<(outs RC:$d),
- (ins MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>;
- def ii32 : InstPTX<(outs RC:$d),
- (ins MEMii32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>;
- def ii64 : InstPTX<(outs RC:$d),
- (ins MEMii64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
- [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>;
-}
-
-multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
- defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
- defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
- defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
- defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
- defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
-}
-
-multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
- def rr32 : InstPTX<(outs),
- (ins RC:$d, MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>;
- def rr64 : InstPTX<(outs),
- (ins RC:$d, MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>;
- def ri32 : InstPTX<(outs),
- (ins RC:$d, MEMri32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>;
- def ri64 : InstPTX<(outs),
- (ins RC:$d, MEMri64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>;
- def ii32 : InstPTX<(outs),
- (ins RC:$d, MEMii32:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>;
- def ii64 : InstPTX<(outs),
- (ins RC:$d, MEMii64:$a),
- !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
- [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>;
-}
-multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
- defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
- defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
- defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
- defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
- defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
-}
//===----------------------------------------------------------------------===//
// Instructions
@@ -585,118 +481,61 @@ multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
///===- Integer Arithmetic Instructions -----------------------------------===//
-defm ADD : INT3<"add", add>;
-defm SUB : INT3<"sub", sub>;
-defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
-defm DIV : INT3<"div", udiv>;
-defm REM : INT3<"rem", urem>;
+defm ADD : PTX_INT3<"add", add>;
+defm SUB : PTX_INT3<"sub", sub>;
+defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : PTX_INT3<"div", udiv>;
+defm SDIV : PTX_INT3_SIGNED<"div", sdiv>;
+defm REM : PTX_INT3<"rem", urem>;
///===- Floating-Point Arithmetic Instructions ----------------------------===//
-// Standard Unary Operations
-defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
+// FNEG
+defm FNEG : PTX_FLOAT_2OP<"neg">;
// Standard Binary Operations
-defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
-defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
-defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
-
-// For floating-point division:
-// SM_13+ defaults to .rn for f32 and f64,
-// SM10 must *not* provide a rounding
-
-// TODO:
-// - Allow user selection of rounding modes for fdiv
-// - Add support for -prec-div=false (.approx)
-
-def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- "div.rn.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- "div.rn.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, RegF32:$b),
- "div.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a, f32imm:$b),
- "div.f32\t$d, $a, $b",
- [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-
-def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- "div.rn.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- "div.rn.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
- Requires<[FDivNeedsRoundingMode]>;
-def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, RegF64:$b),
- "div.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a, f64imm:$b),
- "div.f64\t$d, $a, $b",
- [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
- Requires<[FDivNoRoundingMode]>;
-
-
+defm FADD : PTX_FLOAT_3OP<"add">;
+defm FSUB : PTX_FLOAT_3OP<"sub">;
+defm FMUL : PTX_FLOAT_3OP<"mul">;
+defm FDIV : PTX_FLOAT_3OP<"div">;
// Multi-operation hybrid instructions
+defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>;
-// The selection of mad/fma is tricky. In some cases, they are the *same*
-// instruction, but in other cases we may prefer one or the other. Also,
-// different PTX versions differ on whether rounding mode flags are required.
-// In the short term, mad is supported on all PTX versions and we use a
-// default rounding mode no matter what shader model or PTX version.
-// TODO: Allow the rounding mode to be selectable through llc.
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
- Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
-defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
- Requires<[FMadNoRoundingMode, SupportsFMA]>;
///===- Floating-Point Intrinsic Instructions -----------------------------===//
-def FSQRT32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "sqrt.rn.f32\t$d, $a",
- [(set RegF32:$d, (fsqrt RegF32:$a))]>;
-
-def FSQRT64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "sqrt.rn.f64\t$d, $a",
- [(set RegF64:$d, (fsqrt RegF64:$a))]>;
-
-def FSIN32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "sin.approx.f32\t$d, $a",
- [(set RegF32:$d, (fsin RegF32:$a))]>;
+// SQRT
+def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+
+// SIN
+def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sin$r.f64\t$d, $a", []>;
+def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sin$r.f64\t$d, $a", []>;
+
+// COS
+def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "cos$r.f64\t$d, $a", []>;
+def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "cos$r.f64\t$d, $a", []>;
-def FSIN64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "sin.approx.f64\t$d, $a",
- [(set RegF64:$d, (fsin RegF64:$a))]>;
-def FCOS32 : InstPTX<(outs RegF32:$d),
- (ins RegF32:$a),
- "cos.approx.f32\t$d, $a",
- [(set RegF32:$d, (fcos RegF32:$a))]>;
-
-def FCOS64 : InstPTX<(outs RegF64:$d),
- (ins RegF64:$a),
- "cos.approx.f64\t$d, $a",
- [(set RegF64:$d, (fcos RegF64:$a))]>;
///===- Comparison and Selection Instructions -----------------------------===//
@@ -744,35 +583,35 @@ defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
// Compare f32
-defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">;
-defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">;
-defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">;
-defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">;
-defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">;
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">;
// Compare f64
-defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">;
-defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">;
-defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">;
-defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">;
-defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">;
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">;
// .selp
-defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">;
-defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">;
-defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">;
-defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">;
-defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">;
+defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>;
+defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>;
+defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>;
+defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>;
+defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>;
///===- Logic and Shift Instructions --------------------------------------===//
-defm SHL : INT3ntnc<"shl.b", PTXshl>;
-defm SRL : INT3ntnc<"shr.u", PTXsrl>;
-defm SRA : INT3ntnc<"shr.s", PTXsra>;
+defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>;
+defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>;
defm AND : PTX_LOGIC<"and", and>;
defm OR : PTX_LOGIC<"or", or>;
@@ -780,6 +619,24 @@ defm XOR : PTX_LOGIC<"xor", xor>;
///===- Data Movement and Conversion Instructions -------------------------===//
+// any_extend
+// Implement the anyext instruction in terms of the PTX cvt instructions.
+//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>;
+
+// bitconvert
+// These instructions implement the bit-wise conversion between integer and
+// floating-point types.
+def MOVi32f32
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>;
+def MOVf32i32
+ : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>;
+def MOVi64f64
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>;
+def MOVf64i64
+ : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>;
+
let neverHasSideEffects = 1 in {
def MOVPREDrr
: InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
@@ -825,278 +682,332 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
[(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
}
-// Loads
-defm LDg : PTX_LD_ALL<"ld.global", load_global>;
-defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
-defm LDl : PTX_LD_ALL<"ld.local", load_local>;
-defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+// PTX cvt instructions
+// Note all of these may actually be used, we just define all possible patterns
+// here (that make sense).
+// FIXME: Can we collapse this somehow into a multiclass def?
+
+// To i16
+def CVTu16u32
+ : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>;
+def CVTu16u64
+ : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>;
+def CVTu16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u16.f32\t$d, $a", []>;
+def CVTs16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s16.f32\t$d, $a", []>;
+def CVTu16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u16.f64\t$d, $a", []>;
+def CVTs16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s16.f64\t$d, $a", []>;
+
+// To i32
+def CVTu32u16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>;
+def CVTs32s16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>;
+def CVTu32u64
+ : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>;
+def CVTu32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u32.f32\t$d, $a", []>;
+def CVTs32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s32.f32\t$d, $a", []>;
+def CVTu32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u32.f64\t$d, $a", []>;
+def CVTs32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s32.f64\t$d, $a", []>;
+
+// To i64
+def CVTu64u16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>;
+def CVTs64s16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>;
+def CVTu64u32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>;
+def CVTs64s32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>;
+def CVTu64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u64.f32\t$d, $a", []>;
+def CVTs64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s64.f32\t$d, $a", []>;
+def CVTu64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u64.f64\t$d, $a", []>;
+def CVTs64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s64.f64\t$d, $a", []>;
+
+// To f32
+def CVTf32u16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.u16\t$d, $a", []>;
+def CVTf32s16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.s16\t$d, $a", []>;
+def CVTf32u32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.u32\t$d, $a", []>;
+def CVTf32s32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.s32\t$d, $a", []>;
+def CVTf32u64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.u64\t$d, $a", []>;
+def CVTf32s64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.s64\t$d, $a", []>;
+def CVTf32f64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.f32.f64\t$d, $a", []>;
+
+// To f64
+def CVTf64u16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.u16\t$d, $a", []>;
+def CVTf64s16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.s16\t$d, $a", []>;
+def CVTf64u32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.u32\t$d, $a", []>;
+def CVTf64s32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.s32\t$d, $a", []>;
+def CVTf64u64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.u64\t$d, $a", []>;
+def CVTf64s64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.s64\t$d, $a", []>;
+def CVTf64f32
+ : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
+
+ ///===- Control Flow Instructions -----------------------------------------===//
-// These instructions are used to load/store from the .param space for
-// device and kernel parameters
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def BRAd
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
-let hasSideEffects = 1 in {
- def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
- "ld.param.pred\t$d, [$a]",
- [(set RegPred:$d, (PTXloadparam timm:$a))]>;
- def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
- "ld.param.u16\t$d, [$a]",
- [(set RegI16:$d, (PTXloadparam timm:$a))]>;
- def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
- "ld.param.u32\t$d, [$a]",
- [(set RegI32:$d, (PTXloadparam timm:$a))]>;
- def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
- "ld.param.u64\t$d, [$a]",
- [(set RegI64:$d, (PTXloadparam timm:$a))]>;
- def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
- "ld.param.f32\t$d, [$a]",
- [(set RegF32:$d, (PTXloadparam timm:$a))]>;
- def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
- "ld.param.f64\t$d, [$a]",
- [(set RegF64:$d, (PTXloadparam timm:$a))]>;
-
- def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
- "st.param.pred\t[$d], $a",
- [(PTXstoreparam timm:$d, RegPred:$a)]>;
- def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
- "st.param.u16\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI16:$a)]>;
- def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a),
- "st.param.u32\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI32:$a)]>;
- def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a),
- "st.param.u64\t[$d], $a",
- [(PTXstoreparam timm:$d, RegI64:$a)]>;
- def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a),
- "st.param.f32\t[$d], $a",
- [(PTXstoreparam timm:$d, RegF32:$a)]>;
- def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a),
- "st.param.f64\t[$d], $a",
- [(PTXstoreparam timm:$d, RegF64:$a)]>;
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: The pattern part is blank because I cannot (or do not yet know
+ // how to) use the first operand of PredicateOperand (a RegPred register) here
+ def BRAdp
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+ [/*(brcond pred:$_p, bb:$d)*/]>;
}
-// Stores
-defm STg : PTX_ST_ALL<"st.global", store_global>;
-defm STl : PTX_ST_ALL<"st.local", store_local>;
-defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+ def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
+}
-// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
-// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
-// TODO: Do something with st.param if/when it is needed.
+let hasSideEffects = 1 in {
+ def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>;
+}
-// Conversion to pred
-// PTX does not directly support converting to a predicate type, so we fake it
-// by performing a greater-than test between the value and zero. This follows
-// the C convention that any non-zero value is equivalent to 'true'.
-def CVT_pred_u16
- : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI16:$a))]>;
+///===- Parameter Passing Pseudo-Instructions -----------------------------===//
+
+def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
+ "mov.pred\t$a, %param$b", []>;
+def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
+ "mov.b16\t$a, %param$b", []>;
+def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
+ "mov.b32\t$a, %param$b", []>;
+def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
+ "mov.b64\t$a, %param$b", []>;
+def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
+ "mov.f32\t$a, %param$b", []>;
+def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
+ "mov.f64\t$a, %param$b", []>;
+
+def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
+def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
+def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>;
+def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
+def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
+def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
-def CVT_pred_u32
- : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI32:$a))]>;
-def CVT_pred_u64
- : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
- [(set RegPred:$d, (trunc RegI64:$a))]>;
+//===----------------------------------------------------------------------===//
+// Instruction Selection Patterns
+//===----------------------------------------------------------------------===//
-def CVT_pred_f32
- : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
- [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
+// FADD
+def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)),
+ (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)),
+ (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)),
+ (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)),
+ (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FSUB
+def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)),
+ (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)),
+ (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)),
+ (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)),
+ (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL
+def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)),
+ (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)),
+ (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)),
+ (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)),
+ (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FDIV
+def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)),
+ (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)),
+ (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)),
+ (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
+ (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL+FADD
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
+ (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>;
+
+// FNEG
+def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>;
+
+// FSQRT
+def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>;
+
+// FSIN
+def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>;
+
+// FCOS
+def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>;
+
+// Type conversion notes:
+// - PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
+// - PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
-def CVT_pred_f64
- : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
- [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
+// Conversion to pred
+def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>;
+def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>;
+def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>;
+def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>;
+def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>;
// Conversion to u16
-// PTX does not directly support converting a predicate to a value, so we
-// use a select instruction to select either 0 or 1 (integer or fp) based
-// on the truth value of the predicate.
-def CVT_u16_preda
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (anyext RegPred:$a))]>;
-
-def CVT_u16_pred
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (zext RegPred:$a))]>;
-
-def CVT_u16_preds
- : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
- [(set RegI16:$d, (sext RegPred:$a))]>;
-
-def CVT_u16_u32
- : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
- [(set RegI16:$d, (trunc RegI32:$a))]>;
-
-def CVT_u16_u64
- : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a",
- [(set RegI16:$d, (trunc RegI64:$a))]>;
-
-def CVT_u16_f32
- : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
- [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u16_f64
- : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
- [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
+def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
+def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
+def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
+def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
// Conversion to u32
-
-def CVT_u32_pred
- : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
- [(set RegI32:$d, (zext RegPred:$a))]>;
-
-def CVT_u32_b16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
- [(set RegI32:$d, (anyext RegI16:$a))]>;
-
-def CVT_u32_u16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
- [(set RegI32:$d, (zext RegI16:$a))]>;
-
-def CVT_u32_preds
- : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
- [(set RegI32:$d, (sext RegPred:$a))]>;
-
-def CVT_u32_s16
- : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
- [(set RegI32:$d, (sext RegI16:$a))]>;
-
-def CVT_u32_u64
- : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
- [(set RegI32:$d, (trunc RegI64:$a))]>;
-
-def CVT_u32_f32
- : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
- [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u32_f64
- : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
- [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>;
+def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>;
+def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
+def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
// Conversion to u64
-
-def CVT_u64_pred
- : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
- [(set RegI64:$d, (zext RegPred:$a))]>;
-
-def CVT_u64_preds
- : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
- [(set RegI64:$d, (sext RegPred:$a))]>;
-
-def CVT_u64_u16
- : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
- [(set RegI64:$d, (zext RegI16:$a))]>;
-
-def CVT_u64_s16
- : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
- [(set RegI64:$d, (sext RegI16:$a))]>;
-
-def CVT_u64_u32
- : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
- [(set RegI64:$d, (zext RegI32:$a))]>;
-
-def CVT_u64_s32
- : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
- [(set RegI64:$d, (sext RegI32:$a))]>;
-
-def CVT_u64_f32
- : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
- [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
-
-def CVT_u64_f64
- : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
- [(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
+def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a,
+ 0xFFFFFFFFFFFFFFFF, 0)>;
+def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>;
+def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>;
+def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
// Conversion to f32
-
-def CVT_f32_pred
- : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
- "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
- [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
-
-def CVT_f32_u16
- : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
-
-def CVT_f32_u32
- : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI32:$a))]>;
-
-def CVT_f32_u64
- : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
- [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
-
-def CVT_f32_f64
- : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
- [(set RegF32:$d, (fround RegF64:$a))]>;
+def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
+ (MOVf32i32 0x3F800000), (MOVf32i32 0))>;
+def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
+def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
// Conversion to f64
+def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
+ (MOVf64i64 0x3F80000000000000), (MOVf64i64 0))>;
+def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
+def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
-def CVT_f64_pred
- : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
- "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
- [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
-
-def CVT_f64_u16
- : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
-
-def CVT_f64_u32
- : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
-
-def CVT_f64_u64
- : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
- [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
-
-def CVT_f64_f32
- : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
- [(set RegF64:$d, (fextend RegF32:$a))]>;
-
-///===- Control Flow Instructions -----------------------------------------===//
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
- def BRAd
- : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
-}
-
-let isBranch = 1, isTerminator = 1 in {
- // FIXME: The pattern part is blank because I cannot (or do not yet know
- // how to) use the first operand of PredicateOperand (a RegPred register) here
- def BRAdp
- : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
- [/*(brcond pred:$_p, bb:$d)*/]>;
-}
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
- def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
-}
-
-///===- Spill Instructions ------------------------------------------------===//
-// Special instructions used for stack spilling
-def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
- "mov.u16\ts$d, $a", []>;
-def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
- "mov.u32\ts$d, $a", []>;
-def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
- "mov.u64\ts$d, $a", []>;
-def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
- "mov.f32\ts$d, $a", []>;
-def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
- "mov.f64\ts$d, $a", []>;
-
-def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
- "mov.u16\t$d, s$a", []>;
-def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
- "mov.u32\t$d, s$a", []>;
-def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
- "mov.u64\t$d, s$a", []>;
-def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
- "mov.f32\t$d, s$a", []>;
-def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
- "mov.f64\t$d, s$a", []>;
///===- Intrinsic Instructions --------------------------------------------===//
-
include "PTXIntrinsicInstrInfo.td"
+
+///===- Load/Store Instructions -------------------------------------------===//
+include "PTXInstrLoadStore.td"
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td
new file mode 100644
index 0000000..9b4f56c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td
@@ -0,0 +1,278 @@
+//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX load/store instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Addressing Predicates
+// We have to differentiate between 32- and 64-bit pointer types
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments for Loads/Stores
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Constant;
+ return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+def store_global
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def store_shared
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+// Addressing modes.
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
+def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
+def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
+
+// Address operands
+def MEMri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI32, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI64, i64imm);
+}
+def LOCALri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def LOCALri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+def MEMii32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMii64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
+def MEMpi : Operand<i32> {
+ let PrintMethod = "printParamOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+def MEMret : Operand<i32> {
+ let PrintMethod = "printReturnOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// Load/store .param space
+def PTXloadparam
+ : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+ : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+def PTXreadparam
+ : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXwriteparam
+ : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Classes for loads/stores
+//===----------------------------------------------------------------------===//
+multiclass PTX_LD<string opstr, string typestr,
+ RegisterClass RC, PatFrag pat_load> {
+ def rr32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs RC:$d),
+ (ins MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs RC:$d),
+ (ins MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
+ PatFrag pat_store> {
+ def rr32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs),
+ (ins RC:$d, MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs),
+ (ins RC:$d, MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
+ def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal32:$a))]>;
+ def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal64:$a))]>;
+ def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal32:$a)]>;
+ def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal64:$a)]>;
+}
+
+multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
+ let hasSideEffects = 1 in {
+ def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
+ !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (PTXloadparam texternalsym:$a))]>;
+ def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
+ !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")),
+ [(PTXstoreparam texternalsym:$d, RC:$a)]>;
+ }
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for loads/stores
+//===----------------------------------------------------------------------===//
+
+// Global/shared stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// Global/shared/constant loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// Param loads/stores
+defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
+defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>;
+defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>;
+defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>;
+defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>;
+defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>;
+
+// Local loads/stores
+defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
+defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>;
+defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>;
+defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>;
+defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>;
+defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 8d97909..9de1cb6 100644
--- a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -25,37 +25,63 @@ class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
// TODO Add read vector-version of special registers
-//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>;
-def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
-def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
-def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
-def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
+// int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
+ int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
+ int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
+ int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
+ int_ptx_read_tid_w>;
-//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>;
-def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
-def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
-def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
-def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
+// int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
+ int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
+ int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
+ int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
+ int_ptx_read_ntid_w>;
-def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
-def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
-def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
+ int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
+ int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
+ int_ptx_read_nwarpid>;
-//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
-def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
-def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
-def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
-def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;
+//def PTX_READ_CTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
+ int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
+ int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
+ int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
+ int_ptx_read_ctaid_w>;
-//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
-def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
-def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
-def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
-def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;
+//def PTX_READ_NCTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
+ int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
+ int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
+ int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
+ int_ptx_read_nctaid_w>;
-def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
-def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
-def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
+ int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
+ int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
+ int_ptx_read_gridid>;
def PTX_READ_LANEMASK_EQ
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
index b13a3da..468ce93 100644
--- a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -100,7 +100,7 @@ public:
/// @{
virtual void ChangeSection(const MCSection *Section);
- virtual void InitSections() {}
+ virtual void InitSections() { /* PTX does not use sections */ }
virtual void EmitLabel(MCSymbol *Symbol);
@@ -132,7 +132,9 @@ public:
///
/// @param Symbol - The common symbol to emit.
/// @param Size - The size of the common symbol.
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ /// @param ByteAlignment - The alignment of the common symbol in bytes.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
@@ -233,7 +235,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- //assert(getCurrentSection() && "Cannot emit before setting section!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
OS << *Symbol << MAI.getLabelSuffix();
EmitEOL();
@@ -283,7 +285,8 @@ void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
-void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {}
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
unsigned Size, unsigned ByteAlignment) {}
@@ -510,7 +513,7 @@ void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
// If we have an AsmPrinter, use that to print, otherwise print the MCInst.
if (InstPrinter)
- InstPrinter->printInst(&Inst, OS);
+ InstPrinter->printInst(&Inst, OS, "");
else
Inst.print(OS, &MAI);
EmitEOL();
@@ -533,7 +536,7 @@ namespace llvm {
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc, bool useCFI,
MCInstPrinter *IP,
- MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ MCCodeEmitter *CE, MCAsmBackend *MAB,
bool ShowInst) {
return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
IP, CE, ShowInst);
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp
new file mode 100644
index 0000000..142e639
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp
@@ -0,0 +1,32 @@
+//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PTX MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXAsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp;
+ OutMI.addOperand(AP.lowerOperand(MO));
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
index 6fe9e6c..b33a273 100644
--- a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -52,36 +52,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
MachineRegisterInfo &MRI = MF.getRegInfo();
- DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
-
- DEBUG(dbgs()
- << "PTX::NoRegister == " << PTX::NoRegister << "\n"
- << "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
-
- DEBUG(for (unsigned reg = PTX::NoRegister + 1;
- reg < PTX::NUM_TARGET_REGS; ++reg)
- if (MRI.isPhysRegUsed(reg))
- dbgs() << "Used Reg: " << reg << "\n";);
-
- // FIXME: This is a slow linear scanning
- for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
- if (MRI.isPhysRegUsed(reg) &&
- !MFI->isRetReg(reg) &&
- (MFI->isKernel() || !MFI->isArgReg(reg)))
- MFI->addLocalVarReg(reg);
-
- // Notify MachineFunctionInfo that I've done adding local var reg
- MFI->doneAddLocalVar();
-
- DEBUG(for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd();
- i != e; ++i)
- dbgs() << "Arg Reg: " << *i << "\n";);
-
- DEBUG(for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
- i != e; ++i)
- dbgs() << "Local Var Reg: " << *i << "\n";);
+ // Generate list of all virtual registers used in this function
+ for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
+ MFI->addVirtualRegister(TRC, Reg);
+ }
return false;
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
index 9d65f5b..3b985f7 100644
--- a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -15,75 +15,148 @@
#define PTX_MACHINE_FUNCTION_INFO_H
#include "PTX.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
+
/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
/// contains private PTX target-specific information for each MachineFunction.
///
class PTXMachineFunctionInfo : public MachineFunctionInfo {
private:
- bool is_kernel;
- std::vector<unsigned> reg_arg, reg_local_var;
- std::vector<unsigned> reg_ret;
- bool _isDoneAddArg;
+ bool IsKernel;
+ DenseSet<unsigned> RegArgs;
+ DenseSet<unsigned> RegRets;
+
+ typedef std::vector<unsigned> RegisterList;
+ typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
+ typedef DenseMap<unsigned, std::string> RegisterNameMap;
+ typedef DenseMap<int, std::string> FrameMap;
+
+ RegisterMap UsedRegs;
+ RegisterNameMap RegNames;
+ FrameMap FrameSymbols;
+
+ PTXParamManager ParamManager;
public:
+ typedef DenseSet<unsigned>::const_iterator reg_iterator;
+
PTXMachineFunctionInfo(MachineFunction &MF)
- : is_kernel(false), reg_ret(PTX::NoRegister), _isDoneAddArg(false) {
- reg_arg.reserve(8);
- reg_local_var.reserve(32);
+ : IsKernel(false) {
+ UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
+ UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
}
- void setKernel(bool _is_kernel=true) { is_kernel = _is_kernel; }
-
- void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
- void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
- void addRetReg(unsigned reg) {
- if (!isRetReg(reg)) {
- reg_ret.push_back(reg);
+ /// getParamManager - Returns the PTXParamManager instance for this function.
+ PTXParamManager& getParamManager() { return ParamManager; }
+ const PTXParamManager& getParamManager() const { return ParamManager; }
+
+ /// setKernel/isKernel - Gets/sets a flag that indicates if this function is
+ /// a PTX kernel function.
+ void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
+ bool isKernel() const { return IsKernel; }
+
+ /// argreg_begin/argreg_end - Returns iterators to the set of registers
+ /// containing function arguments.
+ reg_iterator argreg_begin() const { return RegArgs.begin(); }
+ reg_iterator argreg_end() const { return RegArgs.end(); }
+
+ /// retreg_begin/retreg_end - Returns iterators to the set of registers
+ /// containing the function return values.
+ reg_iterator retreg_begin() const { return RegRets.begin(); }
+ reg_iterator retreg_end() const { return RegRets.end(); }
+
+ /// addRetReg - Adds a register to the set of return-value registers.
+ void addRetReg(unsigned Reg) {
+ if (!RegRets.count(Reg)) {
+ RegRets.insert(Reg);
+ std::string name;
+ name = "%ret";
+ name += utostr(RegRets.size() - 1);
+ RegNames[Reg] = name;
}
}
- void doneAddArg(void) {
- _isDoneAddArg = true;
+ /// addArgReg - Adds a register to the set of function argument registers.
+ void addArgReg(unsigned Reg) {
+ RegArgs.insert(Reg);
+ std::string name;
+ name = "%param";
+ name += utostr(RegArgs.size() - 1);
+ RegNames[Reg] = name;
}
- void doneAddLocalVar(void) {}
-
- bool isKernel() const { return is_kernel; }
- typedef std::vector<unsigned>::const_iterator reg_iterator;
- typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
- typedef std::vector<unsigned>::const_iterator ret_iterator;
-
- bool argRegEmpty() const { return reg_arg.empty(); }
- int getNumArg() const { return reg_arg.size(); }
- reg_iterator argRegBegin() const { return reg_arg.begin(); }
- reg_iterator argRegEnd() const { return reg_arg.end(); }
- reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); }
- reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); }
-
- bool localVarRegEmpty() const { return reg_local_var.empty(); }
- reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
- reg_iterator localVarRegEnd() const { return reg_local_var.end(); }
-
- bool retRegEmpty() const { return reg_ret.empty(); }
- int getNumRet() const { return reg_ret.size(); }
- ret_iterator retRegBegin() const { return reg_ret.begin(); }
- ret_iterator retRegEnd() const { return reg_ret.end(); }
+ /// addVirtualRegister - Adds a virtual register to the set of all used
+ /// registers in the function.
+ void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
+ std::string name;
+
+ // Do not count registers that are argument/return registers.
+ if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
+ UsedRegs[TRC].push_back(Reg);
+ if (TRC == PTX::RegPredRegisterClass)
+ name = "%p";
+ else if (TRC == PTX::RegI16RegisterClass)
+ name = "%rh";
+ else if (TRC == PTX::RegI32RegisterClass)
+ name = "%r";
+ else if (TRC == PTX::RegI64RegisterClass)
+ name = "%rd";
+ else if (TRC == PTX::RegF32RegisterClass)
+ name = "%f";
+ else if (TRC == PTX::RegF64RegisterClass)
+ name = "%fd";
+ else
+ llvm_unreachable("Invalid register class");
+
+ name += utostr(UsedRegs[TRC].size() - 1);
+ RegNames[Reg] = name;
+ }
+ }
- bool isArgReg(unsigned reg) const {
- return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
+ /// getRegisterName - Returns the name of the specified virtual register. This
+ /// name is used during PTX emission.
+ const char *getRegisterName(unsigned Reg) const {
+ if (RegNames.count(Reg))
+ return RegNames.find(Reg)->second.c_str();
+ else if (Reg == PTX::NoRegister)
+ return "%noreg";
+ else
+ llvm_unreachable("Register not in register name map");
}
- bool isRetReg(unsigned reg) const {
- return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end();
+ /// getNumRegistersForClass - Returns the number of virtual registers that are
+ /// used for the specified register class.
+ unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
+ return UsedRegs.lookup(TRC).size();
}
- bool isLocalVarReg(unsigned reg) const {
- return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
- != reg_local_var.end();
+ /// getFrameSymbol - Returns the symbol name for the given FrameIndex.
+ const char* getFrameSymbol(int FrameIndex) {
+ if (FrameSymbols.count(FrameIndex)) {
+ return FrameSymbols.lookup(FrameIndex).c_str();
+ } else {
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
+ // The whole point of caching this name is to ensure the pointer we pass
+ // to any getExternalSymbol() calls will remain valid for the lifetime of
+ // the back-end instance. This is to work around an issue in SelectionDAG
+ // where symbol names are expected to be life-long strings.
+ FrameSymbols[FrameIndex] = Name;
+ return FrameSymbols[FrameIndex].c_str();
+ }
}
}; // class PTXMachineFunctionInfo
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp
new file mode 100644
index 0000000..7753787
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp
@@ -0,0 +1,73 @@
+//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXParamManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXParamManager.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+PTXParamManager::PTXParamManager() {
+}
+
+unsigned PTXParamManager::addArgumentParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_ARGUMENT;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__param_";
+ Name += utostr(ArgumentParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ArgumentParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addReturnParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_RETURN;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__ret_";
+ Name += utostr(ReturnParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ReturnParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addLocalParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_LOCAL;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__localparam_";
+ Name += utostr(LocalParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ LocalParams.push_back(Index);
+
+ return Index;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.h b/contrib/llvm/lib/Target/PTX/PTXParamManager.h
new file mode 100644
index 0000000..9fd2de5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.h
@@ -0,0 +1,86 @@
+//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTXParamManager class, which manages all defined .param
+// variables for a particular function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_PARAM_MANAGER_H
+#define PTX_PARAM_MANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+/// PTXParamManager - This class manages all .param variables defined for a
+/// particular function.
+class PTXParamManager {
+private:
+
+ /// PTXParamType - Type of a .param variable
+ enum PTXParamType {
+ PTX_PARAM_TYPE_ARGUMENT,
+ PTX_PARAM_TYPE_RETURN,
+ PTX_PARAM_TYPE_LOCAL
+ };
+
+ /// PTXParam - Definition of a PTX .param variable
+ struct PTXParam {
+ PTXParamType Type;
+ unsigned Size;
+ std::string Name;
+ };
+
+ DenseMap<unsigned, PTXParam> AllParams;
+ SmallVector<unsigned, 4> ArgumentParams;
+ SmallVector<unsigned, 4> ReturnParams;
+ SmallVector<unsigned, 4> LocalParams;
+
+public:
+
+ typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
+
+ PTXParamManager();
+
+ param_iterator arg_begin() const { return ArgumentParams.begin(); }
+ param_iterator arg_end() const { return ArgumentParams.end(); }
+ param_iterator ret_begin() const { return ReturnParams.begin(); }
+ param_iterator ret_end() const { return ReturnParams.end(); }
+ param_iterator local_begin() const { return LocalParams.begin(); }
+ param_iterator local_end() const { return LocalParams.end(); }
+
+ /// addArgumentParam - Returns a new .param used as an argument.
+ unsigned addArgumentParam(unsigned Size);
+
+ /// addReturnParam - Returns a new .param used as a return argument.
+ unsigned addReturnParam(unsigned Size);
+
+ /// addLocalParam - Returns a new .param used as a local .param variable.
+ unsigned addLocalParam(unsigned Size);
+
+ /// getParamName - Returns the name of the parameter as a string.
+ const std::string &getParamName(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Name;
+ }
+
+ /// getParamSize - Returns the size of the parameter in bits.
+ unsigned getParamSize(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Size;
+ }
+
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
new file mode 100644
index 0000000..2d2d5c3
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
@@ -0,0 +1,58 @@
+//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a register allocator for PTX code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-reg-alloc"
+
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+
+using namespace llvm;
+
+namespace {
+ // Special register allocator for PTX.
+ class PTXRegAlloc : public MachineFunctionPass {
+ public:
+ static char ID;
+ PTXRegAlloc() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char* getPassName() const {
+ return "PTX Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(PHIEliminationID);
+ AU.addRequiredID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // We do not actually do anything (at least not yet).
+ return false;
+ }
+ };
+
+ char PTXRegAlloc::ID = 0;
+
+ static RegisterRegAlloc
+ ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
+}
+
+FunctionPass *llvm::createPTXRegisterAllocator() {
+ return new PTXRegAlloc();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
index cb56ea9..c806266 100644
--- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -14,6 +14,9 @@
#include "PTX.h"
#include "PTXRegisterInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -23,15 +26,23 @@
using namespace llvm;
PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &TII)
- : PTXGenRegisterInfo() {
+ const TargetInstrInfo &tii)
+ // PTX does not have a return address register.
+ : PTXGenRegisterInfo(0), TII(tii) {
}
void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj,
RegScavenger *RS) const {
unsigned Index;
- MachineInstr& MI = *II;
+ MachineInstr &MI = *II;
+ //MachineBasicBlock &MBB = *MI.getParent();
+ //DebugLoc dl = MI.getDebugLoc();
+ //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+
+ //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass);
+
+ llvm_unreachable("FrameIndex should have been previously eliminated!");
Index = 0;
while (!MI.getOperand(Index).isFI()) {
@@ -46,6 +57,18 @@ void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+ //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32))
+ //.addReg(Reg, RegState::Define).addImm(FrameIndex);
+ //if (MI2->findFirstPredOperandIdx() == -1) {
+ // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
+ //}
+ //MI2->dump();
+
+ //MachineOperand ESOp = MachineOperand::CreateES("__local__");
+
// This frame index is post stack slot re-use assignments
+ //MI.getOperand(Index).ChangeToRegister(Reg, false);
MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+ //MI.getOperand(Index) = ESOp;
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
index 0b63cb6..55fafe4 100644
--- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
@@ -25,8 +25,12 @@ class PTXTargetMachine;
class MachineFunction;
struct PTXRegisterInfo : public PTXGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+public:
PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &TII);
+ const TargetInstrInfo &tii);
virtual const unsigned
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
@@ -47,18 +51,6 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo {
llvm_unreachable("PTX does not have a frame register");
return 0;
}
-
- virtual unsigned getRARegister() const {
- llvm_unreachable("PTX does not have a return address register");
- return 0;
- }
-
- virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return PTXGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
- }
- virtual int getLLVMRegNum(unsigned RegNum, bool isEH) const {
- return PTXGenRegisterInfo::getLLVMRegNumFull(RegNum, 0);
- }
}; // struct PTXRegisterInfo
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
index 1313d24..6ed6d3f 100644
--- a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
@@ -20,536 +20,18 @@ class PTXReg<string n> : Register<n> {
// Registers
//===----------------------------------------------------------------------===//
-///===- Predicate Registers -----------------------------------------------===//
-
-def P0 : PTXReg<"p0">;
-def P1 : PTXReg<"p1">;
-def P2 : PTXReg<"p2">;
-def P3 : PTXReg<"p3">;
-def P4 : PTXReg<"p4">;
-def P5 : PTXReg<"p5">;
-def P6 : PTXReg<"p6">;
-def P7 : PTXReg<"p7">;
-def P8 : PTXReg<"p8">;
-def P9 : PTXReg<"p9">;
-def P10 : PTXReg<"p10">;
-def P11 : PTXReg<"p11">;
-def P12 : PTXReg<"p12">;
-def P13 : PTXReg<"p13">;
-def P14 : PTXReg<"p14">;
-def P15 : PTXReg<"p15">;
-def P16 : PTXReg<"p16">;
-def P17 : PTXReg<"p17">;
-def P18 : PTXReg<"p18">;
-def P19 : PTXReg<"p19">;
-def P20 : PTXReg<"p20">;
-def P21 : PTXReg<"p21">;
-def P22 : PTXReg<"p22">;
-def P23 : PTXReg<"p23">;
-def P24 : PTXReg<"p24">;
-def P25 : PTXReg<"p25">;
-def P26 : PTXReg<"p26">;
-def P27 : PTXReg<"p27">;
-def P28 : PTXReg<"p28">;
-def P29 : PTXReg<"p29">;
-def P30 : PTXReg<"p30">;
-def P31 : PTXReg<"p31">;
-def P32 : PTXReg<"p32">;
-def P33 : PTXReg<"p33">;
-def P34 : PTXReg<"p34">;
-def P35 : PTXReg<"p35">;
-def P36 : PTXReg<"p36">;
-def P37 : PTXReg<"p37">;
-def P38 : PTXReg<"p38">;
-def P39 : PTXReg<"p39">;
-def P40 : PTXReg<"p40">;
-def P41 : PTXReg<"p41">;
-def P42 : PTXReg<"p42">;
-def P43 : PTXReg<"p43">;
-def P44 : PTXReg<"p44">;
-def P45 : PTXReg<"p45">;
-def P46 : PTXReg<"p46">;
-def P47 : PTXReg<"p47">;
-def P48 : PTXReg<"p48">;
-def P49 : PTXReg<"p49">;
-def P50 : PTXReg<"p50">;
-def P51 : PTXReg<"p51">;
-def P52 : PTXReg<"p52">;
-def P53 : PTXReg<"p53">;
-def P54 : PTXReg<"p54">;
-def P55 : PTXReg<"p55">;
-def P56 : PTXReg<"p56">;
-def P57 : PTXReg<"p57">;
-def P58 : PTXReg<"p58">;
-def P59 : PTXReg<"p59">;
-def P60 : PTXReg<"p60">;
-def P61 : PTXReg<"p61">;
-def P62 : PTXReg<"p62">;
-def P63 : PTXReg<"p63">;
-def P64 : PTXReg<"p64">;
-def P65 : PTXReg<"p65">;
-def P66 : PTXReg<"p66">;
-def P67 : PTXReg<"p67">;
-def P68 : PTXReg<"p68">;
-def P69 : PTXReg<"p69">;
-def P70 : PTXReg<"p70">;
-def P71 : PTXReg<"p71">;
-def P72 : PTXReg<"p72">;
-def P73 : PTXReg<"p73">;
-def P74 : PTXReg<"p74">;
-def P75 : PTXReg<"p75">;
-def P76 : PTXReg<"p76">;
-def P77 : PTXReg<"p77">;
-def P78 : PTXReg<"p78">;
-def P79 : PTXReg<"p79">;
-def P80 : PTXReg<"p80">;
-def P81 : PTXReg<"p81">;
-def P82 : PTXReg<"p82">;
-def P83 : PTXReg<"p83">;
-def P84 : PTXReg<"p84">;
-def P85 : PTXReg<"p85">;
-def P86 : PTXReg<"p86">;
-def P87 : PTXReg<"p87">;
-def P88 : PTXReg<"p88">;
-def P89 : PTXReg<"p89">;
-def P90 : PTXReg<"p90">;
-def P91 : PTXReg<"p91">;
-def P92 : PTXReg<"p92">;
-def P93 : PTXReg<"p93">;
-def P94 : PTXReg<"p94">;
-def P95 : PTXReg<"p95">;
-def P96 : PTXReg<"p96">;
-def P97 : PTXReg<"p97">;
-def P98 : PTXReg<"p98">;
-def P99 : PTXReg<"p99">;
-def P100 : PTXReg<"p100">;
-def P101 : PTXReg<"p101">;
-def P102 : PTXReg<"p102">;
-def P103 : PTXReg<"p103">;
-def P104 : PTXReg<"p104">;
-def P105 : PTXReg<"p105">;
-def P106 : PTXReg<"p106">;
-def P107 : PTXReg<"p107">;
-def P108 : PTXReg<"p108">;
-def P109 : PTXReg<"p109">;
-def P110 : PTXReg<"p110">;
-def P111 : PTXReg<"p111">;
-def P112 : PTXReg<"p112">;
-def P113 : PTXReg<"p113">;
-def P114 : PTXReg<"p114">;
-def P115 : PTXReg<"p115">;
-def P116 : PTXReg<"p116">;
-def P117 : PTXReg<"p117">;
-def P118 : PTXReg<"p118">;
-def P119 : PTXReg<"p119">;
-def P120 : PTXReg<"p120">;
-def P121 : PTXReg<"p121">;
-def P122 : PTXReg<"p122">;
-def P123 : PTXReg<"p123">;
-def P124 : PTXReg<"p124">;
-def P125 : PTXReg<"p125">;
-def P126 : PTXReg<"p126">;
-def P127 : PTXReg<"p127">;
-
-///===- 16-Bit Registers --------------------------------------------------===//
-
-def RH0 : PTXReg<"rh0">;
-def RH1 : PTXReg<"rh1">;
-def RH2 : PTXReg<"rh2">;
-def RH3 : PTXReg<"rh3">;
-def RH4 : PTXReg<"rh4">;
-def RH5 : PTXReg<"rh5">;
-def RH6 : PTXReg<"rh6">;
-def RH7 : PTXReg<"rh7">;
-def RH8 : PTXReg<"rh8">;
-def RH9 : PTXReg<"rh9">;
-def RH10 : PTXReg<"rh10">;
-def RH11 : PTXReg<"rh11">;
-def RH12 : PTXReg<"rh12">;
-def RH13 : PTXReg<"rh13">;
-def RH14 : PTXReg<"rh14">;
-def RH15 : PTXReg<"rh15">;
-def RH16 : PTXReg<"rh16">;
-def RH17 : PTXReg<"rh17">;
-def RH18 : PTXReg<"rh18">;
-def RH19 : PTXReg<"rh19">;
-def RH20 : PTXReg<"rh20">;
-def RH21 : PTXReg<"rh21">;
-def RH22 : PTXReg<"rh22">;
-def RH23 : PTXReg<"rh23">;
-def RH24 : PTXReg<"rh24">;
-def RH25 : PTXReg<"rh25">;
-def RH26 : PTXReg<"rh26">;
-def RH27 : PTXReg<"rh27">;
-def RH28 : PTXReg<"rh28">;
-def RH29 : PTXReg<"rh29">;
-def RH30 : PTXReg<"rh30">;
-def RH31 : PTXReg<"rh31">;
-def RH32 : PTXReg<"rh32">;
-def RH33 : PTXReg<"rh33">;
-def RH34 : PTXReg<"rh34">;
-def RH35 : PTXReg<"rh35">;
-def RH36 : PTXReg<"rh36">;
-def RH37 : PTXReg<"rh37">;
-def RH38 : PTXReg<"rh38">;
-def RH39 : PTXReg<"rh39">;
-def RH40 : PTXReg<"rh40">;
-def RH41 : PTXReg<"rh41">;
-def RH42 : PTXReg<"rh42">;
-def RH43 : PTXReg<"rh43">;
-def RH44 : PTXReg<"rh44">;
-def RH45 : PTXReg<"rh45">;
-def RH46 : PTXReg<"rh46">;
-def RH47 : PTXReg<"rh47">;
-def RH48 : PTXReg<"rh48">;
-def RH49 : PTXReg<"rh49">;
-def RH50 : PTXReg<"rh50">;
-def RH51 : PTXReg<"rh51">;
-def RH52 : PTXReg<"rh52">;
-def RH53 : PTXReg<"rh53">;
-def RH54 : PTXReg<"rh54">;
-def RH55 : PTXReg<"rh55">;
-def RH56 : PTXReg<"rh56">;
-def RH57 : PTXReg<"rh57">;
-def RH58 : PTXReg<"rh58">;
-def RH59 : PTXReg<"rh59">;
-def RH60 : PTXReg<"rh60">;
-def RH61 : PTXReg<"rh61">;
-def RH62 : PTXReg<"rh62">;
-def RH63 : PTXReg<"rh63">;
-def RH64 : PTXReg<"rh64">;
-def RH65 : PTXReg<"rh65">;
-def RH66 : PTXReg<"rh66">;
-def RH67 : PTXReg<"rh67">;
-def RH68 : PTXReg<"rh68">;
-def RH69 : PTXReg<"rh69">;
-def RH70 : PTXReg<"rh70">;
-def RH71 : PTXReg<"rh71">;
-def RH72 : PTXReg<"rh72">;
-def RH73 : PTXReg<"rh73">;
-def RH74 : PTXReg<"rh74">;
-def RH75 : PTXReg<"rh75">;
-def RH76 : PTXReg<"rh76">;
-def RH77 : PTXReg<"rh77">;
-def RH78 : PTXReg<"rh78">;
-def RH79 : PTXReg<"rh79">;
-def RH80 : PTXReg<"rh80">;
-def RH81 : PTXReg<"rh81">;
-def RH82 : PTXReg<"rh82">;
-def RH83 : PTXReg<"rh83">;
-def RH84 : PTXReg<"rh84">;
-def RH85 : PTXReg<"rh85">;
-def RH86 : PTXReg<"rh86">;
-def RH87 : PTXReg<"rh87">;
-def RH88 : PTXReg<"rh88">;
-def RH89 : PTXReg<"rh89">;
-def RH90 : PTXReg<"rh90">;
-def RH91 : PTXReg<"rh91">;
-def RH92 : PTXReg<"rh92">;
-def RH93 : PTXReg<"rh93">;
-def RH94 : PTXReg<"rh94">;
-def RH95 : PTXReg<"rh95">;
-def RH96 : PTXReg<"rh96">;
-def RH97 : PTXReg<"rh97">;
-def RH98 : PTXReg<"rh98">;
-def RH99 : PTXReg<"rh99">;
-def RH100 : PTXReg<"rh100">;
-def RH101 : PTXReg<"rh101">;
-def RH102 : PTXReg<"rh102">;
-def RH103 : PTXReg<"rh103">;
-def RH104 : PTXReg<"rh104">;
-def RH105 : PTXReg<"rh105">;
-def RH106 : PTXReg<"rh106">;
-def RH107 : PTXReg<"rh107">;
-def RH108 : PTXReg<"rh108">;
-def RH109 : PTXReg<"rh109">;
-def RH110 : PTXReg<"rh110">;
-def RH111 : PTXReg<"rh111">;
-def RH112 : PTXReg<"rh112">;
-def RH113 : PTXReg<"rh113">;
-def RH114 : PTXReg<"rh114">;
-def RH115 : PTXReg<"rh115">;
-def RH116 : PTXReg<"rh116">;
-def RH117 : PTXReg<"rh117">;
-def RH118 : PTXReg<"rh118">;
-def RH119 : PTXReg<"rh119">;
-def RH120 : PTXReg<"rh120">;
-def RH121 : PTXReg<"rh121">;
-def RH122 : PTXReg<"rh122">;
-def RH123 : PTXReg<"rh123">;
-def RH124 : PTXReg<"rh124">;
-def RH125 : PTXReg<"rh125">;
-def RH126 : PTXReg<"rh126">;
-def RH127 : PTXReg<"rh127">;
-
-///===- 32-Bit Registers --------------------------------------------------===//
-
-def R0 : PTXReg<"r0">;
-def R1 : PTXReg<"r1">;
-def R2 : PTXReg<"r2">;
-def R3 : PTXReg<"r3">;
-def R4 : PTXReg<"r4">;
-def R5 : PTXReg<"r5">;
-def R6 : PTXReg<"r6">;
-def R7 : PTXReg<"r7">;
-def R8 : PTXReg<"r8">;
-def R9 : PTXReg<"r9">;
-def R10 : PTXReg<"r10">;
-def R11 : PTXReg<"r11">;
-def R12 : PTXReg<"r12">;
-def R13 : PTXReg<"r13">;
-def R14 : PTXReg<"r14">;
-def R15 : PTXReg<"r15">;
-def R16 : PTXReg<"r16">;
-def R17 : PTXReg<"r17">;
-def R18 : PTXReg<"r18">;
-def R19 : PTXReg<"r19">;
-def R20 : PTXReg<"r20">;
-def R21 : PTXReg<"r21">;
-def R22 : PTXReg<"r22">;
-def R23 : PTXReg<"r23">;
-def R24 : PTXReg<"r24">;
-def R25 : PTXReg<"r25">;
-def R26 : PTXReg<"r26">;
-def R27 : PTXReg<"r27">;
-def R28 : PTXReg<"r28">;
-def R29 : PTXReg<"r29">;
-def R30 : PTXReg<"r30">;
-def R31 : PTXReg<"r31">;
-def R32 : PTXReg<"r32">;
-def R33 : PTXReg<"r33">;
-def R34 : PTXReg<"r34">;
-def R35 : PTXReg<"r35">;
-def R36 : PTXReg<"r36">;
-def R37 : PTXReg<"r37">;
-def R38 : PTXReg<"r38">;
-def R39 : PTXReg<"r39">;
-def R40 : PTXReg<"r40">;
-def R41 : PTXReg<"r41">;
-def R42 : PTXReg<"r42">;
-def R43 : PTXReg<"r43">;
-def R44 : PTXReg<"r44">;
-def R45 : PTXReg<"r45">;
-def R46 : PTXReg<"r46">;
-def R47 : PTXReg<"r47">;
-def R48 : PTXReg<"r48">;
-def R49 : PTXReg<"r49">;
-def R50 : PTXReg<"r50">;
-def R51 : PTXReg<"r51">;
-def R52 : PTXReg<"r52">;
-def R53 : PTXReg<"r53">;
-def R54 : PTXReg<"r54">;
-def R55 : PTXReg<"r55">;
-def R56 : PTXReg<"r56">;
-def R57 : PTXReg<"r57">;
-def R58 : PTXReg<"r58">;
-def R59 : PTXReg<"r59">;
-def R60 : PTXReg<"r60">;
-def R61 : PTXReg<"r61">;
-def R62 : PTXReg<"r62">;
-def R63 : PTXReg<"r63">;
-def R64 : PTXReg<"r64">;
-def R65 : PTXReg<"r65">;
-def R66 : PTXReg<"r66">;
-def R67 : PTXReg<"r67">;
-def R68 : PTXReg<"r68">;
-def R69 : PTXReg<"r69">;
-def R70 : PTXReg<"r70">;
-def R71 : PTXReg<"r71">;
-def R72 : PTXReg<"r72">;
-def R73 : PTXReg<"r73">;
-def R74 : PTXReg<"r74">;
-def R75 : PTXReg<"r75">;
-def R76 : PTXReg<"r76">;
-def R77 : PTXReg<"r77">;
-def R78 : PTXReg<"r78">;
-def R79 : PTXReg<"r79">;
-def R80 : PTXReg<"r80">;
-def R81 : PTXReg<"r81">;
-def R82 : PTXReg<"r82">;
-def R83 : PTXReg<"r83">;
-def R84 : PTXReg<"r84">;
-def R85 : PTXReg<"r85">;
-def R86 : PTXReg<"r86">;
-def R87 : PTXReg<"r87">;
-def R88 : PTXReg<"r88">;
-def R89 : PTXReg<"r89">;
-def R90 : PTXReg<"r90">;
-def R91 : PTXReg<"r91">;
-def R92 : PTXReg<"r92">;
-def R93 : PTXReg<"r93">;
-def R94 : PTXReg<"r94">;
-def R95 : PTXReg<"r95">;
-def R96 : PTXReg<"r96">;
-def R97 : PTXReg<"r97">;
-def R98 : PTXReg<"r98">;
-def R99 : PTXReg<"r99">;
-def R100 : PTXReg<"r100">;
-def R101 : PTXReg<"r101">;
-def R102 : PTXReg<"r102">;
-def R103 : PTXReg<"r103">;
-def R104 : PTXReg<"r104">;
-def R105 : PTXReg<"r105">;
-def R106 : PTXReg<"r106">;
-def R107 : PTXReg<"r107">;
-def R108 : PTXReg<"r108">;
-def R109 : PTXReg<"r109">;
-def R110 : PTXReg<"r110">;
-def R111 : PTXReg<"r111">;
-def R112 : PTXReg<"r112">;
-def R113 : PTXReg<"r113">;
-def R114 : PTXReg<"r114">;
-def R115 : PTXReg<"r115">;
-def R116 : PTXReg<"r116">;
-def R117 : PTXReg<"r117">;
-def R118 : PTXReg<"r118">;
-def R119 : PTXReg<"r119">;
-def R120 : PTXReg<"r120">;
-def R121 : PTXReg<"r121">;
-def R122 : PTXReg<"r122">;
-def R123 : PTXReg<"r123">;
-def R124 : PTXReg<"r124">;
-def R125 : PTXReg<"r125">;
-def R126 : PTXReg<"r126">;
-def R127 : PTXReg<"r127">;
-
-///===- 64-Bit Registers --------------------------------------------------===//
-
-def RD0 : PTXReg<"rd0">;
-def RD1 : PTXReg<"rd1">;
-def RD2 : PTXReg<"rd2">;
-def RD3 : PTXReg<"rd3">;
-def RD4 : PTXReg<"rd4">;
-def RD5 : PTXReg<"rd5">;
-def RD6 : PTXReg<"rd6">;
-def RD7 : PTXReg<"rd7">;
-def RD8 : PTXReg<"rd8">;
-def RD9 : PTXReg<"rd9">;
-def RD10 : PTXReg<"rd10">;
-def RD11 : PTXReg<"rd11">;
-def RD12 : PTXReg<"rd12">;
-def RD13 : PTXReg<"rd13">;
-def RD14 : PTXReg<"rd14">;
-def RD15 : PTXReg<"rd15">;
-def RD16 : PTXReg<"rd16">;
-def RD17 : PTXReg<"rd17">;
-def RD18 : PTXReg<"rd18">;
-def RD19 : PTXReg<"rd19">;
-def RD20 : PTXReg<"rd20">;
-def RD21 : PTXReg<"rd21">;
-def RD22 : PTXReg<"rd22">;
-def RD23 : PTXReg<"rd23">;
-def RD24 : PTXReg<"rd24">;
-def RD25 : PTXReg<"rd25">;
-def RD26 : PTXReg<"rd26">;
-def RD27 : PTXReg<"rd27">;
-def RD28 : PTXReg<"rd28">;
-def RD29 : PTXReg<"rd29">;
-def RD30 : PTXReg<"rd30">;
-def RD31 : PTXReg<"rd31">;
-def RD32 : PTXReg<"rd32">;
-def RD33 : PTXReg<"rd33">;
-def RD34 : PTXReg<"rd34">;
-def RD35 : PTXReg<"rd35">;
-def RD36 : PTXReg<"rd36">;
-def RD37 : PTXReg<"rd37">;
-def RD38 : PTXReg<"rd38">;
-def RD39 : PTXReg<"rd39">;
-def RD40 : PTXReg<"rd40">;
-def RD41 : PTXReg<"rd41">;
-def RD42 : PTXReg<"rd42">;
-def RD43 : PTXReg<"rd43">;
-def RD44 : PTXReg<"rd44">;
-def RD45 : PTXReg<"rd45">;
-def RD46 : PTXReg<"rd46">;
-def RD47 : PTXReg<"rd47">;
-def RD48 : PTXReg<"rd48">;
-def RD49 : PTXReg<"rd49">;
-def RD50 : PTXReg<"rd50">;
-def RD51 : PTXReg<"rd51">;
-def RD52 : PTXReg<"rd52">;
-def RD53 : PTXReg<"rd53">;
-def RD54 : PTXReg<"rd54">;
-def RD55 : PTXReg<"rd55">;
-def RD56 : PTXReg<"rd56">;
-def RD57 : PTXReg<"rd57">;
-def RD58 : PTXReg<"rd58">;
-def RD59 : PTXReg<"rd59">;
-def RD60 : PTXReg<"rd60">;
-def RD61 : PTXReg<"rd61">;
-def RD62 : PTXReg<"rd62">;
-def RD63 : PTXReg<"rd63">;
-def RD64 : PTXReg<"rd64">;
-def RD65 : PTXReg<"rd65">;
-def RD66 : PTXReg<"rd66">;
-def RD67 : PTXReg<"rd67">;
-def RD68 : PTXReg<"rd68">;
-def RD69 : PTXReg<"rd69">;
-def RD70 : PTXReg<"rd70">;
-def RD71 : PTXReg<"rd71">;
-def RD72 : PTXReg<"rd72">;
-def RD73 : PTXReg<"rd73">;
-def RD74 : PTXReg<"rd74">;
-def RD75 : PTXReg<"rd75">;
-def RD76 : PTXReg<"rd76">;
-def RD77 : PTXReg<"rd77">;
-def RD78 : PTXReg<"rd78">;
-def RD79 : PTXReg<"rd79">;
-def RD80 : PTXReg<"rd80">;
-def RD81 : PTXReg<"rd81">;
-def RD82 : PTXReg<"rd82">;
-def RD83 : PTXReg<"rd83">;
-def RD84 : PTXReg<"rd84">;
-def RD85 : PTXReg<"rd85">;
-def RD86 : PTXReg<"rd86">;
-def RD87 : PTXReg<"rd87">;
-def RD88 : PTXReg<"rd88">;
-def RD89 : PTXReg<"rd89">;
-def RD90 : PTXReg<"rd90">;
-def RD91 : PTXReg<"rd91">;
-def RD92 : PTXReg<"rd92">;
-def RD93 : PTXReg<"rd93">;
-def RD94 : PTXReg<"rd94">;
-def RD95 : PTXReg<"rd95">;
-def RD96 : PTXReg<"rd96">;
-def RD97 : PTXReg<"rd97">;
-def RD98 : PTXReg<"rd98">;
-def RD99 : PTXReg<"rd99">;
-def RD100 : PTXReg<"rd100">;
-def RD101 : PTXReg<"rd101">;
-def RD102 : PTXReg<"rd102">;
-def RD103 : PTXReg<"rd103">;
-def RD104 : PTXReg<"rd104">;
-def RD105 : PTXReg<"rd105">;
-def RD106 : PTXReg<"rd106">;
-def RD107 : PTXReg<"rd107">;
-def RD108 : PTXReg<"rd108">;
-def RD109 : PTXReg<"rd109">;
-def RD110 : PTXReg<"rd110">;
-def RD111 : PTXReg<"rd111">;
-def RD112 : PTXReg<"rd112">;
-def RD113 : PTXReg<"rd113">;
-def RD114 : PTXReg<"rd114">;
-def RD115 : PTXReg<"rd115">;
-def RD116 : PTXReg<"rd116">;
-def RD117 : PTXReg<"rd117">;
-def RD118 : PTXReg<"rd118">;
-def RD119 : PTXReg<"rd119">;
-def RD120 : PTXReg<"rd120">;
-def RD121 : PTXReg<"rd121">;
-def RD122 : PTXReg<"rd122">;
-def RD123 : PTXReg<"rd123">;
-def RD124 : PTXReg<"rd124">;
-def RD125 : PTXReg<"rd125">;
-def RD126 : PTXReg<"rd126">;
-def RD127 : PTXReg<"rd127">;
+// The generated register info code throws warnings for empty register classes
+// (e.g. zero-length arrays), so we use a dummy register here just to prevent
+// these warnings.
+def DUMMY_REG : PTXReg<"R0">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
-def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
-def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
-def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
-def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
-def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
-def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;
+def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
new file mode 100644
index 0000000..50ef14a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
@@ -0,0 +1,149 @@
+//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-selectiondag-info"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
+}
+
+PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
+}
+
+SDValue
+PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ // Always inline memcpys. In PTX, we do not have a C library that provides
+ // a memcpy function.
+ //if (!AlwaysInline)
+ // return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+ EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+SDValue PTXSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ llvm_unreachable("memset lowering not implemented for PTX yet");
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h
new file mode 100644
index 0000000..e0c7167
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h
@@ -0,0 +1,53 @@
+//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTX subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXSELECTIONDAGINFO_H
+#define PTXSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
+/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
+class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const PTXSubtarget *Subtarget;
+
+public:
+ explicit PTXSelectionDAGInfo(const TargetMachine &TM);
+ ~PTXSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
index 8ec646e..1eb57d2 100644
--- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
@@ -14,7 +14,7 @@
#include "PTXSubtarget.h"
#include "PTX.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
index 0921f1f..b946d7c 100644
--- a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
@@ -114,7 +114,16 @@ class StringRef;
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
}
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ bool callsAreHandled() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool emitPtrAttribute() const {
+ return PTXVersion >= PTX_VERSION_2_2;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
}; // class PTXSubtarget
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
index ab926e0..449a3d9 100644
--- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
@@ -14,8 +14,32 @@
#include "PTX.h"
#include "PTXTargetMachine.h"
#include "llvm/PassManager.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+
using namespace llvm;
@@ -25,7 +49,7 @@ namespace llvm {
bool useCFI,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
- TargetAsmBackend *TAB,
+ MCAsmBackend *MAB,
bool ShowInst);
}
@@ -43,34 +67,47 @@ namespace {
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+
+ // Copied from LLVMTargetMachine.cpp
+ void printNoVerify(PassManagerBase &PM, const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+ }
+
+ void printAndVerify(PassManagerBase &PM,
+ const char *Banner) {
+ if (PrintMachineCode)
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ //if (VerifyMachineCode)
+ // PM.add(createMachineVerifierPass(Banner));
+ }
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
- const std::string &TT,
- const std::string &CPU,
- const std::string &FS,
+ StringRef TT, StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS),
+ : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
InstrInfo(*this),
+ TSInfo(*this),
TLInfo(*this) {
}
-PTX32TargetMachine::PTX32TargetMachine(const Target &T,
- const std::string& TT,
- const std::string& CPU,
- const std::string& FS)
- : PTXTargetMachine(T, TT, CPU, FS, false) {
+PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) {
}
-PTX64TargetMachine::PTX64TargetMachine(const Target &T,
- const std::string& TT,
- const std::string& CPU,
- const std::string& FS)
- : PTXTargetMachine(T, TT, CPU, FS, true) {
+PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM)
+ : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) {
}
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
@@ -82,6 +119,255 @@ bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// PTXMFInfoExtract must after register allocation!
+ //PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ return false;
+}
+
+bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // This is mostly based on LLVMTargetMachine::addPassesToEmitFile
+
+ // Add common CodeGen passes.
+ MCContext *Context = 0;
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ return true;
+ assert(Context != 0 && "Failed to get MCContext");
+
+ if (hasMCSaveTempLabels())
+ Context->setAllowTemporaryLabels(false);
+
+ const MCAsmInfo &MAI = *getMCAsmInfo();
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ OwningPtr<MCStreamer> AsmStreamer;
+
+ switch (FileType) {
+ default: return true;
+ case CGFT_AssemblyFile: {
+ MCInstPrinter *InstPrinter =
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+
+ // Create a code emitter if asked to show the encoding.
+ MCCodeEmitter *MCE = 0;
+ MCAsmBackend *MAB = 0;
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ true, /* verbose asm */
+ hasMCUseLoc(),
+ hasMCUseCFI(),
+ InstPrinter,
+ MCE, MAB,
+ false /* show MC encoding */);
+ AsmStreamer.reset(S);
+ break;
+ }
+ case CGFT_ObjectFile: {
+ llvm_unreachable("Object file emission is not supported with PTX");
+ }
+ case CGFT_Null:
+ // The Null output is intended for use for performance analysis and testing,
+ // not real users.
+ AsmStreamer.reset(createNullStreamer(*Context));
+ break;
+ }
+
+ // MC Logging
+ //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
+ // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
+ if (Printer == 0)
+ return true;
+
+ // If successful, createAsmPrinter took ownership of AsmStreamer.
+ AsmStreamer.take();
+
+ PM.add(Printer);
+
+ PM.add(createGCInfoDeleter());
+ return false;
+}
+
+bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify,
+ MCContext *&OutContext) {
+ // Add standard LLVM codegen passes.
+ // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some
+ // modifications for the PTX target.
+
+ // Standard LLVM-Level Passes.
+
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ PM.add(createLowerInvokePass(getTargetLowering()));
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel(PM, OptLevel);
+
+ //PM.add(createPrintFunctionPass("\n\n"
+ // "*** Final LLVM Code input to ISel ***\n",
+ // &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Standard Lower-Level Passes.
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
+ *getRegisterInfo(),
+ &getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*this, OptLevel));
+
+ // Ask the target for an isel.
+ if (addInstSelector(PM, OptLevel))
+ return true;
+
+ // Print the instruction selected machine code...
+ printAndVerify(PM, "After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createOptimizePHIsPass());
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
+ if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass");
+
+ PM.add(createMachineLICMPass());
+ PM.add(createMachineCSEPass());
+ PM.add(createMachineSinkingPass());
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PreRegAlloc passes");
+
+ // Perform register allocation.
+ PM.add(createPTXRegisterAllocator());
+ printAndVerify(PM, "After Register Allocation");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ if (OptLevel != CodeGenOpt::None) {
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ PM.add(createStackSlotColoringPass(false));
+
+ // FIXME: Post-RA LICM has asserts that fire on virtual registers.
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //if (!DisablePostRAMachineLICM)
+ // PM.add(createMachineLICMPass(false));
+
+ printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
+ }
+
+ // Run post-ra passes.
+ if (addPostRegAlloc(PM, OptLevel))
+ printAndVerify(PM, "After PostRegAlloc passes");
+
+ PM.add(createExpandPostRAPseudosPass());
+ printAndVerify(PM, "After ExpandPostRAPseudos");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ PM.add(createPrologEpilogCodeInserter());
+ printAndVerify(PM, "After PrologEpilogCodeInserter");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2(PM, OptLevel))
+ printAndVerify(PM, "After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createPostRAScheduler(OptLevel));
+ printAndVerify(PM, "After PostRAScheduler");
+ }
+
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
+ printNoVerify(PM, "After BranchFolding");
+ }
+
+ // Tail duplication.
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createTailDuplicatePass(false));
+ printNoVerify(PM, "After TailDuplicate");
+ }
+
+ PM.add(createGCMachineCodeAnalysisPass());
+
+ //if (PrintGCInfo)
+ // PM.add(createGCInfoPrinter(dbgs()));
+
+ if (OptLevel != CodeGenOpt::None) {
+ PM.add(createCodePlacementOptPass());
+ printNoVerify(PM, "After CodePlacementOpt");
+ }
+
+ if (addPreEmitPass(PM, OptLevel))
+ printNoVerify(PM, "After PreEmit passes");
+
PM.add(createPTXMFInfoExtract(*this, OptLevel));
+ PM.add(createPTXFPRoundingModePass(*this, OptLevel));
+
return false;
}
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
index ae42153..5b7c82b 100644
--- a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
@@ -17,6 +17,7 @@
#include "PTXISelLowering.h"
#include "PTXInstrInfo.h"
#include "PTXFrameLowering.h"
+#include "PTXSelectionDAGInfo.h"
#include "PTXSubtarget.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -25,15 +26,17 @@
namespace llvm {
class PTXTargetMachine : public LLVMTargetMachine {
private:
- const TargetData DataLayout;
- PTXSubtarget Subtarget; // has to be initialized before FrameLowering
- PTXFrameLowering FrameLowering;
- PTXInstrInfo InstrInfo;
- PTXTargetLowering TLInfo;
+ const TargetData DataLayout;
+ PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
+ PTXSelectionDAGInfo TSInfo;
+ PTXTargetLowering TLInfo;
public:
- PTXTargetMachine(const Target &T, const std::string &TT,
- const std::string &CPU, const std::string &FS,
+ PTXTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM,
bool is64Bit);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -49,27 +52,62 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual const PTXTargetLowering *getTargetLowering() const {
return &TLInfo; }
+ virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
+
+ // We override this method to supply our own set of codegen passes.
+ virtual bool addPassesToEmitFile(PassManagerBase &,
+ formatted_raw_ostream &,
+ CodeGenFileType,
+ CodeGenOpt::Level,
+ bool = true);
+
+ // Emission of machine code through JITCodeEmitter is not supported.
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+ JITCodeEmitter &,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
+
+ // Emission of machine code through MCJIT is not supported.
+ virtual bool addPassesToEmitMC(PassManagerBase &,
+ MCContext *&,
+ raw_ostream &,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
+
+ private:
+
+ bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+ bool DisableVerify, MCContext *&OutCtx);
}; // class PTXTargetMachine
class PTX32TargetMachine : public PTXTargetMachine {
public:
- PTX32TargetMachine(const Target &T, const std::string &TT,
- const std::string& CPU, const std::string& FS);
+ PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
}; // class PTX32TargetMachine
class PTX64TargetMachine : public PTXTargetMachine {
public:
- PTX64TargetMachine(const Target &T, const std::string &TT,
- const std::string& CPU, const std::string& FS);
+ PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ Reloc::Model RM, CodeModel::Model CM);
}; // class PTX32TargetMachine
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 4b09cf5..0000000
--- a/contrib/llvm/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMPTXInfo
- PTXTargetInfo.cpp
- )
-
-add_dependencies(LLVMPTXInfo PTXCodeGenTable_gen)
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile b/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
deleted file mode 100644
index 8619785..0000000
--- a/contrib/llvm/lib/Target/PTX/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMPTXInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
index 9df6c75..09a2735 100644
--- a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -9,7 +9,7 @@
#include "PTX.h"
#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Target/PTX/generate-register-td.py b/contrib/llvm/lib/Target/PTX/generate-register-td.py
deleted file mode 100755
index 1528690..0000000
--- a/contrib/llvm/lib/Target/PTX/generate-register-td.py
+++ /dev/null
@@ -1,163 +0,0 @@
-#!/usr/bin/env python
-##===- generate-register-td.py --------------------------------*-python-*--===##
-##
-## The LLVM Compiler Infrastructure
-##
-## This file is distributed under the University of Illinois Open Source
-## License. See LICENSE.TXT for details.
-##
-##===----------------------------------------------------------------------===##
-##
-## This file describes the PTX register file generator.
-##
-##===----------------------------------------------------------------------===##
-
-from sys import argv, exit, stdout
-
-
-if len(argv) != 5:
- print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
- exit(1)
-
-try:
- num_pred = int(argv[1])
- num_16bit = int(argv[2])
- num_32bit = int(argv[3])
- num_64bit = int(argv[4])
-except:
- print('ERROR: Invalid integer parameter')
- exit(1)
-
-## Print the register definition file
-td_file = open('PTXRegisterInfo.td', 'w')
-
-td_file.write('''
-//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the PTX register file
-//===----------------------------------------------------------------------===//
-
-class PTXReg<string n> : Register<n> {
- let Namespace = "PTX";
-}
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-''')
-
-
-# Print predicate registers
-td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n')
-for r in range(0, num_pred):
- td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
-
-# Print 16-bit registers
-td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_16bit):
- td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r))
-
-# Print 32-bit registers
-td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_32bit):
- td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r))
-
-# Print 64-bit registers
-td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n')
-for r in range(0, num_64bit):
- td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r))
-
-
-td_file.write('''
-//===----------------------------------------------------------------------===//
-// Register classes
-//===----------------------------------------------------------------------===//
-''')
-
-
-# Print register classes
-
-td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
-td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
-td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
-td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
-td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
-td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
-
-
-td_file.close()
-
-## Now write the PTXCallingConv.td file
-td_file = open('PTXCallingConv.td', 'w')
-
-# Reserve 10% of the available registers for return values, and the other 90%
-# for parameters
-num_ret_pred = int(0.1 * num_pred)
-num_ret_16bit = int(0.1 * num_16bit)
-num_ret_32bit = int(0.1 * num_32bit)
-num_ret_64bit = int(0.1 * num_64bit)
-num_param_pred = num_pred - num_ret_pred
-num_param_16bit = num_16bit - num_ret_16bit
-num_param_32bit = num_32bit - num_ret_32bit
-num_param_64bit = num_64bit - num_ret_64bit
-
-param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
-ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
-param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
-ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
-param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
-ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)]
-param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)]
-ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
-
-param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred)
-ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred)
-param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit)
-ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit)
-param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit)
-ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit)
-param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit)
-ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit)
-
-td_file.write('''
-//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the PTX architecture.
-//
-//===----------------------------------------------------------------------===//
-
-// PTX Formal Parameter Calling Convention
-def CC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[%s]>>,
- CCIfType<[i16], CCAssignToReg<[%s]>>,
- CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
- CCIfType<[i64,f64], CCAssignToReg<[%s]>>
-]>;
-
-// PTX Return Value Calling Convention
-def RetCC_PTX : CallingConv<[
- CCIfType<[i1], CCAssignToReg<[%s]>>,
- CCIfType<[i16], CCAssignToReg<[%s]>>,
- CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
- CCIfType<[i64,f64], CCAssignToReg<[%s]>>
-]>;
-''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
- ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
-
-
-td_file.close()
OpenPOWER on IntegriCloud