summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Target/PTX
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/PTX')
-rw-r--r--contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp249
-rw-r--r--contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h45
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h134
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp37
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h30
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp98
-rw-r--r--contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h36
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.h43
-rw-r--r--contrib/llvm/lib/Target/PTX/PTX.td141
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp561
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h57
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp181
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp24
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXFrameLowering.h44
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp356
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp522
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXISelLowering.h82
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrFormats.td51
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp359
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.h133
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrInfo.td1031
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td278
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td110
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp556
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp32
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp85
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h202
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXParamManager.cpp73
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXParamManager.h87
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp53
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp38
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h56
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td36
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp150
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h53
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp68
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXSubtarget.h131
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp165
-rw-r--r--contrib/llvm/lib/Target/PTX/PTXTargetMachine.h104
-rw-r--r--contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp25
41 files changed, 6530 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
new file mode 100644
index 0000000..1830213
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -0,0 +1,249 @@
+//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "PTXInstPrinter.h"
+#include "MCTargetDesc/PTXBaseInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#include "PTXGenAsmWriter.inc"
+
+PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) :
+ MCInstPrinter(MAI, MII, MRI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(STI.getFeatureBits());
+}
+
+void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ // Decode the register number into type and offset
+ unsigned RegSpace = RegNo & 0x7;
+ unsigned RegType = (RegNo >> 3) & 0x7;
+ unsigned RegOffset = RegNo >> 6;
+
+ // Print the register
+ OS << "%";
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
+}
+
+void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printPredicate(MI, O);
+ switch (MI->getOpcode()) {
+ default:
+ printInstruction(MI, O);
+ break;
+ case PTX::CALL:
+ printCall(MI, O);
+ }
+ O << ";";
+ printAnnotation(O, Annot);
+}
+
+void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) {
+ // The last two operands are the predicate operands
+ int RegIndex;
+ int OpIndex;
+
+ if (MI->getOpcode() == PTX::CALL) {
+ RegIndex = 0;
+ OpIndex = 1;
+ } else {
+ RegIndex = MI->getNumOperands()-2;
+ OpIndex = MI->getNumOperands()-1;
+ }
+
+ int PredOp = MI->getOperand(OpIndex).getImm();
+ if (PredOp == PTXPredicate::None)
+ return;
+
+ if (PredOp == PTXPredicate::Negate)
+ O << '!';
+ else
+ O << '@';
+
+ printOperand(MI, RegIndex, O);
+}
+
+void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
+ O << "\tcall.uni\t";
+ // The first two operands are the predicate slot
+ unsigned Index = 2;
+ unsigned NumRets = MI->getOperand(Index++).getImm();
+
+ if (NumRets > 0) {
+ O << "(";
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumRets; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ O << "), ";
+ }
+
+ const MCExpr* Expr = MI->getOperand(Index++).getExpr();
+ unsigned NumArgs = MI->getOperand(Index++).getImm();
+
+ // if the function call is to printf or puts, change to vprintf
+ if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol &Sym = SymRefExpr->getSymbol();
+ if (Sym.getName() == "printf" || Sym.getName() == "puts") {
+ O << "vprintf";
+ } else {
+ O << Sym.getName();
+ }
+ } else {
+ O << *Expr;
+ }
+
+ O << ", (";
+
+ if (NumArgs > 0) {
+ printOperand(MI, Index++, O);
+ for (unsigned i = 1; i < NumArgs; ++i) {
+ O << ", ";
+ printOperand(MI, Index++, O);
+ }
+ }
+ O << ")";
+}
+
+void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ double Imm = Op.getFPImm();
+ APFloat FPImm(Imm);
+ APInt FPIntImm = FPImm.bitcastToAPInt();
+ O << "0D";
+ // PTX requires us to output the full 64 bits, even if the number is zero
+ if (FPIntImm.getZExtValue() > 0) {
+ O << FPIntImm.toString(16, false);
+ } else {
+ O << "0000000000000000";
+ }
+ } else if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ } else {
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ const MCExpr *Expr = Op.getExpr();
+ if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol &Sym = SymRefExpr->getSymbol();
+ O << Sym.getName();
+ } else {
+ O << *Op.getExpr();
+ }
+ }
+}
+
+void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // By definition, operand OpNo+1 is an i32imm
+ const MCOperand &Op2 = MI->getOperand(OpNo+1);
+ printOperand(MI, OpNo, O);
+ if (Op2.getImm() == 0)
+ return; // don't print "+0"
+ O << "+" << Op2.getImm();
+}
+
+void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert (Op.isImm() && "Rounding modes must be immediate values");
+ switch (Op.getImm()) {
+ default:
+ llvm_unreachable("Unknown rounding mode!");
+ case PTXRoundingMode::RndDefault:
+ llvm_unreachable("FP rounding-mode pass did not handle instruction!");
+ case PTXRoundingMode::RndNone:
+ // Do not print anything.
+ break;
+ case PTXRoundingMode::RndNearestEven:
+ O << ".rn";
+ break;
+ case PTXRoundingMode::RndTowardsZero:
+ O << ".rz";
+ break;
+ case PTXRoundingMode::RndNegInf:
+ O << ".rm";
+ break;
+ case PTXRoundingMode::RndPosInf:
+ O << ".rp";
+ break;
+ case PTXRoundingMode::RndApprox:
+ O << ".approx";
+ break;
+ case PTXRoundingMode::RndNearestEvenInt:
+ O << ".rni";
+ break;
+ case PTXRoundingMode::RndTowardsZeroInt:
+ O << ".rzi";
+ break;
+ case PTXRoundingMode::RndNegInfInt:
+ O << ".rmi";
+ break;
+ case PTXRoundingMode::RndPosInfInt:
+ O << ".rpi";
+ break;
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
new file mode 100644
index 0000000..ea4d504
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -0,0 +1,45 @@
+//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints n PTX MCInst to a .ptx file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXINSTPRINTER_H
+#define PTXINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PTXInstPrinter : public MCInstPrinter {
+public:
+ PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printPredicate(const MCInst *MI, raw_ostream &O);
+ void printCall(const MCInst *MI, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
new file mode 100644
index 0000000..a3e0f32
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -0,0 +1,134 @@
+//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the PTX target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXBASEINFO_H
+#define PTXBASEINFO_H
+
+#include "PTXMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ namespace PTXStateSpace {
+ enum {
+ Global = 0, // default to global state space
+ Constant = 1,
+ Local = 2,
+ Parameter = 3,
+ Shared = 4
+ };
+ } // namespace PTXStateSpace
+
+ namespace PTXPredicate {
+ enum {
+ Normal = 0,
+ Negate = 1,
+ None = 2
+ };
+ } // namespace PTXPredicate
+
+ /// Namespace to hold all target-specific flags.
+ namespace PTXRoundingMode {
+ // Instruction Flags
+ enum {
+ // Rounding Mode Flags
+ RndMask = 15,
+ RndDefault = 0, // ---
+ RndNone = 1, // <NONE>
+ RndNearestEven = 2, // .rn
+ RndTowardsZero = 3, // .rz
+ RndNegInf = 4, // .rm
+ RndPosInf = 5, // .rp
+ RndApprox = 6, // .approx
+ RndNearestEvenInt = 7, // .rni
+ RndTowardsZeroInt = 8, // .rzi
+ RndNegInfInt = 9, // .rmi
+ RndPosInfInt = 10 // .rpi
+ };
+ } // namespace PTXII
+
+ namespace PTXRegisterType {
+ // Register type encoded in MCOperands
+ enum {
+ Pred = 0,
+ B16,
+ B32,
+ B64,
+ F32,
+ F64
+ };
+ } // namespace PTXRegisterType
+
+ namespace PTXRegisterSpace {
+ // Register space encoded in MCOperands
+ enum {
+ Reg = 0,
+ Local,
+ Param,
+ Argument,
+ Return
+ };
+ }
+
+ inline static void decodeRegisterName(raw_ostream &OS,
+ unsigned EncodedReg) {
+ OS << "%";
+
+ unsigned RegSpace = EncodedReg & 0x7;
+ unsigned RegType = (EncodedReg >> 3) & 0x7;
+ unsigned RegOffset = EncodedReg >> 6;
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
+ }
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
new file mode 100644
index 0000000..cdfbc80
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -0,0 +1,37 @@
+//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the PTXMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+void PTXMCAsmInfo::anchor() { }
+
+PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::ptx64)
+ PointerSize = 8;
+
+ CommentString = "//";
+
+ PrivateGlobalPrefix = "$L__";
+
+ AllowPeriodsInName = false;
+
+ HasSetDirective = false;
+
+ HasDotTypeDotSizeDirective = false;
+
+ HasSingleParameterDotFile = false;
+}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
new file mode 100644
index 0000000..32ca069
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PTXMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MCASM_INFO_H
+#define PTX_MCASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ class PTXMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+} // namespace llvm
+
+#endif // PTX_MCASM_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
new file mode 100644
index 0000000..08fb970
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -0,0 +1,98 @@
+//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCTargetDesc.h"
+#include "PTXMCAsmInfo.h"
+#include "InstPrinter/PTXInstPrinter.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPTXMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPTXMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ // PTX does not have a return address register.
+ InitPTXMCRegisterInfo(X, 0);
+ return X;
+}
+
+static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPTXMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ assert(SyntaxVariant == 0 && "We only have one syntax variant");
+ return new PTXInstPrinter(MAI, MII, MRI, STI);
+}
+
+extern "C" void LLVMInitializePTXTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+ RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
+ createPTXMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
+ createPTXMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
new file mode 100644
index 0000000..542638a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
@@ -0,0 +1,36 @@
+//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXMCTARGETDESC_H
+#define PTXMCTARGETDESC_H
+
+namespace llvm {
+class Target;
+
+extern Target ThePTX32Target;
+extern Target ThePTX64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PTX registers.
+#define GET_REGINFO_ENUM
+#include "PTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the PTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PTXGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTX.h b/contrib/llvm/lib/Target/PTX/PTX.h
new file mode 100644
index 0000000..ffb92cb
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.h
@@ -0,0 +1,43 @@
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PTX back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_H
+#define PTX_H
+
+#include "MCTargetDesc/PTXBaseInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MachineInstr;
+ class MCInst;
+ class PTXAsmPrinter;
+ class PTXTargetMachine;
+ class FunctionPass;
+
+ FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+ FunctionPass *createPTXRegisterAllocator();
+
+ void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP);
+
+} // namespace llvm;
+
+#endif // PTX_H
diff --git a/contrib/llvm/lib/Target/PTX/PTX.td b/contrib/llvm/lib/Target/PTX/PTX.td
new file mode 100644
index 0000000..994a68e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTX.td
@@ -0,0 +1,141 @@
+//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the PTX target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget Features
+//===----------------------------------------------------------------------===//
+
+//===- Architectural Features ---------------------------------------------===//
+
+def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
+ "Do not demote .f64 to .f32">;
+
+def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
+ "Disable Fused-Multiply Add">;
+
+//===- PTX Version --------------------------------------------------------===//
+
+def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
+ "Use PTX Language Version 2.0">;
+
+def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
+ "Use PTX Language Version 2.1">;
+
+def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
+ "Use PTX Language Version 2.2">;
+
+def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
+ "Use PTX Language Version 2.3">;
+
+//===- PTX Target ---------------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
+ "Use Shader Model 1.0">;
+def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
+ "Use Shader Model 1.1">;
+def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
+ "Use Shader Model 1.2">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
+ "Use Shader Model 1.3">;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
+ "Use Shader Model 2.0", [FeatureDouble]>;
+def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
+ "Use Shader Model 2.1", [FeatureDouble]>;
+def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
+ "Use Shader Model 2.2", [FeatureDouble]>;
+def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
+ "Use Shader Model 2.3", [FeatureDouble]>;
+
+def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
+ "PTX_COMPUTE_1_0",
+ "Use Compute Compatibility 1.0">;
+def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
+ "PTX_COMPUTE_1_1",
+ "Use Compute Compatibility 1.1">;
+def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
+ "PTX_COMPUTE_1_2",
+ "Use Compute Compatibility 1.2">;
+def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
+ "PTX_COMPUTE_1_3",
+ "Use Compute Compatibility 1.3">;
+def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
+ "PTX_COMPUTE_2_0",
+ "Use Compute Compatibility 2.0",
+ [FeatureDouble]>;
+
+//===----------------------------------------------------------------------===//
+// PTX supported processors
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+
+// Processor definitions for compute/shader models
+def : Proc<"compute_10", [FeatureCOMPUTE10]>;
+def : Proc<"compute_11", [FeatureCOMPUTE11]>;
+def : Proc<"compute_12", [FeatureCOMPUTE12]>;
+def : Proc<"compute_13", [FeatureCOMPUTE13]>;
+def : Proc<"compute_20", [FeatureCOMPUTE20]>;
+def : Proc<"sm_10", [FeatureSM10]>;
+def : Proc<"sm_11", [FeatureSM11]>;
+def : Proc<"sm_12", [FeatureSM12]>;
+def : Proc<"sm_13", [FeatureSM13]>;
+def : Proc<"sm_20", [FeatureSM20]>;
+def : Proc<"sm_21", [FeatureSM21]>;
+def : Proc<"sm_22", [FeatureSM22]>;
+def : Proc<"sm_23", [FeatureSM23]>;
+
+// Processor definitions for common GPU architectures
+def : Proc<"g80", [FeatureSM10]>;
+def : Proc<"gt200", [FeatureSM13]>;
+def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
+def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PTXRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrInfo.td"
+
+def PTXInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Assembly printer
+//===----------------------------------------------------------------------===//
+// PTX uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def PTXAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Declaration
+//===----------------------------------------------------------------------===//
+
+def PTX : Target {
+ let InstructionSet = PTXInstrInfo;
+ let AssemblyWriters = [PTXAsmWriter];
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
new file mode 100644
index 0000000..0b6ac7b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -0,0 +1,561 @@
+//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PTX assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-asm-printer"
+
+#include "PTXAsmPrinter.h"
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/Argument.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static const char PARAM_PREFIX[] = "__param_";
+static const char RETURN_PREFIX[] = "__ret_";
+
+static const char *getRegisterTypeName(unsigned RegType) {
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type");
+ case PTXRegisterType::Pred:
+ return ".pred";
+ case PTXRegisterType::B16:
+ return ".b16";
+ case PTXRegisterType::B32:
+ return ".b32";
+ case PTXRegisterType::B64:
+ return ".b64";
+ case PTXRegisterType::F32:
+ return ".f32";
+ case PTXRegisterType::F64:
+ return ".f64";
+ }
+}
+
+static const char *getStateSpaceName(unsigned addressSpace) {
+ switch (addressSpace) {
+ default: llvm_unreachable("Unknown state space");
+ case PTXStateSpace::Global: return "global";
+ case PTXStateSpace::Constant: return "const";
+ case PTXStateSpace::Local: return "local";
+ case PTXStateSpace::Parameter: return "param";
+ case PTXStateSpace::Shared: return "shared";
+ }
+}
+
+static const char *getTypeName(Type* type) {
+ while (true) {
+ switch (type->getTypeID()) {
+ default: llvm_unreachable("Unknown type");
+ case Type::FloatTyID: return ".f32";
+ case Type::DoubleTyID: return ".f64";
+ case Type::IntegerTyID:
+ switch (type->getPrimitiveSizeInBits()) {
+ default: llvm_unreachable("Unknown integer bit-width");
+ case 16: return ".u16";
+ case 32: return ".u32";
+ case 64: return ".u64";
+ }
+ case Type::ArrayTyID:
+ case Type::PointerTyID:
+ type = dyn_cast<SequentialType>(type)->getElementType();
+ break;
+ }
+ }
+ return NULL;
+}
+
+bool PTXAsmPrinter::doFinalization(Module &M) {
+ // XXX Temproarily remove global variables so that doFinalization() will not
+ // emit them again (global variables are emitted at beginning).
+
+ Module::GlobalListType &global_list = M.getGlobalList();
+ int i, n = global_list.size();
+ GlobalVariable **gv_array = new GlobalVariable* [n];
+
+ // first, back-up GlobalVariable in gv_array
+ i = 0;
+ for (Module::global_iterator I = global_list.begin(), E = global_list.end();
+ I != E; ++I)
+ gv_array[i++] = &*I;
+
+ // second, empty global_list
+ while (!global_list.empty())
+ global_list.remove(global_list.begin());
+
+ // call doFinalization
+ bool ret = AsmPrinter::doFinalization(M);
+
+ // now we restore global variables
+ for (i = 0; i < n; i ++)
+ global_list.insert(global_list.end(), gv_array[i]);
+
+ delete[] gv_array;
+ return ret;
+}
+
+void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
+{
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ // Emit the PTX .version and .target attributes
+ OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString());
+ OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() +
+ (ST.supportsDouble() ? ""
+ : ", map_f64_to_f32"));
+ // .address_size directive is optional, but it must immediately follow
+ // the .target directive if present within a module
+ if (ST.supportsPTX23()) {
+ const char *addrSize = ST.is64Bit() ? "64" : "32";
+ OutStreamer.EmitRawText(Twine("\t.address_size ") + addrSize);
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Define any .file directives
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ DICompileUnit DIUnit(*I);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ GetOrCreateSourceID(FN, Dir);
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // declare external functions
+ for (Module::const_iterator i = M.begin(), e = M.end();
+ i != e; ++i)
+ EmitFunctionDeclaration(i);
+
+ // declare global variables
+ for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
+ i != e; ++i)
+ EmitVariableDeclaration(i);
+}
+
+void PTXAsmPrinter::EmitFunctionBodyStart() {
+ OutStreamer.EmitRawText(Twine("{"));
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
+
+ // Print register definitions
+ SmallString<128> regDefs;
+ raw_svector_ostream os(regDefs);
+ unsigned numRegs;
+
+ // pred
+ numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .pred %p<" << numRegs << ">;\n";
+
+ // i16
+ numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
+
+ // i32
+ numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b32 %r<" << numRegs << ">;\n";
+
+ // i64
+ numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
+
+ // f32
+ numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .f32 %f<" << numRegs << ">;\n";
+
+ // f64
+ numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
+
+ // Local params
+ for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
+ i != e; ++i)
+ os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i)
+ << ";\n";
+
+ OutStreamer.EmitRawText(os.str());
+
+
+ const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+ DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
+ << " frame object(s)\n");
+ for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+ DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+ if (FrameInfo->getObjectSize(i) > 0) {
+ OutStreamer.EmitRawText("\t.local .align " +
+ Twine(FrameInfo->getObjectAlignment(i)) +
+ " .b8 __local" +
+ Twine(i) +
+ "[" +
+ Twine(FrameInfo->getObjectSize(i)) +
+ "];");
+ }
+ }
+
+ //unsigned Index = 1;
+ // Print parameter passing params
+ //for (PTXMachineFunctionInfo::param_iterator
+ // i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
+ // std::string def = "\t.param .b";
+ // def += utostr(*i);
+ // def += " __ret_";
+ // def += utostr(Index);
+ // Index++;
+ // def += ";";
+ // OutStreamer.EmitRawText(Twine(def));
+ //}
+}
+
+void PTXAsmPrinter::EmitFunctionBodyEnd() {
+ OutStreamer.EmitRawText(Twine("}"));
+}
+
+void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+ LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(gv))
+ return;
+
+ MCSymbol *gvsym = Mang->getSymbol(gv);
+
+ assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
+
+ SmallString<128> decl;
+ raw_svector_ostream os(decl);
+
+ // check if it is defined in some other translation unit
+ if (gv->isDeclaration())
+ os << ".extern ";
+
+ // state space: e.g., .global
+ os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' ';
+
+ // alignment (optional)
+ unsigned alignment = gv->getAlignment();
+ if (alignment != 0)
+ os << ".align " << gv->getAlignment() << ' ';
+
+
+ if (PointerType::classof(gv->getType())) {
+ PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
+ Type* elementTy = pointerTy->getElementType();
+
+ if (elementTy->isArrayTy()) {
+ assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
+
+ ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ unsigned numElements = arrayTy->getNumElements();
+
+ while (elementTy->isArrayTy()) {
+ arrayTy = dyn_cast<ArrayType>(elementTy);
+ elementTy = arrayTy->getElementType();
+
+ numElements *= arrayTy->getNumElements();
+ }
+
+ // FIXME: isPrimitiveType() == false for i16?
+ assert(elementTy->isSingleValueType() &&
+ "Non-primitive types are not handled");
+
+ // Find the size of the element in bits
+ unsigned elementSize = elementTy->getPrimitiveSizeInBits();
+
+ os << ".b" << elementSize << ' ' << gvsym->getName()
+ << '[' << numElements << ']';
+ } else {
+ os << ".b8" << gvsym->getName() << "[]";
+ }
+
+ // handle string constants (assume ConstantArray means string)
+ if (gv->hasInitializer()) {
+ const Constant *C = gv->getInitializer();
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ os << " = {";
+
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ if (i > 0)
+ os << ',';
+
+ os << "0x";
+ os.write_hex(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ }
+
+ os << '}';
+ }
+ }
+ } else {
+ // Note: this is currently the fall-through case and most likely generates
+ // incorrect code.
+ os << getTypeName(gv->getType()) << ' ' << gvsym->getName();
+
+ if (isa<ArrayType>(gv->getType()) || isa<PointerType>(gv->getType()))
+ os << "[]";
+ }
+
+ os << ';';
+
+ OutStreamer.EmitRawText(os.str());
+ OutStreamer.AddBlankLine();
+}
+
+void PTXAsmPrinter::EmitFunctionEntryLabel() {
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (!CurrentFnSym->isUndefined())
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
+
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ const PTXParamManager &PM = MFI->getParamManager();
+ const bool isKernel = MFI->isKernel();
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ SmallString<128> decl;
+ raw_svector_ostream os(decl);
+ os << (isKernel ? ".entry" : ".func");
+
+ if (!isKernel) {
+ os << " (";
+ if (ST.useParamSpaceForDeviceArgs()) {
+ for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
+ b = i; i != e; ++i) {
+ if (i != b)
+ os << ", ";
+
+ os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b)
+ os << ", ";
+
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
+ << MFI->getRegisterName(*i);
+ }
+ }
+ os << ')';
+ }
+
+ // Print function name
+ os << ' ' << CurrentFnSym->getName() << " (";
+
+ const Function *F = MF->getFunction();
+
+ // Print parameters
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b)
+ os << ", ";
+
+ os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
+ }*/
+ int Counter = 1;
+ for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
+ b = i; i != e; ++i) {
+ if (i != b)
+ os << ", ";
+ const Type *ArgType = (*i).getType();
+ os << ".param .b";
+ if (ArgType->isPointerTy()) {
+ if (ST.is64Bit())
+ os << "64";
+ else
+ os << "32";
+ } else {
+ os << ArgType->getPrimitiveSizeInBits();
+ }
+ if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
+ const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
+ os << " .ptr";
+ switch (PtrType->getAddressSpace()) {
+ default:
+ llvm_unreachable("Unknown address space in argument");
+ case PTXStateSpace::Global:
+ os << " .global";
+ break;
+ case PTXStateSpace::Shared:
+ os << " .shared";
+ break;
+ }
+ }
+ os << " __param_" << Counter++;
+ }
+ } else {
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
+ i != e; ++i) {
+ if (i != b)
+ os << ", ";
+
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
+ << MFI->getRegisterName(*i);
+ }
+ }
+ os << ')';
+
+ OutStreamer.EmitRawText(os.str());
+}
+
+void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func)
+{
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ std::string decl = "";
+
+ // hard-coded emission of extern vprintf function
+
+ if (func->getName() == "printf" || func->getName() == "puts") {
+ decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b";
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ decl += " __param_2, .param .b";
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ decl += " __param_3)\n";
+ }
+
+ OutStreamer.EmitRawText(Twine(decl));
+}
+
+unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>", StringRef());
+
+ // MCStream expects full path name as filename.
+ if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
+ SmallString<128> FullPathName = DirName;
+ sys::path::append(FullPathName, FileName);
+ // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+ }
+
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
+
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
+
+ // Print out a .file directive to specify files for .loc directives.
+ OutStreamer.EmitDwarfFileDirective(SrcId, "", Entry.getKey());
+
+ return SrcId;
+}
+
+MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
+ const MCExpr *Expr;
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
+ return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
+ MCOperand MCOp;
+ const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+ unsigned EncodedReg;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unknown operand type");
+ case MachineOperand::MO_Register:
+ if (MO.getReg() > 0) {
+ // Encode the register
+ EncodedReg = MFI->getEncodedRegister(MO.getReg());
+ } else {
+ EncodedReg = 0;
+ }
+ MCOp = MCOperand::CreateReg(EncodedReg);
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_FPImmediate:
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+
+ return MCOp;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializePTXAsmPrinter() {
+ RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
+ RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h
new file mode 100644
index 0000000..74c8d58
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXAsmPrinter.h
@@ -0,0 +1,57 @@
+//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PTX Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXASMPRINTER_H
+#define PTXASMPRINTER_H
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
+public:
+ explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const { return "PTX Assembly Printer"; }
+
+ bool doFinalization(Module &M);
+
+ virtual void EmitStartOfAsmFile(Module &M);
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitInstruction(const MachineInstr *MI);
+
+ unsigned GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName);
+
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCOperand lowerOperand(const MachineOperand &MO);
+
+private:
+ void EmitVariableDeclaration(const GlobalVariable *gv);
+ void EmitFunctionDeclaration(const Function* func);
+
+ StringMap<unsigned> SourceIdMap;
+}; // class PTXAsmPrinter
+} // namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp
new file mode 100644
index 0000000..a21d172
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -0,0 +1,181 @@
+//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a machine function pass that sets appropriate FP rounding
+// modes for all relevant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-fp-rounding-mode"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// NOTE: PTXFPRoundingModePass should be executed just before emission.
+
+namespace {
+ /// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
+ /// all FP instructions. Essentially, this pass just looks for all FP
+ /// instructions that have a rounding mode set to RndDefault, and sets an
+ /// appropriate rounding mode based on the target device.
+ ///
+ class PTXFPRoundingModePass : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ typedef std::pair<unsigned, unsigned> RndModeDesc;
+
+ PTXTargetMachine& TargetMachine;
+ DenseMap<unsigned, RndModeDesc> Instrs;
+
+ public:
+ PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID),
+ TargetMachine(TM) {
+ initializeMap();
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX FP Rounding Mode Pass";
+ }
+
+ private:
+
+ void initializeMap();
+ void processInstruction(MachineInstr &MI);
+ }; // class PTXFPRoundingModePass
+} // end anonymous namespace
+
+using namespace llvm;
+
+char PTXFPRoundingModePass::ID = 0;
+
+bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
+ // Look at each basic block
+ for (MachineFunction::iterator bbi = MF.begin(), bbe = MF.end(); bbi != bbe;
+ ++bbi) {
+ MachineBasicBlock &MBB = *bbi;
+ // Look at each instruction
+ for (MachineBasicBlock::iterator ii = MBB.begin(), ie = MBB.end();
+ ii != ie; ++ii) {
+ MachineInstr &MI = *ii;
+ processInstruction(MI);
+ }
+ }
+ return false;
+}
+
+void PTXFPRoundingModePass::initializeMap() {
+ using namespace PTXRoundingMode;
+ const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
+
+ // Build a map of default rounding mode for all instructions that need a
+ // rounding mode.
+ Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
+ Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
+
+ unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
+ Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
+
+ unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
+ Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
+ Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
+
+ Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
+
+ Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
+ Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
+
+ Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+ Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
+
+ Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
+ Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
+}
+
+void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
+ // Is this an instruction that needs a rounding mode?
+ if (Instrs.count(MI.getOpcode())) {
+ const RndModeDesc &Desc = Instrs[MI.getOpcode()];
+ // Get the rounding mode operand
+ MachineOperand &Op = MI.getOperand(Desc.first);
+ // Update the rounding mode if needed
+ if (Op.getImm() == PTXRoundingMode::RndDefault) {
+ Op.setImm(Desc.second);
+ }
+ }
+}
+
+FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXFPRoundingModePass(TM, OptLevel);
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
new file mode 100644
index 0000000..e6e268e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.cpp
@@ -0,0 +1,24 @@
+//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXFrameLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+using namespace llvm;
+
+void PTXFrameLowering::emitPrologue(MachineFunction &MF) const {
+}
+
+void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
new file mode 100644
index 0000000..831e818
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXFrameLowering.h
@@ -0,0 +1,44 @@
+//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_FRAMEINFO_H
+#define PTX_FRAMEINFO_H
+
+#include "PTX.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+ class PTXSubtarget;
+
+class PTXFrameLowering : public TargetFrameLowering {
+protected:
+ const PTXSubtarget &STI;
+
+public:
+ explicit PTXFrameLowering(const PTXSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
+ STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
new file mode 100644
index 0000000..5c7ee29
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -0,0 +1,356 @@
+//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXTargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+// PTXDAGToDAGISel - PTX specific code to select PTX machine
+// instructions for SelectionDAG operations.
+class PTXDAGToDAGISel : public SelectionDAGISel {
+ public:
+ PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);
+
+ virtual const char *getPassName() const {
+ return "PTX DAG->DAG Pattern Instruction Selection";
+ }
+
+ SDNode *Select(SDNode *Node);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
+ bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces auto'gened from the target description
+#include "PTXGenDAGISel.inc"
+
+ private:
+ // We need this only because we can't match intruction BRAdp
+ // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
+ SDNode *SelectBRCOND(SDNode *Node);
+
+ SDNode *SelectREADPARAM(SDNode *Node);
+ SDNode *SelectWRITEPARAM(SDNode *Node);
+ SDNode *SelectFrameIndex(SDNode *Node);
+
+ bool isImm(const SDValue &operand);
+ bool SelectImm(const SDValue &operand, SDValue &imm);
+
+ const PTXSubtarget& getSubtarget() const;
+}; // class PTXDAGToDAGISel
+} // namespace
+
+// createPTXISelDag - This pass converts a legalized DAG into a
+// PTX-specific DAG, ready for instruction scheduling
+FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXDAGToDAGISel(TM, OptLevel);
+}
+
+PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+
+SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
+ switch (Node->getOpcode()) {
+ case ISD::BRCOND:
+ return SelectBRCOND(Node);
+ case PTXISD::READ_PARAM:
+ return SelectREADPARAM(Node);
+ case PTXISD::WRITE_PARAM:
+ return SelectWRITEPARAM(Node);
+ case ISD::FrameIndex:
+ return SelectFrameIndex(Node);
+ default:
+ return SelectCode(Node);
+ }
+}
+
+SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
+ assert(Node->getNumOperands() >= 3);
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Pred = Node->getOperand(1);
+ SDValue Target = Node->getOperand(2); // branch target
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(Target.getOpcode() == ISD::BasicBlock);
+ assert(Pred.getValueType() == MVT::i1);
+
+ // Emit BRAdp
+ SDValue Ops[] = { Target, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Index = Node->getOperand(1);
+
+ int OpCode;
+
+ // Get the type of parameter we are reading
+ EVT VT = Node->getValueType(0);
+ assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::READPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::READPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::READPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::READPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::READPARAMF32;
+ else {
+ assert(Type == MVT::f64 && "Unexpected type!");
+ OpCode = PTX::READPARAMF64;
+ }
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Index, Pred, PredOp, Chain };
+ return CurDAG->getMachineNode(OpCode, dl, VT, Ops, 4);
+}
+
+SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
+
+ SDValue Chain = Node->getOperand(0);
+ SDValue Value = Node->getOperand(1);
+
+ int OpCode;
+
+ //Node->dumpr(CurDAG);
+
+ // Get the type of parameter we are writing
+ EVT VT = Value->getValueType(0);
+ assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");
+
+ MVT Type = VT.getSimpleVT();
+
+ if (Type == MVT::i1)
+ OpCode = PTX::WRITEPARAMPRED;
+ else if (Type == MVT::i16)
+ OpCode = PTX::WRITEPARAMI16;
+ else if (Type == MVT::i32)
+ OpCode = PTX::WRITEPARAMI32;
+ else if (Type == MVT::i64)
+ OpCode = PTX::WRITEPARAMI64;
+ else if (Type == MVT::f32)
+ OpCode = PTX::WRITEPARAMF32;
+ else if (Type == MVT::f64)
+ OpCode = PTX::WRITEPARAMF64;
+ else
+ llvm_unreachable("Invalid type in SelectWRITEPARAM");
+
+ SDValue Pred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue PredOp = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ DebugLoc dl = Node->getDebugLoc();
+
+ SDValue Ops[] = { Value, Pred, PredOp, Chain };
+ SDNode* Ret = CurDAG->getMachineNode(OpCode, dl, MVT::Other, Ops, 4);
+
+ //dbgs() << "SelectWRITEPARAM produced:\n\t";
+ //Ret->dumpr(CurDAG);
+
+ return Ret;
+}
+
+SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ //dbgs() << "Selecting FrameIndex at index " << FI << "\n";
+ //SDValue TFI = CurDAG->getTargetFrameIndex(FI, Node->getValueType(0));
+
+ PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
+
+ SDValue FrameSymbol = CurDAG->getTargetExternalSymbol(MFI->getFrameSymbol(FI),
+ Node->getValueType(0));
+
+ return FrameSymbol.getNode();
+}
+
+// Match memory operand of the form [reg+reg]
+bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
+ if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
+ isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
+ return false;
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+}
+
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ // FrameIndex addresses are handled separately
+ //errs() << "SelectADDRri: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Success\n";
+ return true;
+ }
+
+ /*if (Addr.getNumOperands() == 1) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ errs() << "Success\n";
+ return true;
+ }*/
+
+ //errs() << "SelectADDRri fails on: ";
+ //Addr.getNode()->dumpr();
+
+ if (isImm(Addr)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ //errs() << "Success\n";
+ return true;
+
+ /*if (Addr.getOpcode() != ISD::ADD) {
+ // let SelectADDRii handle the [imm] case
+ if (isImm(Addr))
+ return false;
+ // it is [reg]
+
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+ }
+
+ if (Addr.getNumOperands() < 2)
+ return false;
+
+ // let SelectADDRii handle the [imm+imm] case
+ if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
+ return false;
+
+ // try [reg+imm] and [imm+reg]
+ for (int i = 0; i < 2; i ++)
+ if (SelectImm(Addr.getOperand(1-i), Offset)) {
+ Base = Addr.getOperand(i);
+ return true;
+ }
+
+ // neither [reg+imm] nor [imm+reg]
+ return false;*/
+}
+
+// Match memory operand of the form [imm+imm] and [imm]
+bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ // is [imm+imm]?
+ if (Addr.getOpcode() == ISD::ADD) {
+ return SelectImm(Addr.getOperand(0), Base) &&
+ SelectImm(Addr.getOperand(1), Offset);
+ }
+
+ // is [imm]?
+ if (SelectImm(Addr, Base)) {
+ assert(Addr.getValueType().isSimple() && "Type must be simple");
+
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+
+ return true;
+ }
+
+ return false;
+}
+
+// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
+bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
+ SDValue &Offset) {
+ //errs() << "SelectADDRlocal: ";
+ //Addr.getNode()->dumpr();
+ if (isa<FrameIndexSDNode>(Addr)) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
+ //errs() << "Success\n";
+ return true;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ Base = Addr.getOperand(0);
+ if (!isa<FrameIndexSDNode>(Base)) {
+ //errs() << "Failure\n";
+ return false;
+ }
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ //errs() << "Offset: ";
+ //Offset.getNode()->dumpr();
+ //errs() << "Success\n";
+ return true;
+ }
+
+ //errs() << "Failure\n";
+ return false;
+}
+
+bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
+ return ConstantSDNode::classof(operand.getNode());
+}
+
+bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
+ SDNode *node = operand.getNode();
+ if (!ConstantSDNode::classof(node))
+ return false;
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(node);
+ imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
+ operand.getValueType());
+ return true;
+}
+
+const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const
+{
+ return TM.getSubtarget<PTXSubtarget>();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
new file mode 100644
index 0000000..ef4455b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.cpp
@@ -0,0 +1,522 @@
+//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXISelLowering.h"
+#include "PTX.h"
+#include "PTXMachineFunctionInfo.h"
+#include "PTXRegisterInfo.h"
+#include "PTXSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
+ // Set up the register classes.
+ addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
+ addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
+ addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
+ addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+ setMinFunctionAlignment(2);
+
+ // Let LLVM use loads/stores for all mem* operations
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
+ ////////////////////////////////////
+ /////////// Expansion //////////////
+ ////////////////////////////////////
+
+ // (any/zero/sign) extload => load + (any/zero/sign) extend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
+
+ // f32 extload => load + fextend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // f64 truncstore => trunc + store
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // sign_extend_inreg => sign_extend
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // br_cc => brcond
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // select_cc => setcc
+
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ ////////////////////////////////////
+ //////////// Legal /////////////////
+ ////////////////////////////////////
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ ////////////////////////////////////
+ //////////// Custom ////////////////
+ ////////////////////////////////////
+
+ // customise setcc to use bitwise logic if possible
+
+ //setOperationAction(ISD::SETCC, MVT::i1, Custom);
+ setOperationAction(ISD::SETCC, MVT::i1, Legal);
+
+ // customize translation of memory addresses
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+
+ // Compute derived properties from the register classes
+ computeRegisterProperties();
+}
+
+EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+}
+
+SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unimplemented operand");
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ }
+}
+
+const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unknown opcode");
+ case PTXISD::COPY_ADDRESS:
+ return "PTXISD::COPY_ADDRESS";
+ case PTXISD::LOAD_PARAM:
+ return "PTXISD::LOAD_PARAM";
+ case PTXISD::STORE_PARAM:
+ return "PTXISD::STORE_PARAM";
+ case PTXISD::READ_PARAM:
+ return "PTXISD::READ_PARAM";
+ case PTXISD::WRITE_PARAM:
+ return "PTXISD::WRITE_PARAM";
+ case PTXISD::EXIT:
+ return "PTXISD::EXIT";
+ case PTXISD::RET:
+ return "PTXISD::RET";
+ case PTXISD::CALL:
+ return "PTXISD::CALL";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Lower Operation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+
+ // Look for X == 0, X == 1, X != 0, or X != 1
+ // We can simplify these to bitwise logic
+
+ //if (Op1.getOpcode() == ISD::Constant &&
+ // (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ // cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ // (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ //
+ // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ //}
+
+ //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1);
+ //if(COp1 && COp1->getZExtValue() == 1) {
+ // if(CC == ISD::SETNE) {
+ // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0);
+ // }
+ //}
+
+ llvm_unreachable("setcc was not matched by a pattern!");
+
+ return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
+}
+
+SDValue PTXTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+
+ assert(PtrVT.isSimple() && "Pointer must be to primitive type.");
+
+ SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS,
+ dl,
+ PtrVT.getSimpleVT(),
+ targetGlobal);
+
+ return movInstr;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue PTXTargetLowering::
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
+ case CallingConv::PTX_Kernel:
+ MFI->setKernel(true);
+ break;
+ case CallingConv::PTX_Device:
+ MFI->setKernel(false);
+ break;
+ }
+
+ // We do one of two things here:
+ // IsKernel || SM >= 2.0 -> Use param space for arguments
+ // SM < 2.0 -> Use registers for arguments
+ if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
+ // We just need to emit the proper LOAD_PARAM ISDs
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
+ "Kernels cannot take pred operands");
+
+ unsigned ParamSize = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addArgumentParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ ParamValue);
+ InVals.push_back(ArgValue);
+ }
+ }
+ else {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT RegVT = Ins[i].VT;
+ const TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ unsigned RegType;
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1)
+ RegType = PTXRegisterType::Pred;
+ else if (RegVT == MVT::i16)
+ RegType = PTXRegisterType::B16;
+ else if (RegVT == MVT::i32)
+ RegType = PTXRegisterType::B32;
+ else if (RegVT == MVT::i64)
+ RegType = PTXRegisterType::B64;
+ else if (RegVT == MVT::f32)
+ RegType = PTXRegisterType::F32;
+ else if (RegVT == MVT::f64)
+ RegType = PTXRegisterType::F64;
+ else
+ llvm_unreachable("Unknown parameter type");
+
+ // Use a unique index in the instruction to prevent instruction folding.
+ // Yes, this is a hack.
+ SDValue Index = DAG.getTargetConstant(i, MVT::i32);
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+ SDValue ArgValue = DAG.getNode(PTXISD::READ_PARAM, dl, RegVT, Chain,
+ Index);
+
+ InVals.push_back(ArgValue);
+
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument);
+ }
+ }
+
+ return Chain;
+}
+
+SDValue PTXTargetLowering::
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const {
+ if (isVarArg) llvm_unreachable("PTX does not support varargs");
+
+ switch (CallConv) {
+ default:
+ llvm_unreachable("Unsupported calling convention.");
+ case CallingConv::PTX_Kernel:
+ assert(Outs.size() == 0 && "Kernel must return void.");
+ return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
+ case CallingConv::PTX_Device:
+ assert(Outs.size() <= 1 && "Can at most return one value.");
+ break;
+ }
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = MFI->getParamManager();
+
+ SDValue Flag;
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
+
+ if (ST.useParamSpaceForDeviceArgs()) {
+ assert(Outs.size() < 2 && "Device functions can return at most one value");
+
+ if (Outs.size() == 1) {
+ unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
+ unsigned Param = PM.addReturnParam(ParamSize);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[0]);
+ }
+ } else {
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT RegVT = Outs[i].VT;
+ const TargetRegisterClass* TRC;
+ unsigned RegType;
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ RegType = PTXRegisterType::Pred;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ RegType = PTXRegisterType::B16;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ RegType = PTXRegisterType::B32;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ RegType = PTXRegisterType::B64;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ RegType = PTXRegisterType::F32;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ RegType = PTXRegisterType::F64;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+
+ SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]/*, Flag*/);
+ SDValue OutReg = DAG.getRegister(Reg, RegVT);
+
+ Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
+
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
+ }
+ }
+
+ if (Flag.getNode() == 0) {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+ }
+ else {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+ }
+}
+
+SDValue
+PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = PTXMFI->getParamManager();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
+ "Calls are not handled for the target device");
+
+ // Identify the callee function
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ const Function *function = cast<Function>(GV);
+
+ // allow non-device calls only for printf
+ bool isPrintf = function->getName() == "printf" || function->getName() == "puts";
+
+ assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) &&
+ "PTX function calls must be to PTX device functions");
+
+ unsigned outSize = isPrintf ? 2 : Outs.size();
+
+ std::vector<SDValue> Ops;
+ // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
+ Ops.resize(outSize + Ins.size() + 4);
+
+ Ops[0] = Chain;
+
+ // Identify the callee function
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
+ Ops[Ins.size()+2] = Callee;
+
+ // #Outs
+ Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32);
+
+ if (isPrintf) {
+ // first argument is the address of the global string variable in memory
+ unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits());
+ SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue0, OutVals[0]);
+ Ops[Ins.size()+4] = ParamValue0;
+
+ // alignment is the maximum size of all the arguments
+ unsigned alignment = 0;
+ for (unsigned i = 1; i < OutVals.size(); ++i) {
+ alignment = std::max(alignment,
+ OutVals[i].getValueType().getSizeInBits());
+ }
+
+ // size is the alignment multiplied by the number of arguments
+ unsigned size = alignment * (OutVals.size() - 1);
+
+ // second argument is the address of the stack object (unless no arguments)
+ unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits());
+ SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(),
+ MVT::Other);
+ Ops[Ins.size()+5] = ParamValue1;
+
+ if (size > 0)
+ {
+ // create a local stack object to store the arguments
+ unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false);
+ SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy());
+
+ // store each of the arguments to the stack in turn
+ for (unsigned int i = 1; i != OutVals.size(); i++) {
+ SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy()));
+ Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+
+ // copy the address of the local frame index to get the address in non-local space
+ SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex);
+
+ // store this address in the second argument
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr);
+ }
+ }
+ else
+ {
+ // Generate STORE_PARAM nodes for each function argument. In PTX, function
+ // arguments are explicitly stored into .param variables and passed as
+ // arguments. There is no register/stack-based calling convention in PTX.
+ for (unsigned i = 0; i != OutVals.size(); ++i) {
+ unsigned Size = OutVals[i].getValueType().getSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[i]);
+ Ops[i+Ins.size()+4] = ParamValue;
+ }
+ }
+
+ std::vector<SDValue> InParams;
+
+ // Generate list of .param variables to hold the return value(s).
+ Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ unsigned Size = Ins[i].VT.getStoreSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Ops[i+2] = ParamValue;
+ InParams.push_back(ParamValue);
+ }
+
+ Ops[0] = Chain;
+
+ // Create the CALL node.
+ Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());
+
+ // Create the LOAD_PARAM nodes that retrieve the function return value(s).
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ InParams[i]);
+ InVals.push_back(Load);
+ }
+
+ return Chain;
+}
+
+unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
+ // All arguments consist of one "register," regardless of the type.
+ return 1;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXISelLowering.h b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
new file mode 100644
index 0000000..33220f4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXISelLowering.h
@@ -0,0 +1,82 @@
+//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PTX uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_ISEL_LOWERING_H
+#define PTX_ISEL_LOWERING_H
+
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+namespace PTXISD {
+ enum NodeType {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ LOAD_PARAM,
+ STORE_PARAM,
+ READ_PARAM,
+ WRITE_PARAM,
+ EXIT,
+ RET,
+ COPY_ADDRESS,
+ CALL
+ };
+} // namespace PTXISD
+
+class PTXTargetLowering : public TargetLowering {
+ public:
+ explicit PTXTargetLowering(TargetMachine &TM);
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
+
+ private:
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+}; // class PTXTargetLowering
+} // namespace llvm
+
+#endif // PTX_ISEL_LOWERING_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
new file mode 100644
index 0000000..267e834
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrFormats.td
@@ -0,0 +1,51 @@
+//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Rounding Mode Specifier
+/*class RoundingMode<bits<3> val> {
+ bits<3> Value = val;
+}
+
+def RndDefault : RoundingMode<0>;
+def RndNearestEven : RoundingMode<1>;
+def RndNearestZero : RoundingMode<2>;
+def RndNegInf : RoundingMode<3>;
+def RndPosInf : RoundingMode<4>;
+def RndApprox : RoundingMode<5>;*/
+
+
+// Rounding Mode Operand
+def RndMode : Operand<i32> {
+ let PrintMethod = "printRoundingMode";
+}
+
+def RndDefault : PatLeaf<(i32 0)>;
+
+// PTX Predicate operand, default to (0, 0) = (zero-reg, none).
+// Leave PrintMethod empty; predicate printing is defined elsewhere.
+def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
+ (ops (i1 zero_reg), (i32 2))>;
+
+def RndModeOperand : Operand<OtherVT> {
+ let MIOperandInfo = (ops i32imm);
+}
+
+// Instruction Types
+let Namespace = "PTX" in {
+
+ class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
+ : Instruction {
+ dag OutOperandList = oops;
+ dag InOperandList = !con(iops, (ins pred:$_p));
+ let AsmString = asmstr; // Predicate printing is defined elsewhere.
+ let Pattern = pattern;
+ let isPredicable = 1;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
new file mode 100644
index 0000000..443cd54
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.cpp
@@ -0,0 +1,359 @@
+//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-instrinfo"
+
+#include "PTXInstrInfo.h"
+#include "PTX.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PTXGenInstrInfo.inc"
+
+using namespace llvm;
+
+PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
+ : PTXGenInstrInfo(),
+ RI(_TM, *this), TM(_TM) {}
+
+static const struct map_entry {
+ const TargetRegisterClass *cls;
+ const int opcode;
+} map[] = {
+ { &PTX::RegI16RegClass, PTX::MOVU16rr },
+ { &PTX::RegI32RegClass, PTX::MOVU32rr },
+ { &PTX::RegI64RegClass, PTX::MOVU64rr },
+ { &PTX::RegF32RegClass, PTX::MOVF32rr },
+ { &PTX::RegF64RegClass, PTX::MOVF64rr },
+ { &PTX::RegPredRegClass, PTX::MOVPREDrr }
+};
+
+void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const {
+
+ const MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ //assert(MRI.getRegClass(SrcReg) == MRI.getRegClass(DstReg) &&
+ // "Invalid register copy between two register classes");
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++i) {
+ if (map[i].cls == MRI.getRegClass(DstReg)) {
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
+ addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPredicate(MI);
+ return;
+ }
+ }
+
+ llvm_unreachable("Impossible reg-to-reg copy");
+}
+
+bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
+ if (DstRC != SrcRC)
+ return false;
+
+ for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
+ if (DstRC == map[i].cls) {
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
+ AddDefaultPredicate(MI);
+ return true;
+ }
+
+ return false;
+}
+
+bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case PTX::MOVU16rr:
+ case PTX::MOVU32rr:
+ case PTX::MOVU64rr:
+ case PTX::MOVF32rr:
+ case PTX::MOVF64rr:
+ case PTX::MOVPREDrr:
+ assert(MI.getNumOperands() >= 2 &&
+ MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
+ "Invalid register-register move instruction");
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+}
+
+// predicate support
+
+bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
+ int i = MI->findFirstPredOperandIdx();
+ return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
+}
+
+bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ return !isPredicated(MI) && MI->isTerminator();
+}
+
+bool PTXInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ if (Pred.size() < 2)
+ llvm_unreachable("lesser than 2 predicate operands are provided");
+
+ int i = MI->findFirstPredOperandIdx();
+ if (i == -1)
+ llvm_unreachable("missing predicate operand");
+
+ MI->getOperand(i).setReg(Pred[0].getReg());
+ MI->getOperand(i+1).setImm(Pred[1].getImm());
+
+ return true;
+}
+
+bool PTXInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ const MachineOperand &PredReg1 = Pred1[0];
+ const MachineOperand &PredReg2 = Pred2[0];
+ if (PredReg1.getReg() != PredReg2.getReg())
+ return false;
+
+ const MachineOperand &PredOp1 = Pred1[1];
+ const MachineOperand &PredOp2 = Pred2[1];
+ if (PredOp1.getImm() != PredOp2.getImm())
+ return false;
+
+ return true;
+}
+
+bool PTXInstrInfo::
+DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // If an instruction sets a predicate register, it defines a predicate.
+
+ // TODO supprot 5-operand format of setp instruction
+
+ if (MI->getNumOperands() < 1)
+ return false;
+
+ const MachineOperand &MO = MI->getOperand(0);
+
+ if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
+ return false;
+
+ Pred.push_back(MO);
+ Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
+ return true;
+}
+
+// branch support
+
+bool PTXInstrInfo::
+AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // TODO implement cases when AllowModify is true
+
+ if (MBB.empty())
+ return true;
+
+ MachineBasicBlock::iterator iter = MBB.end();
+ const MachineInstr& instLast1 = *--iter;
+ // for special case that MBB has only 1 instruction
+ const bool IsSizeOne = MBB.size() == 1;
+ // if IsSizeOne is true, *--iter and instLast2 are invalid
+ // we put a dummy value in instLast2 and desc2 since they are used
+ const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
+
+ DEBUG(dbgs() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
+ DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
+
+ // this block ends with no branches
+ if (!IsAnyKindOfBranch(instLast1)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
+ return false;
+ }
+
+ // this block ends with only an unconditional branch
+ if (instLast1.isUnconditionalBranch() &&
+ // when IsSizeOne is true, it "absorbs" the evaluation of instLast2
+ (IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
+ TBB = GetBranchTarget(instLast1);
+ return false;
+ }
+
+ // this block ends with a conditional branch and
+ // it falls through to a successor block
+ if (instLast1.isConditionalBranch() &&
+ IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
+ TBB = GetBranchTarget(instLast1);
+ int i = instLast1.findFirstPredOperandIdx();
+ Cond.push_back(instLast1.getOperand(i));
+ Cond.push_back(instLast1.getOperand(i+1));
+ return false;
+ }
+
+ // when IsSizeOne is true, we are done
+ if (IsSizeOne)
+ return true;
+
+ // this block ends with a conditional branch
+ // followed by an unconditional branch
+ if (instLast2.isConditionalBranch() &&
+ instLast1.isUnconditionalBranch()) {
+ DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
+ TBB = GetBranchTarget(instLast2);
+ FBB = GetBranchTarget(instLast1);
+ int i = instLast2.findFirstPredOperandIdx();
+ Cond.push_back(instLast2.getOperand(i));
+ Cond.push_back(instLast2.getOperand(i+1));
+ return false;
+ }
+
+ // branch cannot be understood
+ DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
+ return true;
+}
+
+unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ unsigned count = 0;
+ while (!MBB.empty())
+ if (IsAnyKindOfBranch(MBB.back())) {
+ MBB.pop_back();
+ ++count;
+ } else
+ break;
+ DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n");
+ return count;
+}
+
+unsigned PTXInstrInfo::
+InsertBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
+ DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: TBB: (NULL)\n");
+ DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
+ << "\n";
+ else dbgs() << "InsertBranch: FBB: (NULL)\n");
+ DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
+
+ assert(TBB && "TBB is NULL");
+
+ if (FBB) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
+ return 2;
+ } else if (Cond.size()) {
+ BuildMI(&MBB, DL, get(PTX::BRAdp))
+ .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
+ return 1;
+ } else {
+ BuildMI(&MBB, DL, get(PTX::BRAd))
+ .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
+ return 1;
+ }
+}
+
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ llvm_unreachable("storeRegToStackSlot should not be called for PTX");
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ llvm_unreachable("loadRegFromStackSlot should not be called for PTX");
+}
+
+// static helper routines
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ SDValue ops[] = { Op1, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+MachineSDNode *PTXInstrInfo::
+GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
+ SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
+ SDValue predOp = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);
+ SDValue ops[] = { Op1, Op2, predReg, predOp };
+ return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops));
+}
+
+void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
+ if (MI->findFirstPredOperandIdx() == -1) {
+ MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
+ MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
+ }
+}
+
+bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
+ return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
+}
+
+bool PTXInstrInfo::
+IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
+ for (MachineBasicBlock::const_succ_iterator
+ i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
+ if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i))
+ return true;
+ return false;
+}
+
+MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
+ // FIXME So far all branch instructions put destination in 1st operand
+ const MachineOperand& target = inst.getOperand(0);
+ assert(target.isMBB() && "FIXME: detect branch target operand");
+ return target.getMBB();
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
new file mode 100644
index 0000000..fba89c0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.h
@@ -0,0 +1,133 @@
+//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_INSTR_INFO_H
+#define PTX_INSTR_INFO_H
+
+#include "PTXRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PTXGenInstrInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+
+class MachineSDNode;
+class SDValue;
+class SelectionDAG;
+
+class PTXInstrInfo : public PTXGenInstrInfo {
+private:
+ const PTXRegisterInfo RI;
+ PTXTargetMachine &TM;
+
+public:
+ explicit PTXInstrInfo(PTXTargetMachine &_TM);
+
+ virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DstReg, unsigned SrcReg,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
+
+ virtual bool isMoveInstr(const MachineInstr& MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ // predicate support
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+
+ virtual
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const;
+
+ virtual
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+
+ // PTX is fully-predicable
+ virtual bool isPredicable(MachineInstr *MI) const { return true; }
+
+ // branch support
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ // Memory operand folding for spills
+ // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+ // loadRegFromStackSlot. Doing so will get rid of the "stack" registers
+ // we currently use to spill, though I doubt the overall effect on ptxas
+ // output will be large. I have yet to see a case where ptxas is unable
+ // to see through the "stack" register usage and hence generates
+ // efficient code anyway.
+ // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ // MachineInstr* MI,
+ // const SmallVectorImpl<unsigned> &Ops,
+ // int FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass* RC,
+ const TargetRegisterInfo* TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ // static helper routines
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1);
+
+ static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
+ DebugLoc dl, EVT VT,
+ SDValue Op1, SDValue Op2);
+
+ static void AddDefaultPredicate(MachineInstr *MI);
+
+ static bool IsAnyKindOfBranch(const MachineInstr& inst);
+
+ static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
+
+ static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
+}; // class PTXInstrInfo
+} // namespace llvm
+
+#endif // PTX_INSTR_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
new file mode 100644
index 0000000..bead428
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrInfo.td
@@ -0,0 +1,1031 @@
+//===-- PTXInstrInfo.td - PTX Instruction defs --------------*- tablegen-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+
+include "PTXInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// Code Generation Predicates
+//===----------------------------------------------------------------------===//
+
+// Shader Model Support
+def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
+def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
+def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
+def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
+
+// PTX Version Support
+def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
+def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">;
+def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">;
+def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">;
+def SupportsPTX23 : Predicate<"getSubtarget().supportsPTX23()">;
+def DoesNotSupportPTX23 : Predicate<"!getSubtarget().supportsPTX23()">;
+
+// Fused-Multiply Add
+def SupportsFMA : Predicate<"getSubtarget().supportsFMA()">;
+def DoesNotSupportFMA : Predicate<"!getSubtarget().supportsFMA()">;
+
+
+
+// def SDT_PTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+// def SDT_PTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// def PTXcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PTXCallSeqStart,
+// [SDNPHasChain, SDNPOutGlue]>;
+// def PTXcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PTXCallSeqEnd,
+// [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def PTXcall : SDNode<"PTXISD::CALL", SDTNone,
+ [SDNPHasChain, SDNPVariadic, SDNPOptInGlue, SDNPOutGlue]>;
+
+
+// Branch & call targets have OtherVT type.
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+//===----------------------------------------------------------------------===//
+// PTX Specific Node Definitions
+//===----------------------------------------------------------------------===//
+
+// PTX allow generic 3-reg shifts like shl r0, r1, r2
+def PTXshl : SDNode<"ISD::SHL", SDTIntBinOp>;
+def PTXsrl : SDNode<"ISD::SRL", SDTIntBinOp>;
+def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
+
+def PTXexit
+ : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
+def PTXret
+ : SDNode<"PTXISD::RET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PTXcopyaddress
+ : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+// For floating-point instructions, we cannot just embed the pattern into the
+// instruction definition since we need to muck around with the rounding mode,
+// and I do not know how to insert constants into instructions directly from
+// pattern matches.
+
+//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
+multiclass PTX_FLOAT_2OP<string opcstr> {
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, f32imm:$a),
+ !strconcat(opcstr, "$r.f32\t$d, $a"), []>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, f64imm:$a),
+ !strconcat(opcstr, "$r.f64\t$d, $a"), []>;
+}
+
+//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
+multiclass PTX_FLOAT_3OP<string opcstr> {
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, RegF32:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, f32imm:$b),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b"), []>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, RegF64:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, f64imm:$b),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b"), []>;
+}
+
+//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
+multiclass PTX_FLOAT_4OP<string opcstr> {
+ def rrr32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, RegF32:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rri32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, RegF32:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rii32 : InstPTX<(outs RegF32:$d),
+ (ins RndMode:$r, RegF32:$a, f32imm:$b, f32imm:$c),
+ !strconcat(opcstr, "$r.f32\t$d, $a, $b, $c"), []>;
+ def rrr64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, RegF64:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+ def rri64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, RegF64:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+ def rii64 : InstPTX<(outs RegF64:$d),
+ (ins RndMode:$r, RegF64:$a, f64imm:$b, f64imm:$c),
+ !strconcat(opcstr, "$r.f64\t$d, $a, $b, $c"), []>;
+}
+
+//===- Integer Instructions - 3 Operand Form ------------------------------===//
+multiclass PTX_INT3<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".u16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".u32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".u64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Integer Instructions - 3 Operand Form (Signed) ---------------------===//
+multiclass PTX_INT3_SIGNED<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".s16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".s32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".s64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Bitwise Logic Instructions - 3 Operand Form ------------------------===//
+multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
+ def ripreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, i1imm:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>;
+ def rrpreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, RegPred:$b),
+ !strconcat(opcstr, ".pred\t$d, $a, $b"),
+ [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>;
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, ".b16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, ".b32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, ".b64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+}
+
+//===- Integer Shift Instructions - 3 Operand Form ------------------------===//
+multiclass PTX_INT3ntnc<string opcstr, SDNode opnode> {
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+ def ir16 : InstPTX<(outs RegI16:$d),
+ (ins i16imm:$a, RegI16:$b),
+ !strconcat(opcstr, "16\t$d, $a, $b"),
+ [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>;
+ def ir32 : InstPTX<(outs RegI32:$d),
+ (ins i32imm:$a, RegI32:$b),
+ !strconcat(opcstr, "32\t$d, $a, $b"),
+ [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>;
+ def ir64 : InstPTX<(outs RegI64:$d),
+ (ins i64imm:$a, RegI64:$b),
+ !strconcat(opcstr, "64\t$d, $a, $b"),
+ [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
+}
+
+//===- Set Predicate Instructions (Int) - 3/4 Operand Forms ---------------===//
+multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
+ CondCode cmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>;
+ def ri
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>;
+
+ def rr_and_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_and_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
+ def rr_or_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_or_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
+ def rr_xor_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
+ def ri_xor_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ RegPred:$c))]>;
+
+ def rr_and_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
+ def ri_and_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
+ def rr_or_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
+ def ri_or_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
+ def rr_xor_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp),
+ (not RegPred:$c)))]>;
+ def ri_xor_not_r
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp),
+ (not RegPred:$c)))]>;
+}
+
+//===- Set Predicate Instructions (FP) - 3/4 Operand Form -----------------===//
+multiclass PTX_SETP_FP<RegisterClass RC, string regclsname, Operand immcls,
+ CondCode ucmp, CondCode ocmp, string cmpstr> {
+ // TODO support 5-operand format: p|q, a, b, c
+
+ def rr_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+ def rr_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+
+ def ri_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ucmp))]>;
+ def ri_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
+ !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
+ [(set RegPred:$p, (setcc RC:$a, fpimm:$b, ocmp))]>;
+
+ def rr_and_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
+ def rr_and_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
+
+ def rr_or_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
+ def rr_or_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
+
+ def rr_xor_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ RegPred:$c))]>;
+ def rr_xor_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, $c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ RegPred:$c))]>;
+
+ def rr_and_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
+ def rr_and_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".and.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
+
+ def rr_or_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
+ def rr_or_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".or.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
+
+ def rr_xor_not_r_u
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, "u.xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp),
+ (not RegPred:$c)))]>;
+ def rr_xor_not_r_o
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
+ !strconcat("setp.", cmpstr, ".xor.", regclsname,
+ "\t$p, $a, $b, !$c"),
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp),
+ (not RegPred:$c)))]>;
+}
+
+//===- Select Predicate Instructions - 4 Operand Form ---------------------===//
+multiclass PTX_SELP<RegisterClass RC, string regclsname, Operand immcls,
+ SDNode immnode> {
+ def rr
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
+ def ri
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, RC:$b, immnode:$c))]>;
+ def ii
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, immcls:$b, immcls:$c),
+ !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
+ [(set RC:$r, (select RegPred:$a, immnode:$b, immnode:$c))]>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+///===- Integer Arithmetic Instructions -----------------------------------===//
+
+defm ADD : PTX_INT3<"add", add>;
+defm SUB : PTX_INT3<"sub", sub>;
+defm MUL : PTX_INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies
+defm DIV : PTX_INT3<"div", udiv>;
+defm SDIV : PTX_INT3_SIGNED<"div", sdiv>;
+defm REM : PTX_INT3<"rem", urem>;
+
+///===- Floating-Point Arithmetic Instructions ----------------------------===//
+
+// FNEG
+defm FNEG : PTX_FLOAT_2OP<"neg">;
+
+// Standard Binary Operations
+defm FADD : PTX_FLOAT_3OP<"add">;
+defm FSUB : PTX_FLOAT_3OP<"sub">;
+defm FMUL : PTX_FLOAT_3OP<"mul">;
+defm FDIV : PTX_FLOAT_3OP<"div">;
+
+// Multi-operation hybrid instructions
+defm FMAD : PTX_FLOAT_4OP<"mad">, Requires<[SupportsFMA]>;
+
+
+///===- Floating-Point Intrinsic Instructions -----------------------------===//
+
+// SQRT
+def FSQRTrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sqrt$r.f32\t$d, $a", []>;
+def FSQRTrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+def FSQRTri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sqrt$r.f64\t$d, $a", []>;
+
+// SIN
+def FSINrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "sin$r.f32\t$d, $a", []>;
+def FSINrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "sin$r.f64\t$d, $a", []>;
+def FSINri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "sin$r.f64\t$d, $a", []>;
+
+// COS
+def FCOSrr32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF32:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSri32 : InstPTX<(outs RegF32:$d), (ins RndMode:$r, f32imm:$a),
+ "cos$r.f32\t$d, $a", []>;
+def FCOSrr64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegF64:$a),
+ "cos$r.f64\t$d, $a", []>;
+def FCOSri64 : InstPTX<(outs RegF64:$d), (ins RndMode:$r, f64imm:$a),
+ "cos$r.f64\t$d, $a", []>;
+
+
+
+
+///===- Comparison and Selection Instructions -----------------------------===//
+
+// .setp
+
+// Compare u16
+
+defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">;
+defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">;
+defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">;
+defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">;
+defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">;
+defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">;
+
+// Compare u32
+
+defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">;
+defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">;
+defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
+defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">;
+defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">;
+defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">;
+defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">;
+
+// Compare u64
+
+defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">;
+defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">;
+defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
+defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">;
+defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">;
+defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">;
+defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
+
+// Compare f32
+
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", f32imm, SETUGE, SETOGE, "ge">;
+
+// Compare f64
+
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", f64imm, SETUGE, SETOGE, "ge">;
+
+// .selp
+
+defm SELPi16 : PTX_SELP<RegI16, "u16", i16imm, imm>;
+defm SELPi32 : PTX_SELP<RegI32, "u32", i32imm, imm>;
+defm SELPi64 : PTX_SELP<RegI64, "u64", i64imm, imm>;
+defm SELPf32 : PTX_SELP<RegF32, "f32", f32imm, fpimm>;
+defm SELPf64 : PTX_SELP<RegF64, "f64", f64imm, fpimm>;
+
+///===- Logic and Shift Instructions --------------------------------------===//
+
+defm SHL : PTX_INT3ntnc<"shl.b", PTXshl>;
+defm SRL : PTX_INT3ntnc<"shr.u", PTXsrl>;
+defm SRA : PTX_INT3ntnc<"shr.s", PTXsra>;
+
+defm AND : PTX_LOGIC<"and", and>;
+defm OR : PTX_LOGIC<"or", or>;
+defm XOR : PTX_LOGIC<"xor", xor>;
+
+///===- Data Movement and Conversion Instructions -------------------------===//
+
+// any_extend
+// Implement the anyext instruction in terms of the PTX cvt instructions.
+//def : Pat<(i32 (anyext RegI16:$a)), (CVT_u32_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI16:$a)), (CVT_u64_u16 RegI16:$a)>;
+//def : Pat<(i64 (anyext RegI32:$a)), (CVT_u64_u32 RegI32:$a)>;
+
+// bitconvert
+// These instructions implement the bit-wise conversion between integer and
+// floating-point types.
+def MOVi32f32
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "mov.b32\t$d, $a", []>;
+def MOVf32i32
+ : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "mov.b32\t$d, $a", []>;
+def MOVi64f64
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "mov.b64\t$d, $a", []>;
+def MOVf64i64
+ : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "mov.b64\t$d, $a", []>;
+
+let neverHasSideEffects = 1 in {
+ def MOVPREDrr
+ : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
+ def MOVU16rr
+ : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
+ def MOVU32rr
+ : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
+ def MOVU64rr
+ : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
+ def MOVF32rr
+ : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
+ def MOVF64rr
+ : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVPREDri
+ : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
+ [(set RegPred:$d, imm:$a)]>;
+ def MOVU16ri
+ : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+ [(set RegI16:$d, imm:$a)]>;
+ def MOVU32ri
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, imm:$a)]>;
+ def MOVU64ri
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, imm:$a)]>;
+ def MOVF32ri
+ : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
+ [(set RegF32:$d, fpimm:$a)]>;
+ def MOVF64ri
+ : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+ [(set RegF64:$d, fpimm:$a)]>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def MOVaddr32
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ def MOVaddr64
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ def MOVframe32
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a",
+ [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>;
+ def MOVframe64
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a",
+ [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>;
+}
+
+// PTX cvt instructions
+// Note all of these may actually be used, we just define all possible patterns
+// here (that make sense).
+// FIXME: Can we collapse this somehow into a multiclass def?
+
+// To i16
+def CVTu16u32
+ : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a", []>;
+def CVTu16u64
+ : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a", []>;
+def CVTu16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u16.f32\t$d, $a", []>;
+def CVTs16f32
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s16.f32\t$d, $a", []>;
+def CVTu16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u16.f64\t$d, $a", []>;
+def CVTs16f64
+ : InstPTX<(outs RegI16:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s16.f64\t$d, $a", []>;
+
+// To i32
+def CVTu32u16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a", []>;
+def CVTs32s16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.s32.s16\t$d, $a", []>;
+def CVTu32u64
+ : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a", []>;
+def CVTu32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u32.f32\t$d, $a", []>;
+def CVTs32f32
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s32.f32\t$d, $a", []>;
+def CVTu32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u32.f64\t$d, $a", []>;
+def CVTs32f64
+ : InstPTX<(outs RegI32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s32.f64\t$d, $a", []>;
+
+// To i64
+def CVTu64u16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a", []>;
+def CVTs64s16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.s64.s16\t$d, $a", []>;
+def CVTu64u32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a", []>;
+def CVTs64s32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.s64.s32\t$d, $a", []>;
+def CVTu64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.u64.f32\t$d, $a", []>;
+def CVTs64f32
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF32:$a),
+ "cvt$r.s64.f32\t$d, $a", []>;
+def CVTu64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.u64.f64\t$d, $a", []>;
+def CVTs64f64
+ : InstPTX<(outs RegI64:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.s64.f64\t$d, $a", []>;
+
+// To f32
+def CVTf32u16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.u16\t$d, $a", []>;
+def CVTf32s16
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f32.s16\t$d, $a", []>;
+def CVTf32u32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.u32\t$d, $a", []>;
+def CVTf32s32
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f32.s32\t$d, $a", []>;
+def CVTf32u64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.u64\t$d, $a", []>;
+def CVTf32s64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f32.s64\t$d, $a", []>;
+def CVTf32f64
+ : InstPTX<(outs RegF32:$d), (ins RndMode:$r, RegF64:$a),
+ "cvt$r.f32.f64\t$d, $a", []>;
+
+// To f64
+def CVTf64u16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.u16\t$d, $a", []>;
+def CVTf64s16
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI16:$a),
+ "cvt$r.f64.s16\t$d, $a", []>;
+def CVTf64u32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.u32\t$d, $a", []>;
+def CVTf64s32
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI32:$a),
+ "cvt$r.f64.s32\t$d, $a", []>;
+def CVTf64u64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.u64\t$d, $a", []>;
+def CVTf64s64
+ : InstPTX<(outs RegF64:$d), (ins RndMode:$r, RegI64:$a),
+ "cvt$r.f64.s64\t$d, $a", []>;
+def CVTf64f32
+ : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a", []>;
+
+ ///===- Control Flow Instructions -----------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def BRAd
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: The pattern part is blank because I cannot (or do not yet know
+ // how to) use the first operand of PredicateOperand (a RegPred register) here
+ // When this is revisited, make sure to also look at LowerSETCC and try to
+ // fold it into negated predicates, if possible.
+ def BRAdp
+ : InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
+ [/*(brcond pred:$_p, bb:$d)*/]>;
+}
+
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>;
+ def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
+}
+
+let hasSideEffects = 1 in {
+ def CALL : InstPTX<(outs), (ins), "call", [(PTXcall)]>;
+}
+
+///===- Parameter Passing Pseudo-Instructions -----------------------------===//
+
+def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
+ "mov.pred\t$a, %arg$b", []>;
+def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
+ "mov.b16\t$a, %arg$b", []>;
+def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
+ "mov.b32\t$a, %arg$b", []>;
+def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
+ "mov.b64\t$a, %arg$b", []>;
+def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
+ "mov.f32\t$a, %arg$b", []>;
+def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
+ "mov.f64\t$a, %arg$b", []>;
+
+def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
+def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
+def WRITEPARAMI32 : InstPTX<(outs), (ins RegI32:$a), "//w", []>;
+def WRITEPARAMI64 : InstPTX<(outs), (ins RegI64:$a), "//w", []>;
+def WRITEPARAMF32 : InstPTX<(outs), (ins RegF32:$a), "//w", []>;
+def WRITEPARAMF64 : InstPTX<(outs), (ins RegF64:$a), "//w", []>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selection Patterns
+//===----------------------------------------------------------------------===//
+
+// FADD
+def : Pat<(f32 (fadd RegF32:$a, RegF32:$b)),
+ (FADDrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fadd RegF32:$a, fpimm:$b)),
+ (FADDri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, RegF64:$b)),
+ (FADDrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fadd RegF64:$a, fpimm:$b)),
+ (FADDri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FSUB
+def : Pat<(f32 (fsub RegF32:$a, RegF32:$b)),
+ (FSUBrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fsub RegF32:$a, fpimm:$b)),
+ (FSUBri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, RegF64:$b)),
+ (FSUBrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fsub RegF64:$a, fpimm:$b)),
+ (FSUBri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL
+def : Pat<(f32 (fmul RegF32:$a, RegF32:$b)),
+ (FMULrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fmul RegF32:$a, fpimm:$b)),
+ (FMULri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, RegF64:$b)),
+ (FMULrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fmul RegF64:$a, fpimm:$b)),
+ (FMULri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FDIV
+def : Pat<(f32 (fdiv RegF32:$a, RegF32:$b)),
+ (FDIVrr32 RndDefault, RegF32:$a, RegF32:$b)>;
+def : Pat<(f32 (fdiv RegF32:$a, fpimm:$b)),
+ (FDIVri32 RndDefault, RegF32:$a, fpimm:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, RegF64:$b)),
+ (FDIVrr64 RndDefault, RegF64:$a, RegF64:$b)>;
+def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
+ (FDIVri64 RndDefault, RegF64:$a, fpimm:$b)>;
+
+// FMUL+FADD
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>,
+ Requires<[SupportsFMA]>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
+ (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
+def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
+ (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>,
+ Requires<[SupportsFMA]>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
+def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
+ (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
+
+// FNEG
+def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fneg fpimm:$a)), (FNEGri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fneg RegF64:$a)), (FNEGrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fneg fpimm:$a)), (FNEGri64 RndDefault, fpimm:$a)>;
+
+// FSQRT
+def : Pat<(f32 (fsqrt RegF32:$a)), (FSQRTrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsqrt fpimm:$a)), (FSQRTri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsqrt RegF64:$a)), (FSQRTrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsqrt fpimm:$a)), (FSQRTri64 RndDefault, fpimm:$a)>;
+
+// FSIN
+def : Pat<(f32 (fsin RegF32:$a)), (FSINrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fsin fpimm:$a)), (FSINri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fsin RegF64:$a)), (FSINrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fsin fpimm:$a)), (FSINri64 RndDefault, fpimm:$a)>;
+
+// FCOS
+def : Pat<(f32 (fcos RegF32:$a)), (FCOSrr32 RndDefault, RegF32:$a)>;
+def : Pat<(f32 (fcos fpimm:$a)), (FCOSri32 RndDefault, fpimm:$a)>;
+def : Pat<(f64 (fcos RegF64:$a)), (FCOSrr64 RndDefault, RegF64:$a)>;
+def : Pat<(f64 (fcos fpimm:$a)), (FCOSri64 RndDefault, fpimm:$a)>;
+
+// Type conversion notes:
+// - PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
+// - PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
+
+// Conversion to pred
+def : Pat<(i1 (trunc RegI16:$a)), (SETPGTu16ri RegI16:$a, 0)>;
+def : Pat<(i1 (trunc RegI32:$a)), (SETPGTu32ri RegI32:$a, 0)>;
+def : Pat<(i1 (trunc RegI64:$a)), (SETPGTu64ri RegI64:$a, 0)>;
+def : Pat<(i1 (fp_to_uint RegF32:$a)), (SETPGTu32ri (MOVi32f32 RegF32:$a), 0)>;
+def : Pat<(i1 (fp_to_uint RegF64:$a)), (SETPGTu64ri (MOVi64f64 RegF64:$a), 0)>;
+
+// Conversion to u16
+def : Pat<(i16 (anyext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (sext RegPred:$a)), (SELPi16ii RegPred:$a, 0xFFFF, 0)>;
+def : Pat<(i16 (zext RegPred:$a)), (SELPi16ii RegPred:$a, 1, 0)>;
+def : Pat<(i16 (trunc RegI32:$a)), (CVTu16u32 RegI32:$a)>;
+def : Pat<(i16 (trunc RegI64:$a)), (CVTu16u64 RegI64:$a)>;
+def : Pat<(i16 (fp_to_uint RegF32:$a)), (CVTu16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_sint RegF32:$a)), (CVTs16f32 RndDefault, RegF32:$a)>;
+def : Pat<(i16 (fp_to_uint RegF64:$a)), (CVTu16f64 RndDefault, RegF64:$a)>;
+def : Pat<(i16 (fp_to_sint RegF64:$a)), (CVTs16f64 RndDefault, RegF64:$a)>;
+
+// Conversion to u32
+def : Pat<(i32 (anyext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (sext RegPred:$a)), (SELPi32ii RegPred:$a, 0xFFFFFFFF, 0)>;
+def : Pat<(i32 (zext RegPred:$a)), (SELPi32ii RegPred:$a, 1, 0)>;
+def : Pat<(i32 (anyext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (sext RegI16:$a)), (CVTs32s16 RegI16:$a)>;
+def : Pat<(i32 (zext RegI16:$a)), (CVTu32u16 RegI16:$a)>;
+def : Pat<(i32 (trunc RegI64:$a)), (CVTu32u64 RegI64:$a)>;
+def : Pat<(i32 (fp_to_uint RegF32:$a)), (CVTu32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_sint RegF32:$a)), (CVTs32f32 RndDefault, RegF32:$a)>;
+def : Pat<(i32 (fp_to_uint RegF64:$a)), (CVTu32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (fp_to_sint RegF64:$a)), (CVTs32f64 RndDefault, RegF64:$a)>;
+def : Pat<(i32 (bitconvert RegF32:$a)), (MOVi32f32 RegF32:$a)>;
+
+// Conversion to u64
+def : Pat<(i64 (anyext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (sext RegPred:$a)), (SELPi64ii RegPred:$a,
+ 0xFFFFFFFFFFFFFFFF, 0)>;
+def : Pat<(i64 (zext RegPred:$a)), (SELPi64ii RegPred:$a, 1, 0)>;
+def : Pat<(i64 (anyext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (sext RegI16:$a)), (CVTs64s16 RegI16:$a)>;
+def : Pat<(i64 (zext RegI16:$a)), (CVTu64u16 RegI16:$a)>;
+def : Pat<(i64 (anyext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (sext RegI32:$a)), (CVTs64s32 RegI32:$a)>;
+def : Pat<(i64 (zext RegI32:$a)), (CVTu64u32 RegI32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF32:$a)), (CVTu64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_sint RegF32:$a)), (CVTs64f32 RndDefault, RegF32:$a)>;
+def : Pat<(i64 (fp_to_uint RegF64:$a)), (CVTu64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (fp_to_sint RegF64:$a)), (CVTs64f64 RndDefault, RegF64:$a)>;
+def : Pat<(i64 (bitconvert RegF64:$a)), (MOVi64f64 RegF64:$a)>;
+
+// Conversion to f32
+def : Pat<(f32 (uint_to_fp RegPred:$a)), (SELPf32rr RegPred:$a,
+ (MOVf32i32 0x3F800000), (MOVf32i32 0))>;
+def : Pat<(f32 (uint_to_fp RegI16:$a)), (CVTf32u16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (sint_to_fp RegI16:$a)), (CVTf32s16 RndDefault, RegI16:$a)>;
+def : Pat<(f32 (uint_to_fp RegI32:$a)), (CVTf32u32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (sint_to_fp RegI32:$a)), (CVTf32s32 RndDefault, RegI32:$a)>;
+def : Pat<(f32 (uint_to_fp RegI64:$a)), (CVTf32u64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (sint_to_fp RegI64:$a)), (CVTf32s64 RndDefault, RegI64:$a)>;
+def : Pat<(f32 (fround RegF64:$a)), (CVTf32f64 RndDefault, RegF64:$a)>;
+def : Pat<(f32 (bitconvert RegI32:$a)), (MOVf32i32 RegI32:$a)>;
+
+// Conversion to f64
+def : Pat<(f64 (uint_to_fp RegPred:$a)), (SELPf64rr RegPred:$a,
+ (MOVf64i64 0x3F80000000000000), (MOVf64i64 0))>;
+def : Pat<(f64 (uint_to_fp RegI16:$a)), (CVTf64u16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (sint_to_fp RegI16:$a)), (CVTf64s16 RndDefault, RegI16:$a)>;
+def : Pat<(f64 (uint_to_fp RegI32:$a)), (CVTf64u32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (sint_to_fp RegI32:$a)), (CVTf64s32 RndDefault, RegI32:$a)>;
+def : Pat<(f64 (uint_to_fp RegI64:$a)), (CVTf64u64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
+def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
+def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
+
+// setcc - predicate inversion for branch conditions
+def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)),
+ (XORripreds RegPred:$a, imm:$b)>;
+
+///===- Intrinsic Instructions --------------------------------------------===//
+include "PTXIntrinsicInstrInfo.td"
+
+///===- Load/Store Instructions -------------------------------------------===//
+include "PTXInstrLoadStore.td"
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td
new file mode 100644
index 0000000..7a62684
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXInstrLoadStore.td
@@ -0,0 +1,278 @@
+//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PTX load/store instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+// Addressing Predicates
+// We have to differentiate between 32- and 64-bit pointer types
+def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
+def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
+
+//===----------------------------------------------------------------------===//
+// Pattern Fragments for Loads/Stores
+//===----------------------------------------------------------------------===//
+
+def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Constant;
+ return false;
+}]>;
+
+def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+def store_global
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Global;
+ return false;
+}]>;
+
+def store_shared
+ : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
+ const Value *Src;
+ const PointerType *PT;
+ if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
+ (PT = dyn_cast<PointerType>(Src->getType())))
+ return PT->getAddressSpace() == PTXStateSpace::Shared;
+ return false;
+}]>;
+
+// Addressing modes.
+def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
+def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
+def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
+def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
+def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
+def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
+def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
+
+// Address operands
+def MEMri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI32, i32imm);
+}
+def MEMri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops RegI64, i64imm);
+}
+def LOCALri32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def LOCALri64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+def MEMii32 : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+def MEMii64 : Operand<i64> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops i64imm, i64imm);
+}
+// The operand here does not correspond to an actual address, so we
+// can use i32 in 64-bit address modes.
+def MEMpi : Operand<i32> {
+ let PrintMethod = "printParamOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+def MEMret : Operand<i32> {
+ let PrintMethod = "printReturnOperand";
+ let MIOperandInfo = (ops i32imm);
+}
+
+
+// Load/store .param space
+def PTXloadparam
+ : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+ : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+def PTXreadparam
+ : SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXwriteparam
+ : SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Classes for loads/stores
+//===----------------------------------------------------------------------===//
+multiclass PTX_LD<string opstr, string typestr,
+ RegisterClass RC, PatFrag pat_load> {
+ def rr32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRrr64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs RC:$d),
+ (ins MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs RC:$d),
+ (ins MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRri64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs RC:$d),
+ (ins MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii32:$a))]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs RC:$d),
+ (ins MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (pat_load ADDRii64:$a))]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
+ PatFrag pat_store> {
+ def rr32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def rr64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRrr64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ri32 : InstPTX<(outs),
+ (ins RC:$d, MEMri32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ri64 : InstPTX<(outs),
+ (ins RC:$d, MEMri64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRri64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+ def ii32 : InstPTX<(outs),
+ (ins RC:$d, MEMii32:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii32:$a)]>,
+ Requires<[Use32BitAddresses]>;
+ def ii64 : InstPTX<(outs),
+ (ins RC:$d, MEMii64:$a),
+ !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
+ [(pat_store RC:$d, ADDRii64:$a)]>,
+ Requires<[Use64BitAddresses]>;
+}
+
+multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
+ def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal32:$a))]>;
+ def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
+ !strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (load_global ADDRlocal64:$a))]>;
+ def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal32:$a)]>;
+ def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
+ !strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
+ [(store_global RC:$d, ADDRlocal64:$a)]>;
+}
+
+multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
+ let hasSideEffects = 1 in {
+ def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
+ !strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")),
+ [(set RC:$d, (PTXloadparam texternalsym:$a))]>;
+ def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
+ !strconcat("st.param", !strconcat(typestr, "\t[$d], $a")),
+ [(PTXstoreparam texternalsym:$d, RC:$a)]>;
+ }
+}
+
+multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
+ defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
+}
+
+multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
+ defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for loads/stores
+//===----------------------------------------------------------------------===//
+
+// Global/shared stores
+defm STg : PTX_ST_ALL<"st.global", store_global>;
+defm STs : PTX_ST_ALL<"st.shared", store_shared>;
+
+// Global/shared/constant loads
+defm LDg : PTX_LD_ALL<"ld.global", load_global>;
+defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
+defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
+
+// Param loads/stores
+defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
+defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>;
+defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>;
+defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>;
+defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>;
+defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>;
+
+// Local loads/stores
+defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
+defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>;
+defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>;
+defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>;
+defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>;
+defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
new file mode 100644
index 0000000..3416f1c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -0,0 +1,110 @@
+//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PTX-specific intrinsic instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Special Purpose Register Accessor Intrinsics
+
+class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
+ : InstPTX<(outs RegI64:$d), (ins),
+ !strconcat("mov.u64\t$d, %", regname),
+ [(set RegI64:$d, (intop))]>;
+
+class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
+ : InstPTX<(outs RegI32:$d), (ins),
+ !strconcat("mov.u32\t$d, %", regname),
+ [(set RegI32:$d, (intop))]>;
+
+// TODO Add read vector-version of special registers
+
+//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
+// int_ptx_read_tid_r64>;
+def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
+ int_ptx_read_tid_x>;
+def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
+ int_ptx_read_tid_y>;
+def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
+ int_ptx_read_tid_z>;
+def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
+ int_ptx_read_tid_w>;
+
+//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
+// int_ptx_read_ntid_r64>;
+def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
+ int_ptx_read_ntid_x>;
+def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
+ int_ptx_read_ntid_y>;
+def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
+ int_ptx_read_ntid_z>;
+def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
+ int_ptx_read_ntid_w>;
+
+def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
+ int_ptx_read_laneid>;
+def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
+ int_ptx_read_warpid>;
+def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
+ int_ptx_read_nwarpid>;
+
+//def PTX_READ_CTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
+def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
+ int_ptx_read_ctaid_x>;
+def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
+ int_ptx_read_ctaid_y>;
+def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
+ int_ptx_read_ctaid_z>;
+def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
+ int_ptx_read_ctaid_w>;
+
+//def PTX_READ_NCTAID_R64 :
+//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
+def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
+ int_ptx_read_nctaid_x>;
+def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
+ int_ptx_read_nctaid_y>;
+def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
+ int_ptx_read_nctaid_z>;
+def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
+ int_ptx_read_nctaid_w>;
+
+def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
+ int_ptx_read_smid>;
+def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
+ int_ptx_read_nsmid>;
+def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
+ int_ptx_read_gridid>;
+
+def PTX_READ_LANEMASK_EQ
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
+def PTX_READ_LANEMASK_LE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
+def PTX_READ_LANEMASK_LT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
+def PTX_READ_LANEMASK_GE
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
+def PTX_READ_LANEMASK_GT
+ : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
+
+def PTX_READ_CLOCK
+ : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
+def PTX_READ_CLOCK64
+ : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
+
+def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
+def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
+def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
+def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// PTX Parallel Synchronization and Communication Intrinsics
+
+def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
+ [(int_ptx_bar_sync imm:$i)]>;
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
new file mode 100644
index 0000000..3ed67a6
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -0,0 +1,556 @@
+//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/PathV2.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class PTXMCAsmStreamer : public MCStreamer {
+ formatted_raw_ostream &OS;
+ const MCAsmInfo &MAI;
+ OwningPtr<MCInstPrinter> InstPrinter;
+ OwningPtr<MCCodeEmitter> Emitter;
+
+ SmallString<128> CommentToEmit;
+ raw_svector_ostream CommentStream;
+
+ unsigned IsVerboseAsm : 1;
+ unsigned ShowInst : 1;
+
+public:
+ PTXMCAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &os,
+ bool isVerboseAsm, bool useLoc,
+ MCInstPrinter *printer,
+ MCCodeEmitter *emitter,
+ bool showInst)
+ : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+ InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
+ IsVerboseAsm(isVerboseAsm),
+ ShowInst(showInst) {
+ if (InstPrinter && IsVerboseAsm)
+ InstPrinter->setCommentStream(CommentStream);
+ }
+
+ ~PTXMCAsmStreamer() {}
+
+ inline void EmitEOL() {
+ // If we don't have any comments, just emit a \n.
+ if (!IsVerboseAsm) {
+ OS << '\n';
+ return;
+ }
+ EmitCommentsAndEOL();
+ }
+ void EmitCommentsAndEOL();
+
+ /// isVerboseAsm - Return true if this streamer supports verbose assembly at
+ /// all.
+ virtual bool isVerboseAsm() const { return IsVerboseAsm; }
+
+ /// hasRawTextSupport - We support EmitRawText.
+ virtual bool hasRawTextSupport() const { return true; }
+
+ /// AddComment - Add a comment that can be emitted to the generated .s
+ /// file if applicable as a QoI issue to make the output of the compiler
+ /// more readable. This only affects the MCAsmStreamer, and only when
+ /// verbose assembly output is enabled.
+ virtual void AddComment(const Twine &T);
+
+ /// AddEncodingComment - Add a comment showing the encoding of an instruction.
+ virtual void AddEncodingComment(const MCInst &Inst);
+
+ /// GetCommentOS - Return a raw_ostream that comments can be written to.
+ /// Unlike AddComment, you are required to terminate comments with \n if you
+ /// use this method.
+ virtual raw_ostream &GetCommentOS() {
+ if (!IsVerboseAsm)
+ return nulls(); // Discard comments unless in verbose asm mode.
+ return CommentStream;
+ }
+
+ /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+ virtual void AddBlankLine() {
+ EmitEOL();
+ }
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void InitSections() { /* PTX does not use sections */ }
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+
+ virtual void EmitThumbFunc(MCSymbol *Func);
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+
+ virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize);
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+ ///
+ /// @param Symbol - The common symbol to emit.
+ /// @param Size - The size of the common symbol.
+ /// @param ByteAlignment - The alignment of the common symbol in bytes.
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0);
+
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+ virtual void EmitULEB128Value(const MCExpr *Value);
+ virtual void EmitSLEB128Value(const MCExpr *Value);
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace);
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename);
+
+ virtual void EmitInstruction(const MCInst &Inst);
+
+ /// EmitRawText - If this file is backed by an assembly streamer, this dumps
+ /// the specified string in the output .s file. This capability is
+ /// indicated by the hasRawTextSupport() predicate.
+ virtual void EmitRawText(StringRef String);
+
+ virtual void FinishImpl();
+
+ /// @}
+
+}; // class PTXMCAsmStreamer
+
+}
+
+/// TODO: Add appropriate implementation of Emit*() methods when needed
+
+void PTXMCAsmStreamer::AddComment(const Twine &T) {
+ if (!IsVerboseAsm) return;
+
+ // Make sure that CommentStream is flushed.
+ CommentStream.flush();
+
+ T.toVector(CommentToEmit);
+ // Each comment goes on its own line.
+ CommentToEmit.push_back('\n');
+
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+void PTXMCAsmStreamer::EmitCommentsAndEOL() {
+ if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
+ OS << '\n';
+ return;
+ }
+
+ CommentStream.flush();
+ StringRef Comments = CommentToEmit.str();
+
+ assert(Comments.back() == '\n' &&
+ "Comment array not newline terminated");
+ do {
+ // Emit a line of comments.
+ OS.PadToColumn(MAI.getCommentColumn());
+ size_t Position = Comments.find('\n');
+ OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+
+ Comments = Comments.substr(Position+1);
+ } while (!Comments.empty());
+
+ CommentToEmit.clear();
+ // Tell the comment stream that the vector changed underneath it.
+ CommentStream.resync();
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+ assert(Bytes && "Invalid size!");
+ return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+}
+
+void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(getCurrentSection() && "Cannot emit before setting section!");
+
+ OS << *Symbol << MAI.getLabelSuffix();
+ EmitEOL();
+ Symbol->setSection(*getCurrentSection());
+}
+
+void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+
+void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
+
+void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << *Symbol << " = " << *Value;
+ EmitEOL();
+
+ // FIXME: Lift context changes into super class.
+ Symbol->setVariableValue(Value);
+}
+
+void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
+ const MCSymbol *Symbol) {
+ OS << ".weakref " << *Alias << ", " << *Symbol;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+ const MCSymbol *LastLabel,
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ report_fatal_error("Unimplemented.");
+}
+
+void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {}
+
+void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
+
+void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
+
+void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
+
+void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+
+void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {}
+
+void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
+ MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+
+static inline char toOctal(int X) { return (X&7)+'0'; }
+
+static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
+ OS << '"';
+
+ for (unsigned i = 0, e = Data.size(); i != e; ++i) {
+ unsigned char C = Data[i];
+ if (C == '"' || C == '\\') {
+ OS << '\\' << (char)C;
+ continue;
+ }
+
+ if (isprint((unsigned char)C)) {
+ OS << (char)C;
+ continue;
+ }
+
+ switch (C) {
+ case '\b': OS << "\\b"; break;
+ case '\f': OS << "\\f"; break;
+ case '\n': OS << "\\n"; break;
+ case '\r': OS << "\\r"; break;
+ case '\t': OS << "\\t"; break;
+ default:
+ OS << '\\';
+ OS << toOctal(C >> 6);
+ OS << toOctal(C >> 3);
+ OS << toOctal(C >> 0);
+ break;
+ }
+ }
+
+ OS << '"';
+}
+
+void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ if (Data.empty()) return;
+
+ if (Data.size() == 1) {
+ OS << MAI.getData8bitsDirective(AddrSpace);
+ OS << (unsigned)(unsigned char)Data[0];
+ EmitEOL();
+ return;
+ }
+
+ // If the data ends with 0 and the target supports .asciz, use it, otherwise
+ // use .ascii
+ if (MAI.getAscizDirective() && Data.back() == 0) {
+ OS << MAI.getAscizDirective();
+ Data = Data.substr(0, Data.size()-1);
+ } else {
+ OS << MAI.getAsciiDirective();
+ }
+
+ OS << ' ';
+ PrintQuotedString(Data, OS);
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+ const char *Directive = 0;
+ switch (Size) {
+ default: break;
+ case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
+ case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
+ case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+ case 8:
+ Directive = MAI.getData64bitsDirective(AddrSpace);
+ // If the target doesn't support 64-bit data, emit as two 32-bit halves.
+ if (Directive) break;
+ int64_t IntValue;
+ if (!Value->EvaluateAsAbsolute(IntValue))
+ report_fatal_error("Don't know how to emit this value.");
+ if (getContext().getAsmInfo().isLittleEndian()) {
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ } else {
+ EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+ EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+ }
+ return;
+ }
+
+ assert(Directive && "Invalid size for machine code value!");
+ OS << Directive << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
+ assert(MAI.hasLEB128() && "Cannot print a .uleb");
+ OS << ".uleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
+ assert(MAI.hasLEB128() && "Cannot print a .sleb");
+ OS << ".sleb128 " << *Value;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ assert(MAI.getGPRel32Directive() != 0);
+ OS << MAI.getGPRel32Directive() << *Value;
+ EmitEOL();
+}
+
+
+/// EmitFill - Emit NumBytes bytes worth of the value specified by
+/// FillValue. This implements directives such as '.space'.
+void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ if (NumBytes == 0) return;
+
+ if (AddrSpace == 0)
+ if (const char *ZeroDirective = MAI.getZeroDirective()) {
+ OS << ZeroDirective << NumBytes;
+ if (FillValue != 0)
+ OS << ',' << (int)FillValue;
+ EmitEOL();
+ return;
+ }
+
+ // Emit a byte at a time.
+ MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+}
+
+void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // Some assemblers don't support non-power of two alignments, so we always
+ // emit alignments as a power of two if possible.
+ if (isPowerOf2_32(ByteAlignment)) {
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << MAI.getAlignDirective(); break;
+ // FIXME: use MAI for this!
+ case 2: OS << ".p2alignw "; break;
+ case 4: OS << ".p2alignl "; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ if (MAI.getAlignmentIsInBytes())
+ OS << ByteAlignment;
+ else
+ OS << Log2_32(ByteAlignment);
+
+ if (Value || MaxBytesToEmit) {
+ OS << ", 0x";
+ OS.write_hex(truncateToSize(Value, ValueSize));
+
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ }
+ EmitEOL();
+ return;
+ }
+
+ // Non-power of two alignment. This is not widely supported by assemblers.
+ // FIXME: Parameterize this based on MAI.
+ switch (ValueSize) {
+ default: llvm_unreachable("Invalid size for machine code value!");
+ case 1: OS << ".balign"; break;
+ case 2: OS << ".balignw"; break;
+ case 4: OS << ".balignl"; break;
+ case 8: llvm_unreachable("Unsupported alignment size!");
+ }
+
+ OS << ' ' << ByteAlignment;
+ OS << ", " << truncateToSize(Value, ValueSize);
+ if (MaxBytesToEmit)
+ OS << ", " << MaxBytesToEmit;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {}
+
+bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {return false;}
+
+
+void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
+ assert(MAI.hasSingleParameterDotFile());
+ OS << "\t.file\t";
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+}
+
+// FIXME: should we inherit from MCAsmStreamer?
+bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Directory,
+ StringRef Filename) {
+ if (!Directory.empty()) {
+ if (sys::path::is_absolute(Filename))
+ return EmitDwarfFileDirective(FileNo, "", Filename);
+ SmallString<128> FullPathName = Directory;
+ sys::path::append(FullPathName, Filename);
+ return EmitDwarfFileDirective(FileNo, "", FullPathName);
+ }
+
+ OS << "\t.file\t" << FileNo << ' ';
+ PrintQuotedString(Filename, OS);
+ EmitEOL();
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
+}
+
+void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
+
+void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+ assert(getCurrentSection() && "Cannot emit contents before setting section!");
+
+ // Show the encoding in a comment if we have a code emitter.
+ if (Emitter)
+ AddEncodingComment(Inst);
+
+ // Show the MCInst if enabled.
+ if (ShowInst) {
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+ GetCommentOS() << "\n";
+ }
+
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
+ if (InstPrinter)
+ InstPrinter->printInst(&Inst, OS, "");
+ else
+ Inst.print(OS, &MAI);
+ EmitEOL();
+}
+
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
+/// the specified string in the output .s file. This capability is
+/// indicated by the hasRawTextSupport() predicate.
+void PTXMCAsmStreamer::EmitRawText(StringRef String) {
+ if (!String.empty() && String.back() == '\n')
+ String = String.substr(0, String.size()-1);
+ OS << String;
+ EmitEOL();
+}
+
+void PTXMCAsmStreamer::FinishImpl() {}
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Context,
+ formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
+ MCInstPrinter *IP,
+ MCCodeEmitter *CE, MCAsmBackend *MAB,
+ bool ShowInst) {
+ return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
+ IP, CE, ShowInst);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp
new file mode 100644
index 0000000..142e639
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMCInstLower.cpp
@@ -0,0 +1,32 @@
+//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PTX MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "PTXAsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ PTXAsmPrinter &AP) {
+ OutMI.setOpcode(MI->getOpcode());
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp;
+ OutMI.addOperand(AP.lowerOperand(MO));
+ }
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
new file mode 100644
index 0000000..172a0e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -0,0 +1,85 @@
+//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an information extractor for PTX machine functions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-mf-info-extract"
+
+#include "PTX.h"
+#include "PTXTargetMachine.h"
+#include "PTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// NOTE: PTXMFInfoExtract must after register allocation!
+
+namespace {
+ /// PTXMFInfoExtract - PTX specific code to extract of PTX machine
+ /// function information for PTXAsmPrinter
+ ///
+ class PTXMFInfoExtract : public MachineFunctionPass {
+ private:
+ static char ID;
+
+ public:
+ PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "PTX Machine Function Info Extractor";
+ }
+ }; // class PTXMFInfoExtract
+} // end anonymous namespace
+
+using namespace llvm;
+
+char PTXMFInfoExtract::ID = 0;
+
+bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Generate list of all virtual registers used in this function
+ for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
+ unsigned RegType;
+ if (TRC == PTX::RegPredRegisterClass)
+ RegType = PTXRegisterType::Pred;
+ else if (TRC == PTX::RegI16RegisterClass)
+ RegType = PTXRegisterType::B16;
+ else if (TRC == PTX::RegI32RegisterClass)
+ RegType = PTXRegisterType::B32;
+ else if (TRC == PTX::RegI64RegisterClass)
+ RegType = PTXRegisterType::B64;
+ else if (TRC == PTX::RegF32RegisterClass)
+ RegType = PTXRegisterType::F32;
+ else if (TRC == PTX::RegF64RegisterClass)
+ RegType = PTXRegisterType::F64;
+ else
+ llvm_unreachable("Unkown register class.");
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
+ }
+
+ return false;
+}
+
+FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new PTXMFInfoExtract(TM, OptLevel);
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.cpp
new file mode 100644
index 0000000..60acfc7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- PTXMachineFuctionInfo.cpp - PTX machine function info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void PTXMachineFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
new file mode 100644
index 0000000..bb7574c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -0,0 +1,202 @@
+//===-- PTXMachineFuctionInfo.h - PTX machine function info ------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares PTX-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_MACHINE_FUNCTION_INFO_H
+#define PTX_MACHINE_FUNCTION_INFO_H
+
+#include "PTX.h"
+#include "PTXParamManager.h"
+#include "PTXRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// PTXMachineFunctionInfo - This class is derived from MachineFunction and
+/// contains private PTX target-specific information for each MachineFunction.
+///
+class PTXMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+ bool IsKernel;
+ DenseSet<unsigned> RegArgs;
+ DenseSet<unsigned> RegRets;
+
+ typedef DenseMap<int, std::string> FrameMap;
+
+ FrameMap FrameSymbols;
+
+ struct RegisterInfo {
+ unsigned Reg;
+ unsigned Type;
+ unsigned Space;
+ unsigned Offset;
+ unsigned Encoded;
+ };
+
+ typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;
+
+ RegisterInfoMap RegInfo;
+
+ PTXParamManager ParamManager;
+
+public:
+ typedef DenseSet<unsigned>::const_iterator reg_iterator;
+
+ PTXMachineFunctionInfo(MachineFunction &MF)
+ : IsKernel(false) {
+ }
+
+ /// getParamManager - Returns the PTXParamManager instance for this function.
+ PTXParamManager& getParamManager() { return ParamManager; }
+ const PTXParamManager& getParamManager() const { return ParamManager; }
+
+ /// setKernel/isKernel - Gets/sets a flag that indicates if this function is
+ /// a PTX kernel function.
+ void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
+ bool isKernel() const { return IsKernel; }
+
+ /// argreg_begin/argreg_end - Returns iterators to the set of registers
+ /// containing function arguments.
+ reg_iterator argreg_begin() const { return RegArgs.begin(); }
+ reg_iterator argreg_end() const { return RegArgs.end(); }
+
+ /// retreg_begin/retreg_end - Returns iterators to the set of registers
+ /// containing the function return values.
+ reg_iterator retreg_begin() const { return RegRets.begin(); }
+ reg_iterator retreg_end() const { return RegRets.end(); }
+
+ /// addRegister - Adds a virtual register to the set of all used registers
+ void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
+ if (!RegInfo.count(Reg)) {
+ RegisterInfo Info;
+ Info.Reg = Reg;
+ Info.Type = RegType;
+ Info.Space = RegSpace;
+
+ // Determine register offset
+ Info.Offset = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
+ e = RegInfo.end(); i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Space == RegSpace)
+ if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
+ Info.Offset++;
+ }
+
+ // Encode the register data into a single register number
+ Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
+
+ RegInfo[Reg] = Info;
+
+ if (RegSpace == PTXRegisterSpace::Argument)
+ RegArgs.insert(Reg);
+ else if (RegSpace == PTXRegisterSpace::Return)
+ RegRets.insert(Reg);
+ }
+ }
+
+ /// countRegisters - Returns the number of registers of the given type and
+ /// space.
+ unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
+ unsigned Count = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
+ i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Type == RegType && RI.Space == RegSpace)
+ Count++;
+ }
+ return Count;
+ }
+
+ /// getEncodedRegister - Returns the encoded value of the register.
+ unsigned getEncodedRegister(unsigned Reg) const {
+ return RegInfo.lookup(Reg).Encoded;
+ }
+
+ /// addRetReg - Adds a register to the set of return-value registers.
+ void addRetReg(unsigned Reg) {
+ if (!RegRets.count(Reg)) {
+ RegRets.insert(Reg);
+ }
+ }
+
+ /// addArgReg - Adds a register to the set of function argument registers.
+ void addArgReg(unsigned Reg) {
+ RegArgs.insert(Reg);
+ }
+
+ /// getRegisterName - Returns the name of the specified virtual register. This
+ /// name is used during PTX emission.
+ std::string getRegisterName(unsigned Reg) const {
+ if (RegInfo.count(Reg)) {
+ const RegisterInfo& RI = RegInfo.lookup(Reg);
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, RI.Encoded);
+ NameStr.flush();
+ return Name;
+ }
+ else if (Reg == PTX::NoRegister)
+ return "%noreg";
+ else
+ llvm_unreachable("Register not in register name map");
+ }
+
+ /// getEncodedRegisterName - Returns the name of the encoded register.
+ std::string getEncodedRegisterName(unsigned EncodedReg) const {
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, EncodedReg);
+ NameStr.flush();
+ return Name;
+ }
+
+ /// getRegisterType - Returns the type of the specified virtual register.
+ unsigned getRegisterType(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Type;
+ else
+ llvm_unreachable("Unknown register");
+ }
+
+ /// getOffsetForRegister - Returns the offset of the virtual register
+ unsigned getOffsetForRegister(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Offset;
+ else
+ return 0;
+ }
+
+ /// getFrameSymbol - Returns the symbol name for the given FrameIndex.
+ const char* getFrameSymbol(int FrameIndex) {
+ if (FrameSymbols.count(FrameIndex)) {
+ return FrameSymbols.lookup(FrameIndex).c_str();
+ } else {
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
+ // The whole point of caching this name is to ensure the pointer we pass
+ // to any getExternalSymbol() calls will remain valid for the lifetime of
+ // the back-end instance. This is to work around an issue in SelectionDAG
+ // where symbol names are expected to be life-long strings.
+ FrameSymbols[FrameIndex] = Name;
+ return FrameSymbols[FrameIndex].c_str();
+ }
+ }
+}; // class PTXMachineFunctionInfo
+} // namespace llvm
+
+#endif // PTX_MACHINE_FUNCTION_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp
new file mode 100644
index 0000000..cc1cc71
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.cpp
@@ -0,0 +1,73 @@
+//===-- PTXParamManager.cpp - Manager for .param variables ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXParamManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXParamManager.h"
+#include "PTX.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+PTXParamManager::PTXParamManager() {
+}
+
+unsigned PTXParamManager::addArgumentParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_ARGUMENT;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__param_";
+ Name += utostr(ArgumentParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ArgumentParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addReturnParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_RETURN;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__ret_";
+ Name += utostr(ReturnParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ ReturnParams.push_back(Index);
+
+ return Index;
+}
+
+unsigned PTXParamManager::addLocalParam(unsigned Size) {
+ PTXParam Param;
+ Param.Type = PTX_PARAM_TYPE_LOCAL;
+ Param.Size = Size;
+
+ std::string Name;
+ Name = "__localparam_";
+ Name += utostr(LocalParams.size()+1);
+ Param.Name = Name;
+
+ unsigned Index = AllParams.size();
+ AllParams[Index] = Param;
+ LocalParams.push_back(Index);
+
+ return Index;
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXParamManager.h b/contrib/llvm/lib/Target/PTX/PTXParamManager.h
new file mode 100644
index 0000000..92e7728
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXParamManager.h
@@ -0,0 +1,87 @@
+//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTXParamManager class, which manages all defined .param
+// variables for a particular function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_PARAM_MANAGER_H
+#define PTX_PARAM_MANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+
+namespace llvm {
+
+/// PTXParamManager - This class manages all .param variables defined for a
+/// particular function.
+class PTXParamManager {
+private:
+
+ /// PTXParamType - Type of a .param variable
+ enum PTXParamType {
+ PTX_PARAM_TYPE_ARGUMENT,
+ PTX_PARAM_TYPE_RETURN,
+ PTX_PARAM_TYPE_LOCAL
+ };
+
+ /// PTXParam - Definition of a PTX .param variable
+ struct PTXParam {
+ PTXParamType Type;
+ unsigned Size;
+ std::string Name;
+ };
+
+ DenseMap<unsigned, PTXParam> AllParams;
+ SmallVector<unsigned, 4> ArgumentParams;
+ SmallVector<unsigned, 4> ReturnParams;
+ SmallVector<unsigned, 4> LocalParams;
+
+public:
+
+ typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
+
+ PTXParamManager();
+
+ param_iterator arg_begin() const { return ArgumentParams.begin(); }
+ param_iterator arg_end() const { return ArgumentParams.end(); }
+ param_iterator ret_begin() const { return ReturnParams.begin(); }
+ param_iterator ret_end() const { return ReturnParams.end(); }
+ param_iterator local_begin() const { return LocalParams.begin(); }
+ param_iterator local_end() const { return LocalParams.end(); }
+
+ /// addArgumentParam - Returns a new .param used as an argument.
+ unsigned addArgumentParam(unsigned Size);
+
+ /// addReturnParam - Returns a new .param used as a return argument.
+ unsigned addReturnParam(unsigned Size);
+
+ /// addLocalParam - Returns a new .param used as a local .param variable.
+ unsigned addLocalParam(unsigned Size);
+
+ /// getParamName - Returns the name of the parameter as a string.
+ const std::string &getParamName(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Name;
+ }
+
+ /// getParamSize - Returns the size of the parameter in bits.
+ unsigned getParamSize(unsigned Param) const {
+ assert(AllParams.count(Param) == 1 && "Param has not been defined!");
+ return AllParams.find(Param)->second.Size;
+ }
+
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
new file mode 100644
index 0000000..7fd5375
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegAlloc.cpp
@@ -0,0 +1,53 @@
+//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a register allocator for PTX code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-reg-alloc"
+
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+
+using namespace llvm;
+
+namespace {
+ // Special register allocator for PTX.
+ class PTXRegAlloc : public MachineFunctionPass {
+ public:
+ static char ID;
+ PTXRegAlloc() : MachineFunctionPass(ID) {}
+
+ virtual const char* getPassName() const {
+ return "PTX Register Allocator";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // We do not actually do anything (at least not yet).
+ return false;
+ }
+ };
+
+ char PTXRegAlloc::ID = 0;
+
+ static RegisterRegAlloc
+ ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
+}
+
+FunctionPass *llvm::createPTXRegisterAllocator() {
+ return new PTXRegAlloc();
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
new file mode 100644
index 0000000..b6ffd38
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -0,0 +1,38 @@
+//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXRegisterInfo.h"
+#include "PTX.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &tii)
+ // PTX does not have a return address register.
+ : PTXGenRegisterInfo(0), TII(tii) {
+}
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/,
+ int /*SPAdj*/,
+ RegScavenger * /*RS*/) const {
+ llvm_unreachable("FrameIndex should have been previously eliminated!");
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
new file mode 100644
index 0000000..5614ce7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.h
@@ -0,0 +1,56 @@
+//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PTX implementation of the MRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_REGISTER_INFO_H
+#define PTX_REGISTER_INFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_HEADER
+#include "PTXGenRegisterInfo.inc"
+
+namespace llvm {
+class PTXTargetMachine;
+class MachineFunction;
+
+struct PTXRegisterInfo : public PTXGenRegisterInfo {
+private:
+ const TargetInstrInfo &TII;
+
+public:
+ PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &tii);
+
+ virtual const uint16_t
+ *getCalleeSavedRegs(const MachineFunction *MF = 0) const {
+ static const uint16_t CalleeSavedRegs[] = { 0 };
+ return CalleeSavedRegs; // save nothing
+ }
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ return Reserved; // reserve no regs
+ }
+
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS = NULL) const;
+
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const {
+ llvm_unreachable("PTX does not have a frame register");
+ }
+}; // struct PTXRegisterInfo
+} // namespace llvm
+
+#endif // PTX_REGISTER_INFO_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
new file mode 100644
index 0000000..e8b262e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXRegisterInfo.td
@@ -0,0 +1,36 @@
+//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+ let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+// The generated register info code throws warnings for empty register classes
+// (e.g. zero-length arrays), so we use a dummy register here just to prevent
+// these warnings.
+def DUMMY_REG : PTXReg<"R0">;
+
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
new file mode 100644
index 0000000..a116fab
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp
@@ -0,0 +1,150 @@
+//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTXSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ptx-selectiondag-info"
+#include "PTXTargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+using namespace llvm;
+
+PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
+}
+
+PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
+}
+
+SDValue
+PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ // Always inline memcpys. In PTX, we do not have a C library that provides
+ // a memcpy function.
+ //if (!AlwaysInline)
+ // return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+ EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
+ false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff),
+ isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, PointerType, Src,
+ DAG.getConstant(SrcOff, PointerType)),
+ SrcPtrInfo.getWithOffset(SrcOff), false, false,
+ false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, PointerType, Dst,
+ DAG.getConstant(DstOff, PointerType)),
+ DstPtrInfo.getWithOffset(DstOff), false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
+
+SDValue PTXSelectionDAGInfo::
+EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst,
+ SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ llvm_unreachable("memset lowering not implemented for PTX yet");
+}
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h
new file mode 100644
index 0000000..e0c7167
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h
@@ -0,0 +1,53 @@
+//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PTX subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXSELECTIONDAGINFO_H
+#define PTXSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
+/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
+class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const PTXSubtarget *Subtarget;
+
+public:
+ explicit PTXSelectionDAGInfo(const TargetMachine &TM);
+ ~PTXSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Op1, SDValue Op2,
+ SDValue Op3, unsigned Align,
+ bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const;
+};
+
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
new file mode 100644
index 0000000..454f64e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.cpp
@@ -0,0 +1,68 @@
+//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXSubtarget.h"
+#include "PTX.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PTXGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+void PTXSubtarget::anchor() { }
+
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PTXGenSubtargetInfo(TT, CPU, FS),
+ PTXTarget(PTX_COMPUTE_1_0),
+ PTXVersion(PTX_VERSION_2_0),
+ SupportsDouble(false),
+ SupportsFMA(true),
+ Is64Bit(is64Bit) {
+ std::string TARGET = CPU;
+ if (TARGET.empty())
+ TARGET = "generic";
+ ParseSubtargetFeatures(TARGET, FS);
+}
+
+std::string PTXSubtarget::getTargetString() const {
+ switch(PTXTarget) {
+ default: llvm_unreachable("Unknown PTX target");
+ case PTX_SM_1_0: return "sm_10";
+ case PTX_SM_1_1: return "sm_11";
+ case PTX_SM_1_2: return "sm_12";
+ case PTX_SM_1_3: return "sm_13";
+ case PTX_SM_2_0: return "sm_20";
+ case PTX_SM_2_1: return "sm_21";
+ case PTX_SM_2_2: return "sm_22";
+ case PTX_SM_2_3: return "sm_23";
+ case PTX_COMPUTE_1_0: return "compute_10";
+ case PTX_COMPUTE_1_1: return "compute_11";
+ case PTX_COMPUTE_1_2: return "compute_12";
+ case PTX_COMPUTE_1_3: return "compute_13";
+ case PTX_COMPUTE_2_0: return "compute_20";
+ }
+}
+
+std::string PTXSubtarget::getPTXVersionString() const {
+ switch(PTXVersion) {
+ case PTX_VERSION_2_0: return "2.0";
+ case PTX_VERSION_2_1: return "2.1";
+ case PTX_VERSION_2_2: return "2.2";
+ case PTX_VERSION_2_3: return "2.3";
+ }
+ llvm_unreachable("Invalid PTX version");
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXSubtarget.h b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
new file mode 100644
index 0000000..ce93fef
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXSubtarget.h
@@ -0,0 +1,131 @@
+//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_SUBTARGET_H
+#define PTX_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PTXGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+ class PTXSubtarget : public PTXGenSubtargetInfo {
+ virtual void anchor();
+ public:
+
+ /**
+ * Enumeration of Shader Models supported by the back-end.
+ */
+ enum PTXTargetEnum {
+ PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
+ PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
+ PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
+ PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
+ PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
+ PTX_LAST_COMPUTE,
+
+ PTX_SM_1_0, /*< Shader Model 1.0 */
+ PTX_SM_1_1, /*< Shader Model 1.1 */
+ PTX_SM_1_2, /*< Shader Model 1.2 */
+ PTX_SM_1_3, /*< Shader Model 1.3 */
+ PTX_SM_2_0, /*< Shader Model 2.0 */
+ PTX_SM_2_1, /*< Shader Model 2.1 */
+ PTX_SM_2_2, /*< Shader Model 2.2 */
+ PTX_SM_2_3, /*< Shader Model 2.3 */
+ PTX_LAST_SM
+ };
+
+ /**
+ * Enumeration of PTX versions supported by the back-end.
+ *
+ * Currently, PTX 2.0 is the minimum supported version.
+ */
+ enum PTXVersionEnum {
+ PTX_VERSION_2_0, /*< PTX Version 2.0 */
+ PTX_VERSION_2_1, /*< PTX Version 2.1 */
+ PTX_VERSION_2_2, /*< PTX Version 2.2 */
+ PTX_VERSION_2_3 /*< PTX Version 2.3 */
+ };
+
+ private:
+
+ /// Shader Model supported on the target GPU.
+ PTXTargetEnum PTXTarget;
+
+ /// PTX Language Version.
+ PTXVersionEnum PTXVersion;
+
+ // The native .f64 type is supported on the hardware.
+ bool SupportsDouble;
+
+ // Support the fused-multiply add (FMA) and multiply-add (MAD)
+ // instructions
+ bool SupportsFMA;
+
+ // Use .u64 instead of .u32 for addresses.
+ bool Is64Bit;
+
+ public:
+
+ PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ // Target architecture accessors
+ std::string getTargetString() const;
+
+ std::string getPTXVersionString() const;
+
+ bool supportsDouble() const { return SupportsDouble; }
+
+ bool is64Bit() const { return Is64Bit; }
+
+ bool supportsFMA() const { return SupportsFMA; }
+
+ bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
+
+ bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
+
+ bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
+
+ bool fdivNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool fmadNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool useParamSpaceForDeviceArgs() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool callsAreHandled() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool emitPtrAttribute() const {
+ return PTXVersion >= PTX_VERSION_2_2;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ }; // class PTXSubtarget
+} // namespace llvm
+
+#endif // PTX_SUBTARGET_H
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
new file mode 100644
index 0000000..97b8de1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.cpp
@@ -0,0 +1,165 @@
+//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PTX target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXTargetMachine.h"
+#include "PTX.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
+
+
+using namespace llvm;
+
+namespace llvm {
+ MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc,
+ bool useCFI, bool useDwarfDirectory,
+ MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE,
+ MCAsmBackend *MAB,
+ bool ShowInst);
+}
+
+extern "C" void LLVMInitializePTXTarget() {
+
+ RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
+ RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
+
+ TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
+}
+
+namespace {
+ const char* DataLayout32 =
+ "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+ const char* DataLayout64 =
+ "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
+}
+
+// DataLayout and FrameLowering are filled with dummy data
+PTXTargetMachine::PTXTargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ DataLayout(is64Bit ? DataLayout64 : DataLayout32),
+ Subtarget(TT, CPU, FS, is64Bit),
+ FrameLowering(Subtarget),
+ InstrInfo(*this),
+ TSInfo(*this),
+ TLInfo(*this) {
+}
+
+void PTX32TargetMachine::anchor() { }
+
+PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+}
+
+void PTX64TargetMachine::anchor() { }
+
+PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+}
+
+namespace llvm {
+/// PTX Code Generator Pass Configuration Options.
+class PTXPassConfig : public TargetPassConfig {
+public:
+ PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ PTXTargetMachine &getPTXTargetMachine() const {
+ return getTM<PTXTargetMachine>();
+ }
+
+ bool addInstSelector();
+ FunctionPass *createTargetRegisterAllocator(bool);
+ void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+ bool addPostRegAlloc();
+ void addMachineLateOptimization();
+ bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) {
+ PTXPassConfig *PassConfig = new PTXPassConfig(this, PM);
+ PassConfig->disablePass(PrologEpilogCodeInserterID);
+ return PassConfig;
+}
+
+bool PTXPassConfig::addInstSelector() {
+ PM->add(createPTXISelDag(getPTXTargetMachine(), getOptLevel()));
+ return false;
+}
+
+FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) {
+ return createPTXRegisterAllocator();
+}
+
+// Modify the optimized compilation path to bypass optimized register alloction.
+void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addFastRegAlloc(RegAllocPass);
+}
+
+bool PTXPassConfig::addPostRegAlloc() {
+ // PTXMFInfoExtract must after register allocation!
+ //PM->add(createPTXMFInfoExtract(getPTXTargetMachine()));
+ return false;
+}
+
+/// Add passes that optimize machine instructions after register allocation.
+void PTXPassConfig::addMachineLateOptimization() {
+ if (addPass(BranchFolderPassID) != &NoPassID)
+ printAndVerify("After BranchFolding");
+
+ if (addPass(TailDuplicateID) != &NoPassID)
+ printAndVerify("After TailDuplicate");
+}
+
+bool PTXPassConfig::addPreEmitPass() {
+ PM->add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel()));
+ PM->add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel()));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
new file mode 100644
index 0000000..278d155
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/PTXTargetMachine.h
@@ -0,0 +1,104 @@
+//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PTX specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTX_TARGET_MACHINE_H
+#define PTX_TARGET_MACHINE_H
+
+#include "PTXISelLowering.h"
+#include "PTXInstrInfo.h"
+#include "PTXFrameLowering.h"
+#include "PTXSelectionDAGInfo.h"
+#include "PTXSubtarget.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PTXTargetMachine : public LLVMTargetMachine {
+ private:
+ const TargetData DataLayout;
+ PTXSubtarget Subtarget; // has to be initialized before FrameLowering
+ PTXFrameLowering FrameLowering;
+ PTXInstrInfo InstrInfo;
+ PTXSelectionDAGInfo TSInfo;
+ PTXTargetLowering TLInfo;
+
+ public:
+ PTXTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
+ bool is64Bit);
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+
+ virtual const TargetFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ virtual const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo(); }
+
+ virtual const PTXTargetLowering *getTargetLowering() const {
+ return &TLInfo; }
+
+ virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+
+ // Emission of machine code through JITCodeEmitter is not supported.
+ virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+ JITCodeEmitter &,
+ bool = true) {
+ return true;
+ }
+
+ // Emission of machine code through MCJIT is not supported.
+ virtual bool addPassesToEmitMC(PassManagerBase &,
+ MCContext *&,
+ raw_ostream &,
+ bool = true) {
+ return true;
+ }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+}; // class PTXTargetMachine
+
+
+class PTX32TargetMachine : public PTXTargetMachine {
+ virtual void anchor();
+public:
+
+ PTX32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+}; // class PTX32TargetMachine
+
+class PTX64TargetMachine : public PTXTargetMachine {
+ virtual void anchor();
+public:
+
+ PTX64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+}; // class PTX32TargetMachine
+
+} // namespace llvm
+
+#endif // PTX_TARGET_MACHINE_H
diff --git a/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
new file mode 100644
index 0000000..09a2735
--- /dev/null
+++ b/contrib/llvm/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTX.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::ThePTX32Target;
+Target llvm::ThePTX64Target;
+
+extern "C" void LLVMInitializePTXTargetInfo() {
+ // see llvm/ADT/Triple.h
+ RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
+ "PTX (32-bit) [Experimental]");
+ RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
+ "PTX (64-bit) [Experimental]");
+}
OpenPOWER on IntegriCloud